restart on failure: add an optional set of integer exit codes to restart on
This commit is contained in:
parent
6be9ebbc8b
commit
0808c20583
|
@ -110,8 +110,8 @@ let info_ _ endp cert key ca name =
|
||||||
let destroy _ endp cert key ca name =
|
let destroy _ endp cert key ca name =
|
||||||
jump endp cert key ca name (`Unikernel_cmd `Unikernel_destroy)
|
jump endp cert key ca name (`Unikernel_cmd `Unikernel_destroy)
|
||||||
|
|
||||||
let create _ endp cert key ca force name image cpuid memory argv block network compression restart_on_fail =
|
let create _ endp cert key ca force name image cpuid memory argv block network compression restart_on_fail exit_code =
|
||||||
match Albatross_cli.create_vm force image cpuid memory argv block network compression restart_on_fail with
|
match Albatross_cli.create_vm force image cpuid memory argv block network compression restart_on_fail exit_code with
|
||||||
| Ok cmd -> jump endp cert key ca name (`Unikernel_cmd cmd)
|
| Ok cmd -> jump endp cert key ca name (`Unikernel_cmd cmd)
|
||||||
| Error (`Msg msg) -> Error (`Msg msg)
|
| Error (`Msg msg) -> Error (`Msg msg)
|
||||||
|
|
||||||
|
@ -208,7 +208,7 @@ let create_cmd =
|
||||||
[`S "DESCRIPTION";
|
[`S "DESCRIPTION";
|
||||||
`P "Creates a virtual machine."]
|
`P "Creates a virtual machine."]
|
||||||
in
|
in
|
||||||
Term.(term_result (const create $ setup_log $ destination $ ca_cert $ ca_key $ server_ca $ force $ vm_name $ image $ cpu $ vm_mem $ args $ block $ net $ compress_level 9 $ restart_on_fail)),
|
Term.(term_result (const create $ setup_log $ destination $ ca_cert $ ca_key $ server_ca $ force $ vm_name $ image $ cpu $ vm_mem $ args $ block $ net $ compress_level 9 $ restart_on_fail $ exit_code)),
|
||||||
Term.info "create" ~doc ~man
|
Term.info "create" ~doc ~man
|
||||||
|
|
||||||
let console_cmd =
|
let console_cmd =
|
||||||
|
|
|
@ -61,8 +61,8 @@ let info_ _ opt_socket name =
|
||||||
let destroy _ opt_socket name =
|
let destroy _ opt_socket name =
|
||||||
jump opt_socket name (`Unikernel_cmd `Unikernel_destroy)
|
jump opt_socket name (`Unikernel_cmd `Unikernel_destroy)
|
||||||
|
|
||||||
let create _ opt_socket force name image cpuid memory argv block network compression restart_on_fail =
|
let create _ opt_socket force name image cpuid memory argv block network compression restart_on_fail exit_code =
|
||||||
match Albatross_cli.create_vm force image cpuid memory argv block network compression restart_on_fail with
|
match Albatross_cli.create_vm force image cpuid memory argv block network compression restart_on_fail exit_code with
|
||||||
| Ok cmd -> jump opt_socket name (`Unikernel_cmd cmd)
|
| Ok cmd -> jump opt_socket name (`Unikernel_cmd cmd)
|
||||||
| Error (`Msg msg) -> Error (`Msg msg)
|
| Error (`Msg msg) -> Error (`Msg msg)
|
||||||
|
|
||||||
|
@ -153,7 +153,7 @@ let create_cmd =
|
||||||
[`S "DESCRIPTION";
|
[`S "DESCRIPTION";
|
||||||
`P "Creates a virtual machine."]
|
`P "Creates a virtual machine."]
|
||||||
in
|
in
|
||||||
Term.(term_result (const create $ setup_log $ socket $ force $ vm_name $ image $ cpu $ vm_mem $ args $ block $ net $ compress_level 0 $ restart_on_fail)),
|
Term.(term_result (const create $ setup_log $ socket $ force $ vm_name $ image $ cpu $ vm_mem $ args $ block $ net $ compress_level 0 $ restart_on_fail $ exit_code)),
|
||||||
Term.info "create" ~doc ~man
|
Term.info "create" ~doc ~man
|
||||||
|
|
||||||
let console_cmd =
|
let console_cmd =
|
||||||
|
|
|
@ -79,7 +79,7 @@ let setup_log style_renderer level =
|
||||||
Logs.set_level level;
|
Logs.set_level level;
|
||||||
Logs.set_reporter (Logs_fmt.reporter ~dst:Format.std_formatter ())
|
Logs.set_reporter (Logs_fmt.reporter ~dst:Format.std_formatter ())
|
||||||
|
|
||||||
let create_vm force image cpuid memory argv block_devices bridges compression restart_on_fail =
|
let create_vm force image cpuid memory argv block_devices bridges compression restart_on_fail exit_codes =
|
||||||
let open Rresult.R.Infix in
|
let open Rresult.R.Infix in
|
||||||
Bos.OS.File.read (Fpath.v image) >>| fun image ->
|
Bos.OS.File.read (Fpath.v image) >>| fun image ->
|
||||||
let image, compressed = match compression with
|
let image, compressed = match compression with
|
||||||
|
@ -88,7 +88,9 @@ let create_vm force image cpuid memory argv block_devices bridges compression re
|
||||||
let img = Vmm_compress.compress ~level image in
|
let img = Vmm_compress.compress ~level image in
|
||||||
Cstruct.of_string img, true
|
Cstruct.of_string img, true
|
||||||
and argv = match argv with [] -> None | xs -> Some xs
|
and argv = match argv with [] -> None | xs -> Some xs
|
||||||
and fail_behaviour = if restart_on_fail then `Restart else `Quit
|
and fail_behaviour =
|
||||||
|
let exits = match exit_codes with [] -> None | xs -> Some (IS.of_list xs) in
|
||||||
|
if restart_on_fail then `Restart exits else `Quit
|
||||||
in
|
in
|
||||||
let config = Unikernel.{ typ = `Solo5 ; compressed ; image ; fail_behaviour ; cpuid ; memory ; block_devices ; bridges ; argv } in
|
let config = Unikernel.{ typ = `Solo5 ; compressed ; image ; fail_behaviour ; cpuid ; memory ; block_devices ; bridges ; argv } in
|
||||||
if force then `Unikernel_force_create config else `Unikernel_create config
|
if force then `Unikernel_force_create config else `Unikernel_create config
|
||||||
|
@ -241,6 +243,10 @@ let restart_on_fail =
|
||||||
let doc = "Restart on fail" in
|
let doc = "Restart on fail" in
|
||||||
Arg.(value & flag & info [ "restart-on-fail" ] ~doc)
|
Arg.(value & flag & info [ "restart-on-fail" ] ~doc)
|
||||||
|
|
||||||
|
let exit_code =
|
||||||
|
let doc = "Exit code to restart on" in
|
||||||
|
Arg.(value & opt_all int [] & info [ "exit-code" ] ~doc)
|
||||||
|
|
||||||
let timestamp_c =
|
let timestamp_c =
|
||||||
let parse s = match Ptime.of_rfc3339 s with
|
let parse s = match Ptime.of_rfc3339 s with
|
||||||
| Ok (t, _, _) -> `Ok t
|
| Ok (t, _, _) -> `Ok t
|
||||||
|
|
|
@ -37,39 +37,19 @@ let rec create stat_out log_out cons_out data_out hdr name config =
|
||||||
Lwt.return (None, fail_cont ())
|
Lwt.return (None, fail_cont ())
|
||||||
| Ok (state', stat, log, data, name, vm) ->
|
| Ok (state', stat, log, data, name, vm) ->
|
||||||
state := state';
|
state := state';
|
||||||
(match Unikernel.(vm.config.fail_behaviour) with
|
(if Unikernel.restart_handler config then
|
||||||
| `Quit -> ()
|
|
||||||
| `Restart ->
|
|
||||||
match Vmm_vmmd.register_restart !state name Lwt.task with
|
match Vmm_vmmd.register_restart !state name Lwt.task with
|
||||||
| None -> ()
|
| None -> ()
|
||||||
| Some (state', task) ->
|
| Some (state', task) ->
|
||||||
state := state';
|
state := state';
|
||||||
Lwt.async (fun () ->
|
Lwt.async (fun () ->
|
||||||
task >>= function
|
task >>= fun r ->
|
||||||
| (`Signal _ | `Stop _) as r ->
|
if should_restart config name r then
|
||||||
Logs.warn (fun m -> m "unikernel %a exited with signal %a"
|
Lwt_mutex.with_lock create_lock (fun () ->
|
||||||
Name.pp name pp_process_exit r);
|
create stat_out log_out cons_out stub_data_out
|
||||||
Lwt.return_unit
|
stub_hdr name vm.Unikernel.config)
|
||||||
| `Exit i ->
|
else
|
||||||
(* results:
|
Lwt.return_unit));
|
||||||
normal exit (i.e. teardown) is 0
|
|
||||||
solo5-exit allows an arbitrary int
|
|
||||||
solo5-abort emits 255
|
|
||||||
solo5 internal error (bad image, bad manifest) is 1
|
|
||||||
ocaml exceptions (out of memory et al) use 2
|
|
||||||
-> soon (4.10) they'll abort == 255
|
|
||||||
signal 11 is if a kill -TERM was sent (i.e. our destroy)
|
|
||||||
|
|
||||||
--> best: user-provided list of which exit codes to restart on
|
|
||||||
(and filter 1 specially)
|
|
||||||
*)
|
|
||||||
match i with
|
|
||||||
| 1 -> Logs.warn (fun m -> m "solo5 exit failure"); Lwt.return_unit
|
|
||||||
| _ ->
|
|
||||||
Logs.info (fun m -> m "solo5 exited with %d, restarting" i);
|
|
||||||
Lwt_mutex.with_lock create_lock (fun () ->
|
|
||||||
create stat_out log_out cons_out stub_data_out
|
|
||||||
stub_hdr name vm.Unikernel.config)));
|
|
||||||
stat_out "setting up stat" stat >>= fun () ->
|
stat_out "setting up stat" stat >>= fun () ->
|
||||||
log_out "setting up log" log >|= fun () ->
|
log_out "setting up log" log >|= fun () ->
|
||||||
(Some vm, data)) >>= fun (started, data) ->
|
(Some vm, data)) >>= fun (started, data) ->
|
||||||
|
|
|
@ -40,8 +40,8 @@ let info_ _ name = jump name (`Unikernel_cmd `Unikernel_info)
|
||||||
let destroy _ name =
|
let destroy _ name =
|
||||||
jump name (`Unikernel_cmd `Unikernel_destroy)
|
jump name (`Unikernel_cmd `Unikernel_destroy)
|
||||||
|
|
||||||
let create _ force name image cpuid memory argv block network compression restart_on_fail =
|
let create _ force name image cpuid memory argv block network compression restart_on_fail exit_code =
|
||||||
match Albatross_cli.create_vm force image cpuid memory argv block network compression restart_on_fail with
|
match Albatross_cli.create_vm force image cpuid memory argv block network compression restart_on_fail exit_code with
|
||||||
| Ok cmd -> jump name (`Unikernel_cmd cmd)
|
| Ok cmd -> jump name (`Unikernel_cmd cmd)
|
||||||
| Error (`Msg msg) -> Error (`Msg msg)
|
| Error (`Msg msg) -> Error (`Msg msg)
|
||||||
|
|
||||||
|
@ -122,7 +122,7 @@ let create_cmd =
|
||||||
[`S "DESCRIPTION";
|
[`S "DESCRIPTION";
|
||||||
`P "Creates a virtual machine."]
|
`P "Creates a virtual machine."]
|
||||||
in
|
in
|
||||||
Term.(term_result (const create $ setup_log $ force $ vm_name $ image $ cpu $ vm_mem $ args $ block $ net $ compress_level 9 $ restart_on_fail)),
|
Term.(term_result (const create $ setup_log $ force $ vm_name $ image $ cpu $ vm_mem $ args $ block $ net $ compress_level 9 $ restart_on_fail $ exit_code)),
|
||||||
Term.info "create" ~doc ~man
|
Term.info "create" ~doc ~man
|
||||||
|
|
||||||
let console_cmd =
|
let console_cmd =
|
||||||
|
|
|
@ -279,15 +279,25 @@ let typ =
|
||||||
let fail_behaviour =
|
let fail_behaviour =
|
||||||
let f = function
|
let f = function
|
||||||
| `C1 () -> `Quit
|
| `C1 () -> `Quit
|
||||||
| `C2 () -> `Restart
|
| `C2 xs ->
|
||||||
|
let exit_codes = match xs with
|
||||||
|
| [] -> None
|
||||||
|
| xs -> Some (IS.of_list xs)
|
||||||
|
in
|
||||||
|
`Restart exit_codes
|
||||||
and g = function
|
and g = function
|
||||||
| `Quit -> `C1 ()
|
| `Quit -> `C1 ()
|
||||||
| `Restart -> `C2 ()
|
| `Restart xs ->
|
||||||
|
let exit_codes = match xs with
|
||||||
|
| None -> []
|
||||||
|
| Some i -> IS.elements i
|
||||||
|
in
|
||||||
|
`C2 exit_codes
|
||||||
in
|
in
|
||||||
Asn.S.map f g @@
|
Asn.S.map f g @@
|
||||||
Asn.S.(choice2
|
Asn.S.(choice2
|
||||||
(explicit 0 null)
|
(explicit 0 null)
|
||||||
(explicit 1 null))
|
(explicit 1 (set_of int)))
|
||||||
|
|
||||||
let unikernel_config =
|
let unikernel_config =
|
||||||
let open Unikernel in
|
let open Unikernel in
|
||||||
|
@ -309,8 +319,8 @@ let unikernel_config =
|
||||||
@ (required ~label:"fail behaviour" fail_behaviour)
|
@ (required ~label:"fail behaviour" fail_behaviour)
|
||||||
@ (required ~label:"cpuid" int)
|
@ (required ~label:"cpuid" int)
|
||||||
@ (required ~label:"memory" int)
|
@ (required ~label:"memory" int)
|
||||||
@ (optional ~label:"blocks" (explicit 0 (sequence_of utf8_string)))
|
@ (optional ~label:"blocks" (explicit 0 (set_of utf8_string)))
|
||||||
@ (optional ~label:"bridges" (explicit 1 (sequence_of utf8_string)))
|
@ (optional ~label:"bridges" (explicit 1 (set_of utf8_string)))
|
||||||
-@ (optional ~label:"arguments"(explicit 2 (sequence_of utf8_string))))
|
-@ (optional ~label:"arguments"(explicit 2 (sequence_of utf8_string))))
|
||||||
|
|
||||||
let unikernel_cmd =
|
let unikernel_cmd =
|
||||||
|
|
|
@ -156,10 +156,14 @@ module Unikernel = struct
|
||||||
let pp_typ ppf = function
|
let pp_typ ppf = function
|
||||||
| `Solo5 -> Fmt.pf ppf "solo5"
|
| `Solo5 -> Fmt.pf ppf "solo5"
|
||||||
|
|
||||||
type fail_behaviour = [ `Quit | `Restart ]
|
type fail_behaviour = [ `Quit | `Restart of IS.t option ]
|
||||||
|
|
||||||
let pp_fail_behaviour ppf f =
|
let pp_fail_behaviour ppf = function
|
||||||
Fmt.string ppf (match f with `Quit -> "quit" | `Restart -> "restart")
|
| `Quit -> Fmt.string ppf "quit"
|
||||||
|
| `Restart codes ->
|
||||||
|
Fmt.pf ppf "restart %a"
|
||||||
|
Fmt.(option ~none:(unit "all except 1") (list ~sep:(unit ", ") int))
|
||||||
|
(match codes with None -> None | Some x -> Some (IS.elements x))
|
||||||
|
|
||||||
type config = {
|
type config = {
|
||||||
typ : typ ;
|
typ : typ ;
|
||||||
|
@ -184,6 +188,9 @@ module Unikernel = struct
|
||||||
Fmt.(list ~sep:(unit ", ") string) vm.bridges
|
Fmt.(list ~sep:(unit ", ") string) vm.bridges
|
||||||
Fmt.(option ~none:(unit "no") (list ~sep:(unit " ") string)) vm.argv
|
Fmt.(option ~none:(unit "no") (list ~sep:(unit " ") string)) vm.argv
|
||||||
|
|
||||||
|
let restart_handler config =
|
||||||
|
match config.fail_behaviour with `Quit -> false | `Restart _ -> true
|
||||||
|
|
||||||
type t = {
|
type t = {
|
||||||
config : config ;
|
config : config ;
|
||||||
cmd : Bos.Cmd.t ;
|
cmd : Bos.Cmd.t ;
|
||||||
|
@ -289,6 +296,43 @@ let pp_process_exit ppf = function
|
||||||
| `Signal n -> Fmt.pf ppf "signal %a (numeric %d)" Fmt.Dump.signal n n
|
| `Signal n -> Fmt.pf ppf "signal %a (numeric %d)" Fmt.Dump.signal n n
|
||||||
| `Stop n -> Fmt.pf ppf "stop %a (numeric %d)" Fmt.Dump.signal n n
|
| `Stop n -> Fmt.pf ppf "stop %a (numeric %d)" Fmt.Dump.signal n n
|
||||||
|
|
||||||
|
let should_restart config name = function
|
||||||
|
| (`Signal _ | `Stop _) as r ->
|
||||||
|
(* signal 11 is if a kill -TERM was sent (i.e. our destroy) *)
|
||||||
|
Logs.warn (fun m -> m "unikernel %a exited with signal %a"
|
||||||
|
Name.pp name pp_process_exit r);
|
||||||
|
false
|
||||||
|
| `Exit i ->
|
||||||
|
(* results (and default behaviour) -- solo5-exit allows an arbitrary int
|
||||||
|
0 normal exit (i.e. teardown) -> restart
|
||||||
|
1 solo5 internal error (bad image, bad manifest) -> no restart, never
|
||||||
|
2 ocaml exceptions (out of memory et al) -> restart
|
||||||
|
64..70 -> no restart (soon to be used by unikernel command line parsing)
|
||||||
|
255 solo5-abort -> soon (OCaml 4.10) fatal error (out of memory) -> restart *)
|
||||||
|
let opt_mem i =
|
||||||
|
match config.Unikernel.fail_behaviour with
|
||||||
|
| `Quit -> assert false
|
||||||
|
| `Restart None -> true
|
||||||
|
| `Restart (Some c) -> IS.mem i c
|
||||||
|
in
|
||||||
|
match i with
|
||||||
|
| 1 ->
|
||||||
|
Logs.warn (fun m -> m "unikernel %a solo5 exit failure (1)"
|
||||||
|
Name.pp name);
|
||||||
|
false
|
||||||
|
| 64 | 65 | 66 | 67 | 68 | 69 | 70 ->
|
||||||
|
Logs.warn (fun m -> m "unikernel %a exited %d, not restarting"
|
||||||
|
Name.pp name i);
|
||||||
|
false
|
||||||
|
| _ when opt_mem i ->
|
||||||
|
Logs.info (fun m -> m "unikernel %a exited %d, restarting"
|
||||||
|
Name.pp name i);
|
||||||
|
true
|
||||||
|
| _ ->
|
||||||
|
Logs.info (fun m -> m "unikernel %a exited %d, not restarting %a"
|
||||||
|
Name.pp name i Unikernel.pp_fail_behaviour config.fail_behaviour);
|
||||||
|
false
|
||||||
|
|
||||||
module Log = struct
|
module Log = struct
|
||||||
type log_event = [
|
type log_event = [
|
||||||
| `Login of Name.t * Ipaddr.V4.t * int
|
| `Login of Name.t * Ipaddr.V4.t * int
|
||||||
|
|
|
@ -58,7 +58,7 @@ module Unikernel : sig
|
||||||
type typ = [ `Solo5 ]
|
type typ = [ `Solo5 ]
|
||||||
val pp_typ : typ Fmt.t
|
val pp_typ : typ Fmt.t
|
||||||
|
|
||||||
type fail_behaviour = [ `Quit | `Restart ]
|
type fail_behaviour = [ `Quit | `Restart of IS.t option ]
|
||||||
|
|
||||||
type config = {
|
type config = {
|
||||||
typ : typ ;
|
typ : typ ;
|
||||||
|
@ -74,6 +74,8 @@ module Unikernel : sig
|
||||||
|
|
||||||
val pp_config : config Fmt.t
|
val pp_config : config Fmt.t
|
||||||
|
|
||||||
|
val restart_handler : config -> bool
|
||||||
|
|
||||||
type t = {
|
type t = {
|
||||||
config : config;
|
config : config;
|
||||||
cmd : Bos.Cmd.t;
|
cmd : Bos.Cmd.t;
|
||||||
|
@ -153,6 +155,8 @@ type process_exit = [ `Exit of int | `Signal of int | `Stop of int ]
|
||||||
|
|
||||||
val pp_process_exit : process_exit Fmt.t
|
val pp_process_exit : process_exit Fmt.t
|
||||||
|
|
||||||
|
val should_restart : Unikernel.config -> Name.t -> process_exit -> bool
|
||||||
|
|
||||||
module Log : sig
|
module Log : sig
|
||||||
type log_event = [
|
type log_event = [
|
||||||
| `Login of Name.t * Ipaddr.V4.t * int
|
| `Login of Name.t * Ipaddr.V4.t * int
|
||||||
|
|
Loading…
Reference in a new issue