restart on failure: add optional integer set which exit codes to restart on
This commit is contained in:
parent
6be9ebbc8b
commit
0808c20583
|
@ -110,8 +110,8 @@ let info_ _ endp cert key ca name =
|
|||
let destroy _ endp cert key ca name =
|
||||
jump endp cert key ca name (`Unikernel_cmd `Unikernel_destroy)
|
||||
|
||||
let create _ endp cert key ca force name image cpuid memory argv block network compression restart_on_fail =
|
||||
match Albatross_cli.create_vm force image cpuid memory argv block network compression restart_on_fail with
|
||||
let create _ endp cert key ca force name image cpuid memory argv block network compression restart_on_fail exit_code =
|
||||
match Albatross_cli.create_vm force image cpuid memory argv block network compression restart_on_fail exit_code with
|
||||
| Ok cmd -> jump endp cert key ca name (`Unikernel_cmd cmd)
|
||||
| Error (`Msg msg) -> Error (`Msg msg)
|
||||
|
||||
|
@ -208,7 +208,7 @@ let create_cmd =
|
|||
[`S "DESCRIPTION";
|
||||
`P "Creates a virtual machine."]
|
||||
in
|
||||
Term.(term_result (const create $ setup_log $ destination $ ca_cert $ ca_key $ server_ca $ force $ vm_name $ image $ cpu $ vm_mem $ args $ block $ net $ compress_level 9 $ restart_on_fail)),
|
||||
Term.(term_result (const create $ setup_log $ destination $ ca_cert $ ca_key $ server_ca $ force $ vm_name $ image $ cpu $ vm_mem $ args $ block $ net $ compress_level 9 $ restart_on_fail $ exit_code)),
|
||||
Term.info "create" ~doc ~man
|
||||
|
||||
let console_cmd =
|
||||
|
|
|
@ -61,8 +61,8 @@ let info_ _ opt_socket name =
|
|||
let destroy _ opt_socket name =
|
||||
jump opt_socket name (`Unikernel_cmd `Unikernel_destroy)
|
||||
|
||||
let create _ opt_socket force name image cpuid memory argv block network compression restart_on_fail =
|
||||
match Albatross_cli.create_vm force image cpuid memory argv block network compression restart_on_fail with
|
||||
let create _ opt_socket force name image cpuid memory argv block network compression restart_on_fail exit_code =
|
||||
match Albatross_cli.create_vm force image cpuid memory argv block network compression restart_on_fail exit_code with
|
||||
| Ok cmd -> jump opt_socket name (`Unikernel_cmd cmd)
|
||||
| Error (`Msg msg) -> Error (`Msg msg)
|
||||
|
||||
|
@ -153,7 +153,7 @@ let create_cmd =
|
|||
[`S "DESCRIPTION";
|
||||
`P "Creates a virtual machine."]
|
||||
in
|
||||
Term.(term_result (const create $ setup_log $ socket $ force $ vm_name $ image $ cpu $ vm_mem $ args $ block $ net $ compress_level 0 $ restart_on_fail)),
|
||||
Term.(term_result (const create $ setup_log $ socket $ force $ vm_name $ image $ cpu $ vm_mem $ args $ block $ net $ compress_level 0 $ restart_on_fail $ exit_code)),
|
||||
Term.info "create" ~doc ~man
|
||||
|
||||
let console_cmd =
|
||||
|
|
|
@ -79,7 +79,7 @@ let setup_log style_renderer level =
|
|||
Logs.set_level level;
|
||||
Logs.set_reporter (Logs_fmt.reporter ~dst:Format.std_formatter ())
|
||||
|
||||
let create_vm force image cpuid memory argv block_devices bridges compression restart_on_fail =
|
||||
let create_vm force image cpuid memory argv block_devices bridges compression restart_on_fail exit_codes =
|
||||
let open Rresult.R.Infix in
|
||||
Bos.OS.File.read (Fpath.v image) >>| fun image ->
|
||||
let image, compressed = match compression with
|
||||
|
@ -88,7 +88,9 @@ let create_vm force image cpuid memory argv block_devices bridges compression re
|
|||
let img = Vmm_compress.compress ~level image in
|
||||
Cstruct.of_string img, true
|
||||
and argv = match argv with [] -> None | xs -> Some xs
|
||||
and fail_behaviour = if restart_on_fail then `Restart else `Quit
|
||||
and fail_behaviour =
|
||||
let exits = match exit_codes with [] -> None | xs -> Some (IS.of_list xs) in
|
||||
if restart_on_fail then `Restart exits else `Quit
|
||||
in
|
||||
let config = Unikernel.{ typ = `Solo5 ; compressed ; image ; fail_behaviour ; cpuid ; memory ; block_devices ; bridges ; argv } in
|
||||
if force then `Unikernel_force_create config else `Unikernel_create config
|
||||
|
@ -241,6 +243,10 @@ let restart_on_fail =
|
|||
let doc = "Restart on fail" in
|
||||
Arg.(value & flag & info [ "restart-on-fail" ] ~doc)
|
||||
|
||||
let exit_code =
|
||||
let doc = "Exit code to restart on" in
|
||||
Arg.(value & opt_all int [] & info [ "exit-code" ] ~doc)
|
||||
|
||||
let timestamp_c =
|
||||
let parse s = match Ptime.of_rfc3339 s with
|
||||
| Ok (t, _, _) -> `Ok t
|
||||
|
|
|
@ -37,39 +37,19 @@ let rec create stat_out log_out cons_out data_out hdr name config =
|
|||
Lwt.return (None, fail_cont ())
|
||||
| Ok (state', stat, log, data, name, vm) ->
|
||||
state := state';
|
||||
(match Unikernel.(vm.config.fail_behaviour) with
|
||||
| `Quit -> ()
|
||||
| `Restart ->
|
||||
(if Unikernel.restart_handler config then
|
||||
match Vmm_vmmd.register_restart !state name Lwt.task with
|
||||
| None -> ()
|
||||
| Some (state', task) ->
|
||||
state := state';
|
||||
Lwt.async (fun () ->
|
||||
task >>= function
|
||||
| (`Signal _ | `Stop _) as r ->
|
||||
Logs.warn (fun m -> m "unikernel %a exited with signal %a"
|
||||
Name.pp name pp_process_exit r);
|
||||
Lwt.return_unit
|
||||
| `Exit i ->
|
||||
(* results:
|
||||
normal exit (i.e. teardown) is 0
|
||||
solo5-exit allows an arbitrary int
|
||||
solo5-abort emits 255
|
||||
solo5 internal error (bad image, bad manifest) is 1
|
||||
ocaml exceptions (out of memory et al) use 2
|
||||
-> soon (4.10) they'll abort == 255
|
||||
signal 11 is if a kill -TERM was sent (i.e. our destroy)
|
||||
|
||||
--> best: user-provided list of which exit codes to restart on
|
||||
(and filter 1 specially)
|
||||
*)
|
||||
match i with
|
||||
| 1 -> Logs.warn (fun m -> m "solo5 exit failure"); Lwt.return_unit
|
||||
| _ ->
|
||||
Logs.info (fun m -> m "solo5 exited with %d, restarting" i);
|
||||
task >>= fun r ->
|
||||
if should_restart config name r then
|
||||
Lwt_mutex.with_lock create_lock (fun () ->
|
||||
create stat_out log_out cons_out stub_data_out
|
||||
stub_hdr name vm.Unikernel.config)));
|
||||
stub_hdr name vm.Unikernel.config)
|
||||
else
|
||||
Lwt.return_unit));
|
||||
stat_out "setting up stat" stat >>= fun () ->
|
||||
log_out "setting up log" log >|= fun () ->
|
||||
(Some vm, data)) >>= fun (started, data) ->
|
||||
|
|
|
@ -40,8 +40,8 @@ let info_ _ name = jump name (`Unikernel_cmd `Unikernel_info)
|
|||
let destroy _ name =
|
||||
jump name (`Unikernel_cmd `Unikernel_destroy)
|
||||
|
||||
let create _ force name image cpuid memory argv block network compression restart_on_fail =
|
||||
match Albatross_cli.create_vm force image cpuid memory argv block network compression restart_on_fail with
|
||||
let create _ force name image cpuid memory argv block network compression restart_on_fail exit_code =
|
||||
match Albatross_cli.create_vm force image cpuid memory argv block network compression restart_on_fail exit_code with
|
||||
| Ok cmd -> jump name (`Unikernel_cmd cmd)
|
||||
| Error (`Msg msg) -> Error (`Msg msg)
|
||||
|
||||
|
@ -122,7 +122,7 @@ let create_cmd =
|
|||
[`S "DESCRIPTION";
|
||||
`P "Creates a virtual machine."]
|
||||
in
|
||||
Term.(term_result (const create $ setup_log $ force $ vm_name $ image $ cpu $ vm_mem $ args $ block $ net $ compress_level 9 $ restart_on_fail)),
|
||||
Term.(term_result (const create $ setup_log $ force $ vm_name $ image $ cpu $ vm_mem $ args $ block $ net $ compress_level 9 $ restart_on_fail $ exit_code)),
|
||||
Term.info "create" ~doc ~man
|
||||
|
||||
let console_cmd =
|
||||
|
|
|
@ -279,15 +279,25 @@ let typ =
|
|||
let fail_behaviour =
|
||||
let f = function
|
||||
| `C1 () -> `Quit
|
||||
| `C2 () -> `Restart
|
||||
| `C2 xs ->
|
||||
let exit_codes = match xs with
|
||||
| [] -> None
|
||||
| xs -> Some (IS.of_list xs)
|
||||
in
|
||||
`Restart exit_codes
|
||||
and g = function
|
||||
| `Quit -> `C1 ()
|
||||
| `Restart -> `C2 ()
|
||||
| `Restart xs ->
|
||||
let exit_codes = match xs with
|
||||
| None -> []
|
||||
| Some i -> IS.elements i
|
||||
in
|
||||
`C2 exit_codes
|
||||
in
|
||||
Asn.S.map f g @@
|
||||
Asn.S.(choice2
|
||||
(explicit 0 null)
|
||||
(explicit 1 null))
|
||||
(explicit 1 (set_of int)))
|
||||
|
||||
let unikernel_config =
|
||||
let open Unikernel in
|
||||
|
@ -309,8 +319,8 @@ let unikernel_config =
|
|||
@ (required ~label:"fail behaviour" fail_behaviour)
|
||||
@ (required ~label:"cpuid" int)
|
||||
@ (required ~label:"memory" int)
|
||||
@ (optional ~label:"blocks" (explicit 0 (sequence_of utf8_string)))
|
||||
@ (optional ~label:"bridges" (explicit 1 (sequence_of utf8_string)))
|
||||
@ (optional ~label:"blocks" (explicit 0 (set_of utf8_string)))
|
||||
@ (optional ~label:"bridges" (explicit 1 (set_of utf8_string)))
|
||||
-@ (optional ~label:"arguments"(explicit 2 (sequence_of utf8_string))))
|
||||
|
||||
let unikernel_cmd =
|
||||
|
|
|
@ -156,10 +156,14 @@ module Unikernel = struct
|
|||
let pp_typ ppf = function
|
||||
| `Solo5 -> Fmt.pf ppf "solo5"
|
||||
|
||||
type fail_behaviour = [ `Quit | `Restart ]
|
||||
type fail_behaviour = [ `Quit | `Restart of IS.t option ]
|
||||
|
||||
let pp_fail_behaviour ppf f =
|
||||
Fmt.string ppf (match f with `Quit -> "quit" | `Restart -> "restart")
|
||||
let pp_fail_behaviour ppf = function
|
||||
| `Quit -> Fmt.string ppf "quit"
|
||||
| `Restart codes ->
|
||||
Fmt.pf ppf "restart %a"
|
||||
Fmt.(option ~none:(unit "all except 1") (list ~sep:(unit ", ") int))
|
||||
(match codes with None -> None | Some x -> Some (IS.elements x))
|
||||
|
||||
type config = {
|
||||
typ : typ ;
|
||||
|
@ -184,6 +188,9 @@ module Unikernel = struct
|
|||
Fmt.(list ~sep:(unit ", ") string) vm.bridges
|
||||
Fmt.(option ~none:(unit "no") (list ~sep:(unit " ") string)) vm.argv
|
||||
|
||||
let restart_handler config =
|
||||
match config.fail_behaviour with `Quit -> false | `Restart _ -> true
|
||||
|
||||
type t = {
|
||||
config : config ;
|
||||
cmd : Bos.Cmd.t ;
|
||||
|
@ -289,6 +296,43 @@ let pp_process_exit ppf = function
|
|||
| `Signal n -> Fmt.pf ppf "signal %a (numeric %d)" Fmt.Dump.signal n n
|
||||
| `Stop n -> Fmt.pf ppf "stop %a (numeric %d)" Fmt.Dump.signal n n
|
||||
|
||||
(** [should_restart config name r] decides whether the unikernel [name],
    whose process terminated with [r], is to be started again.

    - Termination via [`Signal _] or [`Stop _]: logs a warning and returns
      [false].
    - [`Exit 1] and [`Exit 64] .. [`Exit 70]: logs a warning and returns
      [false], independent of the configured exit code set.
    - Any other [`Exit i]: returns [true] if [config.fail_behaviour] is
      [`Restart None], or [`Restart (Some codes)] with [i] a member of
      [codes]; returns [false] otherwise.

    A [`Quit] fail behaviour never leads to a restart: the function is total
    and returns [false] instead of raising [Assert_failure]. *)
let should_restart config name = function
  | (`Signal _ | `Stop _) as r ->
    (* signal 11 is if a kill -TERM was sent (i.e. our destroy) *)
    Logs.warn (fun m -> m "unikernel %a exited with signal %a"
                  Name.pp name pp_process_exit r);
    false
  | `Exit i ->
    (* results (and default behaviour) -- solo5-exit allows an arbitrary int
       0 normal exit (i.e. teardown) -> restart
       1 solo5 internal error (bad image, bad manifest) -> no restart, never
       2 ocaml exceptions (out of memory et al) -> restart
       64..70 -> no restart (soon to be used by unikernel command line parsing)
       255 solo5-abort -> soon (OCaml 4.10) fatal error (out of memory) -> restart *)
    let opt_mem i =
      match config.Unikernel.fail_behaviour with
      (* A configuration that asks to quit never restarts; answering [false]
         keeps this function safe for callers that did not check
         [Unikernel.restart_handler] beforehand. *)
      | `Quit -> false
      | `Restart None -> true
      | `Restart (Some c) -> IS.mem i c
    in
    match i with
    | 1 ->
      Logs.warn (fun m -> m "unikernel %a solo5 exit failure (1)"
                    Name.pp name);
      false
    | 64 | 65 | 66 | 67 | 68 | 69 | 70 ->
      Logs.warn (fun m -> m "unikernel %a exited %d, not restarting"
                    Name.pp name i);
      false
    | _ when opt_mem i ->
      Logs.info (fun m -> m "unikernel %a exited %d, restarting"
                    Name.pp name i);
      true
    | _ ->
      Logs.info (fun m -> m "unikernel %a exited %d, not restarting %a"
                    Name.pp name i Unikernel.pp_fail_behaviour config.fail_behaviour);
      false
|
||||
|
||||
module Log = struct
|
||||
type log_event = [
|
||||
| `Login of Name.t * Ipaddr.V4.t * int
|
||||
|
|
|
@ -58,7 +58,7 @@ module Unikernel : sig
|
|||
type typ = [ `Solo5 ]
|
||||
val pp_typ : typ Fmt.t
|
||||
|
||||
type fail_behaviour = [ `Quit | `Restart ]
|
||||
type fail_behaviour = [ `Quit | `Restart of IS.t option ]
|
||||
|
||||
type config = {
|
||||
typ : typ ;
|
||||
|
@ -74,6 +74,8 @@ module Unikernel : sig
|
|||
|
||||
val pp_config : config Fmt.t
|
||||
|
||||
val restart_handler : config -> bool
|
||||
|
||||
type t = {
|
||||
config : config;
|
||||
cmd : Bos.Cmd.t;
|
||||
|
@ -153,6 +155,8 @@ type process_exit = [ `Exit of int | `Signal of int | `Stop of int ]
|
|||
|
||||
val pp_process_exit : process_exit Fmt.t
|
||||
|
||||
val should_restart : Unikernel.config -> Name.t -> process_exit -> bool
|
||||
|
||||
module Log : sig
|
||||
type log_event = [
|
||||
| `Login of Name.t * Ipaddr.V4.t * int
|
||||
|
|
Loading…
Reference in a new issue