From 0808c2058322e378352157368af7a39fc739c69f Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Sat, 12 Oct 2019 00:01:36 +0200 Subject: [PATCH] restart on failure: add optional integer set which exit codes to restart on --- client/albatross_client_bistro.ml | 6 +-- client/albatross_client_local.ml | 6 +-- command-line/albatross_cli.ml | 10 ++++- daemon/albatrossd.ml | 36 ++++------------- provision/albatross_provision_request.ml | 6 +-- src/vmm_asn.ml | 20 +++++++--- src/vmm_core.ml | 50 ++++++++++++++++++++++-- src/vmm_core.mli | 6 ++- 8 files changed, 92 insertions(+), 48 deletions(-) diff --git a/client/albatross_client_bistro.ml b/client/albatross_client_bistro.ml index 9e86800..08ab831 100644 --- a/client/albatross_client_bistro.ml +++ b/client/albatross_client_bistro.ml @@ -110,8 +110,8 @@ let info_ _ endp cert key ca name = let destroy _ endp cert key ca name = jump endp cert key ca name (`Unikernel_cmd `Unikernel_destroy) -let create _ endp cert key ca force name image cpuid memory argv block network compression restart_on_fail = - match Albatross_cli.create_vm force image cpuid memory argv block network compression restart_on_fail with +let create _ endp cert key ca force name image cpuid memory argv block network compression restart_on_fail exit_code = + match Albatross_cli.create_vm force image cpuid memory argv block network compression restart_on_fail exit_code with | Ok cmd -> jump endp cert key ca name (`Unikernel_cmd cmd) | Error (`Msg msg) -> Error (`Msg msg) @@ -208,7 +208,7 @@ let create_cmd = [`S "DESCRIPTION"; `P "Creates a virtual machine."] in - Term.(term_result (const create $ setup_log $ destination $ ca_cert $ ca_key $ server_ca $ force $ vm_name $ image $ cpu $ vm_mem $ args $ block $ net $ compress_level 9 $ restart_on_fail)), + Term.(term_result (const create $ setup_log $ destination $ ca_cert $ ca_key $ server_ca $ force $ vm_name $ image $ cpu $ vm_mem $ args $ block $ net $ compress_level 9 $ restart_on_fail $ exit_code)), 
Term.info "create" ~doc ~man let console_cmd = diff --git a/client/albatross_client_local.ml b/client/albatross_client_local.ml index 2b98c79..dfc7c6b 100644 --- a/client/albatross_client_local.ml +++ b/client/albatross_client_local.ml @@ -61,8 +61,8 @@ let info_ _ opt_socket name = let destroy _ opt_socket name = jump opt_socket name (`Unikernel_cmd `Unikernel_destroy) -let create _ opt_socket force name image cpuid memory argv block network compression restart_on_fail = - match Albatross_cli.create_vm force image cpuid memory argv block network compression restart_on_fail with +let create _ opt_socket force name image cpuid memory argv block network compression restart_on_fail exit_code = + match Albatross_cli.create_vm force image cpuid memory argv block network compression restart_on_fail exit_code with | Ok cmd -> jump opt_socket name (`Unikernel_cmd cmd) | Error (`Msg msg) -> Error (`Msg msg) @@ -153,7 +153,7 @@ let create_cmd = [`S "DESCRIPTION"; `P "Creates a virtual machine."] in - Term.(term_result (const create $ setup_log $ socket $ force $ vm_name $ image $ cpu $ vm_mem $ args $ block $ net $ compress_level 0 $ restart_on_fail)), + Term.(term_result (const create $ setup_log $ socket $ force $ vm_name $ image $ cpu $ vm_mem $ args $ block $ net $ compress_level 0 $ restart_on_fail $ exit_code)), Term.info "create" ~doc ~man let console_cmd = diff --git a/command-line/albatross_cli.ml b/command-line/albatross_cli.ml index c49a61f..25d9982 100644 --- a/command-line/albatross_cli.ml +++ b/command-line/albatross_cli.ml @@ -79,7 +79,7 @@ let setup_log style_renderer level = Logs.set_level level; Logs.set_reporter (Logs_fmt.reporter ~dst:Format.std_formatter ()) -let create_vm force image cpuid memory argv block_devices bridges compression restart_on_fail = +let create_vm force image cpuid memory argv block_devices bridges compression restart_on_fail exit_codes = let open Rresult.R.Infix in Bos.OS.File.read (Fpath.v image) >>| fun image -> let image, 
compressed = match compression with @@ -88,7 +88,9 @@ let create_vm force image cpuid memory argv block_devices bridges compression re let img = Vmm_compress.compress ~level image in Cstruct.of_string img, true and argv = match argv with [] -> None | xs -> Some xs - and fail_behaviour = if restart_on_fail then `Restart else `Quit + and fail_behaviour = + let exits = match exit_codes with [] -> None | xs -> Some (IS.of_list xs) in + if restart_on_fail then `Restart exits else `Quit in let config = Unikernel.{ typ = `Solo5 ; compressed ; image ; fail_behaviour ; cpuid ; memory ; block_devices ; bridges ; argv } in if force then `Unikernel_force_create config else `Unikernel_create config @@ -241,6 +243,10 @@ let restart_on_fail = let doc = "Restart on fail" in Arg.(value & flag & info [ "restart-on-fail" ] ~doc) +let exit_code = + let doc = "Exit code to restart on" in + Arg.(value & opt_all int [] & info [ "exit-code" ] ~doc) + let timestamp_c = let parse s = match Ptime.of_rfc3339 s with | Ok (t, _, _) -> `Ok t diff --git a/daemon/albatrossd.ml b/daemon/albatrossd.ml index 00d2f6a..46c8170 100644 --- a/daemon/albatrossd.ml +++ b/daemon/albatrossd.ml @@ -37,39 +37,19 @@ let rec create stat_out log_out cons_out data_out hdr name config = Lwt.return (None, fail_cont ()) | Ok (state', stat, log, data, name, vm) -> state := state'; - (match Unikernel.(vm.config.fail_behaviour) with - | `Quit -> () - | `Restart -> + (if Unikernel.restart_handler config then match Vmm_vmmd.register_restart !state name Lwt.task with | None -> () | Some (state', task) -> state := state'; Lwt.async (fun () -> - task >>= function - | (`Signal _ | `Stop _) as r -> - Logs.warn (fun m -> m "unikernel %a exited with signal %a" - Name.pp name pp_process_exit r); - Lwt.return_unit - | `Exit i -> - (* results: - normal exit (i.e. 
teardown) is 0 - solo5-exit allows an arbitrary int - solo5-abort emits 255 - solo5 internal error (bad image, bad manigest) is 1 - ocaml exceptions (out of memory et al) use 2 - -> soon (4.10) they'll abort == 255 - signal 11 is if a kill -TERM was sent (i.e. our destroy) - - --> best: user-provided list of which exit codes to restart on - (and filter 1 specially) - *) - match i with - | 1 -> Logs.warn (fun m -> m "solo5 exit failure"); Lwt.return_unit - | _ -> - Logs.info (fun m -> m "solo5 exited with %d, restarting" i); - Lwt_mutex.with_lock create_lock (fun () -> - create stat_out log_out cons_out stub_data_out - stub_hdr name vm.Unikernel.config))); + task >>= fun r -> + if should_restart config name r then + Lwt_mutex.with_lock create_lock (fun () -> + create stat_out log_out cons_out stub_data_out + stub_hdr name vm.Unikernel.config) + else + Lwt.return_unit)); stat_out "setting up stat" stat >>= fun () -> log_out "setting up log" log >|= fun () -> (Some vm, data)) >>= fun (started, data) -> diff --git a/provision/albatross_provision_request.ml b/provision/albatross_provision_request.ml index 2b2f33b..157d24a 100644 --- a/provision/albatross_provision_request.ml +++ b/provision/albatross_provision_request.ml @@ -40,8 +40,8 @@ let info_ _ name = jump name (`Unikernel_cmd `Unikernel_info) let destroy _ name = jump name (`Unikernel_cmd `Unikernel_destroy) -let create _ force name image cpuid memory argv block network compression restart_on_fail = - match Albatross_cli.create_vm force image cpuid memory argv block network compression restart_on_fail with +let create _ force name image cpuid memory argv block network compression restart_on_fail exit_code = + match Albatross_cli.create_vm force image cpuid memory argv block network compression restart_on_fail exit_code with | Ok cmd -> jump name (`Unikernel_cmd cmd) | Error (`Msg msg) -> Error (`Msg msg) @@ -122,7 +122,7 @@ let create_cmd = [`S "DESCRIPTION"; `P "Creates a virtual machine."] in - 
Term.(term_result (const create $ setup_log $ force $ vm_name $ image $ cpu $ vm_mem $ args $ block $ net $ compress_level 9 $ restart_on_fail)), + Term.(term_result (const create $ setup_log $ force $ vm_name $ image $ cpu $ vm_mem $ args $ block $ net $ compress_level 9 $ restart_on_fail $ exit_code)), Term.info "create" ~doc ~man let console_cmd = diff --git a/src/vmm_asn.ml b/src/vmm_asn.ml index 61f143f..ace72db 100644 --- a/src/vmm_asn.ml +++ b/src/vmm_asn.ml @@ -279,15 +279,25 @@ let typ = let fail_behaviour = let f = function | `C1 () -> `Quit - | `C2 () -> `Restart + | `C2 xs -> + let exit_codes = match xs with + | [] -> None + | xs -> Some (IS.of_list xs) + in + `Restart exit_codes and g = function | `Quit -> `C1 () - | `Restart -> `C2 () + | `Restart xs -> + let exit_codes = match xs with + | None -> [] + | Some i -> IS.elements i + in + `C2 exit_codes in Asn.S.map f g @@ Asn.S.(choice2 (explicit 0 null) - (explicit 1 null)) + (explicit 1 (set_of int))) let unikernel_config = let open Unikernel in @@ -309,8 +319,8 @@ let unikernel_config = @ (required ~label:"fail behaviour" fail_behaviour) @ (required ~label:"cpuid" int) @ (required ~label:"memory" int) - @ (optional ~label:"blocks" (explicit 0 (sequence_of utf8_string))) - @ (optional ~label:"bridges" (explicit 1 (sequence_of utf8_string))) + @ (optional ~label:"blocks" (explicit 0 (set_of utf8_string))) + @ (optional ~label:"bridges" (explicit 1 (set_of utf8_string))) -@ (optional ~label:"arguments"(explicit 2 (sequence_of utf8_string)))) let unikernel_cmd = diff --git a/src/vmm_core.ml b/src/vmm_core.ml index 85b1019..80e957b 100644 --- a/src/vmm_core.ml +++ b/src/vmm_core.ml @@ -156,10 +156,14 @@ module Unikernel = struct let pp_typ ppf = function | `Solo5 -> Fmt.pf ppf "solo5" - type fail_behaviour = [ `Quit | `Restart ] + type fail_behaviour = [ `Quit | `Restart of IS.t option ] - let pp_fail_behaviour ppf f = - Fmt.string ppf (match f with `Quit -> "quit" | `Restart -> "restart") + let 
pp_fail_behaviour ppf = function + | `Quit -> Fmt.string ppf "quit" + | `Restart codes -> + Fmt.pf ppf "restart %a" + Fmt.(option ~none:(unit "all except 1") (list ~sep:(unit ", ") int)) + (match codes with None -> None | Some x -> Some (IS.elements x)) type config = { typ : typ ; @@ -184,6 +188,9 @@ module Unikernel = struct Fmt.(list ~sep:(unit ", ") string) vm.bridges Fmt.(option ~none:(unit "no") (list ~sep:(unit " ") string)) vm.argv + let restart_handler config = + match config.fail_behaviour with `Quit -> false | `Restart _ -> true + type t = { config : config ; cmd : Bos.Cmd.t ; @@ -289,6 +296,43 @@ let pp_process_exit ppf = function | `Signal n -> Fmt.pf ppf "signal %a (numeric %d)" Fmt.Dump.signal n n | `Stop n -> Fmt.pf ppf "stop %a (numeric %d)" Fmt.Dump.signal n n +let should_restart config name = function + | (`Signal _ | `Stop _) as r -> + (* signal 11 is if a kill -TERM was sent (i.e. our destroy) *) + Logs.warn (fun m -> m "unikernel %a exited with signal %a" + Name.pp name pp_process_exit r); + false + | `Exit i -> + (* results (and default behaviour) -- solo5-exit allows an arbitrary int + 0 normal exit (i.e. 
teardown) -> restart + 1 solo5 internal error (bad image, bad manifest) -> no restart, never + 2 ocaml exceptions (out of memory et al) -> restart + 64..70 -> no restart (soon to be used by unikernel command line parsing) + 255 solo5-abort -> soon (OCaml 4.10) fatal error (out of memory) -> restart *) + let opt_mem i = + match config.Unikernel.fail_behaviour with + | `Quit -> assert false + | `Restart None -> true + | `Restart (Some c) -> IS.mem i c + in + match i with + | 1 -> + Logs.warn (fun m -> m "unikernel %a solo5 exit failure (1)" + Name.pp name); + false + | 64 | 65 | 66 | 67 | 68 | 69 | 70 -> + Logs.warn (fun m -> m "unikernel %a exited %d, not restarting" + Name.pp name i); + false + | _ when opt_mem i -> + Logs.info (fun m -> m "unikernel %a exited %d, restarting" + Name.pp name i); + true + | _ -> + Logs.info (fun m -> m "unikernel %a exited %d, not restarting %a" + Name.pp name i Unikernel.pp_fail_behaviour config.fail_behaviour); + false + module Log = struct type log_event = [ | `Login of Name.t * Ipaddr.V4.t * int diff --git a/src/vmm_core.mli b/src/vmm_core.mli index cc5dfd6..55fbbb5 100644 --- a/src/vmm_core.mli +++ b/src/vmm_core.mli @@ -58,7 +58,7 @@ module Unikernel : sig type typ = [ `Solo5 ] val pp_typ : typ Fmt.t - type fail_behaviour = [ `Quit | `Restart ] + type fail_behaviour = [ `Quit | `Restart of IS.t option ] type config = { typ : typ ; @@ -74,6 +74,8 @@ module Unikernel : sig val pp_config : config Fmt.t + val restart_handler : config -> bool + type t = { config : config; cmd : Bos.Cmd.t; @@ -153,6 +155,8 @@ type process_exit = [ `Exit of int | `Signal of int | `Stop of int ] val pp_process_exit : process_exit Fmt.t +val should_restart : Unikernel.config -> Name.t -> process_exit -> bool + module Log : sig type log_event = [ | `Login of Name.t * Ipaddr.V4.t * int