2017-05-26 14:30:34 +00:00
|
|
|
(* (c) 2017 Hannes Mehnert, all rights reserved *)
|
|
|
|
|
2019-03-27 23:11:43 +00:00
|
|
|
open Albatross_cli
|
2018-10-25 23:11:41 +00:00
|
|
|
|
2018-11-13 00:02:05 +00:00
|
|
|
open Vmm_core
|
|
|
|
|
2017-05-26 14:30:34 +00:00
|
|
|
open Lwt.Infix
|
|
|
|
|
2018-11-12 21:11:06 +00:00
|
|
|
(* Version tag handed to [Vmm_vmmd.init] and stamped into [stub_hdr]. *)
let version = `AV3
|
2018-09-09 18:52:04 +00:00
|
|
|
|
2018-10-23 22:10:08 +00:00
|
|
|
(* The daemon's single mutable state cell. It starts out as the value
   returned by [Vmm_vmmd.init] for [version]; every handler below reads it
   with [!state] and stores the successor state back with [:=]. *)
let state =
  let initial = Vmm_vmmd.init version in
  ref initial
|
2018-10-12 23:05:21 +00:00
|
|
|
|
2019-10-10 23:10:33 +00:00
|
|
|
(* Header used when the daemon itself triggers a [create] (automatic restart
   and restoring old unikernels at startup): sequence number 0 and the root
   name. *)
let stub_hdr = Vmm_commands.{ version ; sequence = 0L ; name = Name.root }
|
|
|
|
(* Reply sink for daemon-initiated [create] calls: there is no client to
   answer, so the reply is dropped. *)
let stub_data_out = fun _reply -> Lwt.return_unit
|
|
|
|
|
|
|
|
(*
 - handle_create only prepares the unikernel (fifo, image file)
   -> IO console about fifo
 - only the succ_cont later commits this (to resources)
 --> there's a brief period in which the unikernel is prepared but not yet
     committed
*)
|
|
|
|
|
|
|
|
(* Mutex taken by [handle] around command handling and creation, and by
   [create] around automatic restarts and shutdown bookkeeping. *)
let create_lock = Lwt_mutex.create ()
|
|
|
|
|
|
|
|
(* [create stat_out log_out cons_out data_out hdr name config] starts the
   unikernel [name] described by [config] and finally passes the reply to
   [data_out].

   Steps, in order:
   - [Vmm_vmmd.handle_create] prepares the unikernel against the current
     [state]; on error the failure is logged and a [`Failure] reply built.
   - the console is informed through [cons_out "create"]; if that yields
     [Error ()] the reply comes from [fail_cont ()].
   - [succ_cont] is run on the then-current [state]; on error the reply
     again comes from [fail_cont ()].
   - on success a restart is scheduled when the fail behaviour is
     [`Restart], then [stat_out] and [log_out] are called.
   - if a unikernel was started, an asynchronous watcher waits for its
     process and does the shutdown bookkeeping under [create_lock].

   [create] itself does not take [create_lock]. *)
let rec create stat_out log_out cons_out data_out hdr name config =
  (* For a unikernel whose fail behaviour is [`Restart], register a restart
     task and, once it resolves, create the unikernel again unless it was
     stopped by a signal or exited with code 1. *)
  let schedule_restart vm_name vm =
    match Unikernel.(vm.config.fail_behaviour) with
    | `Quit -> ()
    | `Restart ->
      match Vmm_vmmd.register_restart !state vm_name Lwt.task with
      | None -> ()
      | Some (registered, task) ->
        state := registered;
        Lwt.async (fun () ->
            task >>= fun exit_status ->
            match exit_status with
            | (`Signal _ | `Stop _) as r ->
              Logs.warn (fun m -> m "unikernel %a exited with signal %a"
                            Name.pp vm_name pp_process_exit r);
              Lwt.return_unit
            (* results:
               normal exit (i.e. teardown) is 0
               solo5-exit allows an arbitrary int
               solo5-abort emits 255
               solo5 internal error (bad image, bad manifest) is 1
               ocaml exceptions (out of memory et al) use 2
                 -> soon (4.10) they'll abort == 255
               signal 11 is if a kill -TERM was sent (i.e. our destroy)

               --> best: user-provided list of which exit codes to restart on
                   (and filter 1 specially)
            *)
            | `Exit 1 ->
              Logs.warn (fun m -> m "solo5 exit failure");
              Lwt.return_unit
            | `Exit i ->
              Logs.info (fun m -> m "solo5 exited with %d, restarting" i);
              Lwt_mutex.with_lock create_lock (fun () ->
                  create stat_out log_out cons_out stub_data_out
                    stub_hdr vm_name vm.Unikernel.config))
  in
  (* Wait for the process of [vm]; afterwards, holding [create_lock], record
     the shutdown, report it to stat and log, and look up a waiter for
     [name]. A waiter, if present, is woken with the exit status. *)
  let watch_exit vm =
    Lwt.async (fun () ->
        Vmm_lwt.wait_and_clear vm.Unikernel.pid >>= fun r ->
        Lwt_mutex.with_lock create_lock (fun () ->
            let after_shutdown, stat', log' =
              Vmm_vmmd.handle_shutdown !state name vm r
            in
            state := after_shutdown;
            stat_out "handle shutdown stat" stat' >>= fun () ->
            log_out "handle shutdown log" log' >|= fun () ->
            let after_waiter, waiter_opt = Vmm_vmmd.waiter !state name in
            state := after_waiter;
            waiter_opt)
        >|= fun waiter ->
        match waiter with
        | None -> ()
        | Some wakeme -> Lwt.wakeup wakeme r)
  in
  (* [(Some vm, reply)] when the unikernel runs, [(None, reply)] otherwise. *)
  let outcome =
    match Vmm_vmmd.handle_create !state hdr name config with
    | Error (`Msg msg) ->
      Logs.err (fun m -> m "failed to create %a: %s" Name.pp name msg) ;
      Lwt.return (None, (hdr, `Failure msg))
    | Ok (prepared, (cons, succ_cont, fail_cont)) ->
      state := prepared;
      cons_out "create" cons >>= fun console_reply ->
      match console_reply with
      | Error () -> Lwt.return (None, fail_cont ())
      | Ok () ->
        (* [state] is read again here: it may have changed while the
           console request was in flight. *)
        match succ_cont !state with
        | Error (`Msg msg) ->
          Logs.err (fun m -> m "create (exec) failed %s" msg) ;
          Lwt.return (None, fail_cont ())
        | Ok (committed, stat, log, data, vm_name, vm) ->
          state := committed;
          schedule_restart vm_name vm;
          stat_out "setting up stat" stat >>= fun () ->
          log_out "setting up log" log >|= fun () ->
          (Some vm, data)
  in
  outcome >>= fun (launched, data) ->
  (match launched with
   | None -> ()
   | Some vm -> watch_exit vm);
  data_out data
|
2019-01-20 20:41:49 +00:00
|
|
|
|
2019-01-27 15:07:53 +00:00
|
|
|
(* [handle log_out cons_out stat_out fd addr] serves one client connection
   on [fd]; [addr] is only used for logging.

   Each wire read from [fd] is passed, with [create_lock] held, to
   [Vmm_vmmd.handle_command]. Depending on the outcome the reply is written
   back and the loop continues ([`Loop]) or ends ([`End], command error), a
   unikernel is created ([`Create]), or the connection waits for a
   registered task before replying ([`Wait]) or creating
   ([`Wait_and_create]). A read error ends the loop.

   [create_lock] is released on the [`Create] path whether or not [create]
   succeeds, and [fd] is closed whether or not the loop succeeds. *)
let handle log_out cons_out stat_out fd addr =
  Logs.debug (fun m -> m "connection from %a" Vmm_lwt.pp_sockaddr addr) ;
  (* now we need to read a packet and handle it
    (1)
     (a) easy for info (look up name/prefix in resources)
     (b) destroy looks up vm in resources, executes kill (wait for pid will do the cleanup)
         logs "destroy issued"
     (c) create initiates the vm startup procedure:
         write image file, create fifo, create tap(s), send fifo to console
         -- Lwt effects happen (console) --
         executes solo5-hvt + waiter, send stats pid and taps, inserts await into state, logs "created vm"
         -- Lwt effects happen (stats, logs, wait_and_clear) --
    (2) goto (1)
  *)
  let out wire =
    (* TODO should we terminate the connection on write failure? *)
    Vmm_lwt.write_wire fd wire >|= fun _ -> ()
  in
  let rec loop () =
    Logs.debug (fun m -> m "now reading") ;
    Vmm_lwt.read_wire fd >>= function
    | Error _ ->
      Logs.err (fun m -> m "error while reading") ;
      Lwt.return_unit
    | Ok wire ->
      Logs.debug (fun m -> m "read %a" Vmm_commands.pp_wire wire) ;
      Lwt_mutex.lock create_lock >>= fun () ->
      match Vmm_vmmd.handle_command !state wire with
      | Error wire -> Lwt_mutex.unlock create_lock; out wire
      | Ok (state', next) ->
        state := state' ;
        match next with
        | `Loop wire -> Lwt_mutex.unlock create_lock; out wire >>= loop
        | `End wire -> Lwt_mutex.unlock create_lock; out wire
        | `Create (hdr, id, vm) ->
          (* The lock was taken manually above, so release it in a
             finaliser: a rejected [create] must not leave it held. *)
          Lwt.finalize
            (fun () -> create stat_out log_out cons_out out hdr id vm)
            (fun () -> Lwt_mutex.unlock create_lock; Lwt.return_unit)
        | `Wait (who, data) ->
          let state', task = Vmm_vmmd.register !state who Lwt.task in
          state := state';
          (* Do not hold the lock while waiting for [who]. *)
          Lwt_mutex.unlock create_lock;
          task >>= fun r ->
          out (data r)
        | `Wait_and_create (who, (hdr, id, vm)) ->
          let state', task = Vmm_vmmd.register !state who Lwt.task in
          state := state';
          (* Do not hold the lock while waiting; take it again for create. *)
          Lwt_mutex.unlock create_lock;
          task >>= fun r ->
          Logs.info (fun m -> m "wait returned %a" pp_process_exit r);
          Lwt_mutex.with_lock create_lock (fun () ->
              create stat_out log_out cons_out out hdr id vm)
  in
  (* Close the descriptor even when the loop is rejected. *)
  Lwt.finalize loop (fun () -> Vmm_lwt.safe_close fd)
|
2017-05-26 14:30:34 +00:00
|
|
|
|
2019-10-10 23:10:33 +00:00
|
|
|
(* [write_reply name fd txt (header, cmd)] writes the request [(header, cmd)]
   to [fd] and reads one reply from it.

   [name] labels the peer and [txt] the operation; both only appear in log
   and exception messages.

   The promise resolves to [Ok ()] for a [`Success _] reply and to [Error ()]
   for a [`Failure _] reply (whose text is logged).

   Raises [Invalid_argument] when writing fails, when reading fails, when the
   reply's version or sequence number differs from the request's, or when the
   reply is neither [`Success] nor [`Failure]. These are raised inside Lwt
   callbacks. *)
let write_reply name fd txt (header, cmd) =
  let request = (header, cmd) in
  (* Check a received reply against the request header and classify it. *)
  let validate (header', reply) =
    if not Vmm_commands.(version_eq header.version header'.version) then begin
      Logs.err (fun m -> m "%s: wrong version (got %a, expected %a) in reply from %s"
                   txt
                   Vmm_commands.pp_version header'.Vmm_commands.version
                   Vmm_commands.pp_version header.Vmm_commands.version
                   name) ;
      invalid_arg "bad version received"
    end else if not Vmm_commands.(Int64.equal header.sequence header'.sequence) then begin
      Logs.err (fun m -> m "%s: wrong id %Lu (expected %Lu) in reply from %s"
                   txt header'.Vmm_commands.sequence header.Vmm_commands.sequence name) ;
      invalid_arg "wrong sequence number received"
    end else begin
      Logs.debug (fun m -> m "%s: received valid reply from %s %a (request %a)"
                     txt name Vmm_commands.pp_wire (header', reply) Vmm_commands.pp_wire request) ;
      match reply with
      | `Success _ -> Ok ()
      | `Failure msg ->
        Logs.err (fun m -> m "%s: received failure %s from %s" txt msg name) ;
        Error ()
      | _ ->
        Logs.err (fun m -> m "%s: unexpected data from %s" txt name) ;
        invalid_arg "unexpected data"
    end
  in
  Vmm_lwt.write_wire fd request >>= fun written ->
  match written with
  | Error `Exception ->
    invalid_arg ("exception during " ^ txt ^ " while writing to " ^ name)
  | Ok () ->
    Vmm_lwt.read_wire fd >|= fun answer ->
    match answer with
    | Error _ ->
      Logs.err (fun m -> m "error in read from %s" name) ;
      invalid_arg "communication failure"
    | Ok received -> validate received
|
2019-01-27 15:07:53 +00:00
|
|
|
|
2019-10-10 20:26:36 +00:00
|
|
|
(* Connection metrics reporter labelled "unix"; [jump] calls it with [`Open]
   when a client is accepted and [`Close] when its handler finishes. *)
let m = conn_metrics "unix"
|
|
|
|
|
|
|
|
(* [jump _ influx] is the daemon's main function.

   It ignores SIGPIPE, restores the previously known unikernels, and then
   runs the Lwt main loop: open the [`Vmmd] server socket, connect to the
   log and console sockets (both mandatory) and the stats socket (optional),
   install a SIGTERM handler, re-create the restored unikernels, and accept
   client connections, handing each to [handle].

   If [Vmm_vmmd.restore_unikernels] fails, the error is logged and the
   process exits with status 1, so that a failed start is not reported as
   success.

   Raises [Invalid_argument] if the log or console socket cannot be
   connected. *)
let jump _ influx =
  Sys.(set_signal sigpipe Signal_ignore);
  match Vmm_vmmd.restore_unikernels () with
  | Error (`Msg msg) ->
    Logs.err (fun m -> m "bailing out: %s" msg);
    (* Returning unit here would end the program with status 0. *)
    exit 1
  | Ok old_unikernels ->
    Lwt_main.run
      (let unix_connect s =
         Vmm_lwt.connect Lwt_unix.PF_UNIX (Lwt_unix.ADDR_UNIX (socket_path s))
       in
       init_influx "albatross" influx;
       Vmm_lwt.server_socket `Vmmd >>= fun ss ->
       (unix_connect `Log >|= function
         | None -> invalid_arg "cannot connect to log socket"
         | Some l -> l) >>= fun l ->
       (* Serialises [self_destruct], which can be triggered both by SIGTERM
          and by a failing accept loop. *)
       let self_destruct_mutex = Lwt_mutex.create () in
       let self_destruct () =
         Lwt_mutex.with_lock self_destruct_mutex (fun () ->
             (if Vmm_vmmd.killall !state then
                (* not too happy about the sleep here, but cleaning up resources
                   is really important (fifos, vm images, tap devices) - which
                   is done asynchronously (in the task waitpid() on the pid) *)
                Lwt_unix.sleep 1.
              else
                Lwt.return_unit) >>= fun () ->
             Vmm_lwt.safe_close ss)
       in
       Sys.(set_signal sigterm (Signal_handle (fun _ -> Lwt.async self_destruct)));
       (unix_connect `Console >|= function
         | None -> invalid_arg "cannot connect to console socket"
         | Some c -> c) >>= fun c ->
       unix_connect `Stats >>= fun s ->
       (* Results of log and stat requests are discarded; only the console
          result is passed on to [create]. *)
       let log_out txt wire = write_reply "log" l txt wire >|= fun _ -> ()
       and cons_out = write_reply "cons" c
       and stat_out txt wire = match s with
         | None -> Logs.info (fun m -> m "ignoring stat %s %a" txt Vmm_commands.pp_wire wire) ; Lwt.return_unit
         | Some s -> write_reply "stat" s txt wire >|= fun _ -> ()
       in
       Lwt_list.iter_p (fun (name, config) ->
           create stat_out log_out cons_out stub_data_out stub_hdr name config)
         (Vmm_trie.all old_unikernels) >>= fun () ->
       Lwt.catch (fun () ->
           let rec loop () =
             Lwt_unix.accept ss >>= fun (fd, addr) ->
             Lwt_unix.set_close_on_exec fd ;
             m `Open;
             (* Each connection is served concurrently with the accept loop. *)
             Lwt.async (fun () ->
                 handle log_out cons_out stat_out fd addr >|= fun () ->
                 m `Close) ;
             loop ()
           in
           loop ())
         (fun e ->
            Logs.err (fun m -> m "exception %s, shutting down" (Printexc.to_string e));
            self_destruct ()))
|
2017-05-26 14:30:34 +00:00
|
|
|
|
|
|
|
open Cmdliner
|
|
|
|
|
|
|
|
(* Cmdliner command for the daemon: the term applies [jump] to the
   [setup_log] and [influx] arguments; the info names the program
   "albatrossd". *)
let cmd =
  let term = Term.(const jump $ setup_log $ influx) in
  let info = Term.info "albatrossd" ~version:"%%VERSION_NUM%%" in
  (term, info)
|
2017-05-26 14:30:34 +00:00
|
|
|
|
|
|
|
(* Program entry point: evaluate [cmd] and translate the result into an exit
   status. Showing help or the version is a successful outcome, so both exit
   with 0 like a normal run; only evaluation errors exit with 1. *)
let () =
  match Term.eval cmd with
  | `Ok () | `Help | `Version -> exit 0
  | `Error _ -> exit 1
|