revise force-restart: now with wait for kill and resource cleanup before start

allows to cleanup various hacks, such as checking for pid in vmm_resources
or removing temporarily the allocated resources from the resource map in vmm_engine

semantics is now slightly different, but for sure enhanced.
- each VM has a Lwt.wait () task attached in Vmm_engine.t (tasks : 'c String.Map.t)
- normal create shouldn't be much different, apart from memoizing the sleeper
- after waitpid is done in vmmd, and vmm_engine.shutdown succeeded, Lwt.wakeup is called for the sleeper
- force create now:
 - checks static policies
 - looks for existing VM (and task), if present: kill and wait for task in vmmd
 - continue with presence checking of vm name, dynamic policies, allocate resources (tap, img, fifo)

this means the whole randomness in filenames can be removed, and the
communication between vmm_console and vmm_client is working again (attach/detach
could not work since vmm_console knew only about "albatross.AAA.BBB.RANDOM",
whereas vmm_client insisted on "AAA.BBB"

resource overcommitment (and races in e.g. block device closing + opening) are
gone now, only if the old vm is cleanup up, resources for the new one are
allocated and it is executed
This commit is contained in:
Hannes Mehnert 2018-04-05 01:02:45 +02:00
parent a89b2925fd
commit 9696953cd7
6 changed files with 150 additions and 129 deletions

View file

@ -67,56 +67,71 @@ let handle ca state t =
| Ok (state', outs, next) ->
state := state' ;
process state outs >>= fun () ->
(match next with
| `Create cont ->
(match cont !state t with
| Ok (state', outs, vm) ->
state := state' ;
s := { !s with vm_created = succ !s.vm_created } ;
Lwt.async (fun () ->
Vmm_lwt.wait_and_clear vm.Vmm_core.pid vm.Vmm_core.stdout >>= fun r ->
let state', outs = Vmm_engine.handle_shutdown !state vm r in
s := { !s with vm_destroyed = succ !s.vm_destroyed } ;
state := state' ;
process state outs) ;
process state outs >>= fun () ->
begin
match Vmm_engine.setup_stats !state vm with
| Ok (state', outs) ->
state := state' ;
process state outs
| Error (`Msg e) ->
Logs.warn (fun m -> m "(ignored) error %s while setting up statistics" e) ;
Lwt.return_unit
end
| Error (`Msg e) ->
Logs.err (fun m -> m "error while cont %s" e) ;
let err = Vmm_wire.fail ~msg:e 0 !state.Vmm_engine.client_version in
process state [ `Tls (t, err) ]) >>= fun () ->
Tls_lwt.Unix.close (fst t)
| `Loop (prefix, perms) ->
let rec loop () =
Vmm_tls.read_tls (fst t) >>= function
| Error (`Msg msg) ->
Logs.err (fun m -> m "reading client %a error: %s" pp_sockaddr t msg) ;
loop ()
| Error _ ->
Logs.err (fun m -> m "disconnect from %a" pp_sockaddr t) ;
let state', cons = Vmm_engine.handle_disconnect !state t in
state := state' ;
Lwt_list.iter_s (fun (s, data) -> write_raw s data) cons >>= fun () ->
Tls_lwt.Unix.close (fst t)
| Ok (hdr, buf) ->
let state', out = Vmm_engine.handle_command !state t prefix perms hdr buf in
state := state' ;
process state out >>= fun () ->
loop ()
in
loop ()
| `Close socks ->
Logs.debug (fun m -> m "closing session with %d active ones" (List.length socks)) ;
Lwt_list.iter_s (fun (t, _) -> Tls_lwt.Unix.close t) socks >>= fun () ->
Tls_lwt.Unix.close (fst t))
begin match next with
| `Create (task, next) ->
(match task with
| None -> Lwt.return_unit
| Some (kill, wait) -> kill () ; wait) >>= fun () ->
let await, wakeme = Lwt.wait () in
begin match next !state await with
| Ok (state', outs, cont) ->
state := state' ;
process state outs >>= fun () ->
begin match cont !state t with
| Ok (state', outs, vm) ->
state := state' ;
s := { !s with vm_created = succ !s.vm_created } ;
Lwt.async (fun () ->
Vmm_lwt.wait_and_clear vm.Vmm_core.pid vm.Vmm_core.stdout >>= fun r ->
let state', outs = Vmm_engine.handle_shutdown !state vm r in
s := { !s with vm_destroyed = succ !s.vm_destroyed } ;
state := state' ;
process state outs >|= fun () ->
Lwt.wakeup wakeme ()) ;
process state outs >>= fun () ->
begin match Vmm_engine.setup_stats !state vm with
| Ok (state', outs) ->
state := state' ;
process state outs
| Error (`Msg e) ->
Logs.warn (fun m -> m "(ignored) error %s while setting up statistics" e) ;
Lwt.return_unit
end
| Error (`Msg e) ->
Logs.err (fun m -> m "error while create %s" e) ;
let err = Vmm_wire.fail ~msg:e 0 !state.Vmm_engine.client_version in
process state [ `Tls (t, err) ]
end
| Error (`Msg e) ->
Logs.err (fun m -> m "error while cont %s" e) ;
let err = Vmm_wire.fail ~msg:e 0 !state.Vmm_engine.client_version in
process state [ `Tls (t, err) ]
end >>= fun () ->
Tls_lwt.Unix.close (fst t)
| `Loop (prefix, perms) ->
let rec loop () =
Vmm_tls.read_tls (fst t) >>= function
| Error (`Msg msg) ->
Logs.err (fun m -> m "reading client %a error: %s" pp_sockaddr t msg) ;
loop ()
| Error _ ->
Logs.err (fun m -> m "disconnect from %a" pp_sockaddr t) ;
let state', cons = Vmm_engine.handle_disconnect !state t in
state := state' ;
Lwt_list.iter_s (fun (s, data) -> write_raw s data) cons >>= fun () ->
Tls_lwt.Unix.close (fst t)
| Ok (hdr, buf) ->
let state', out = Vmm_engine.handle_command !state t prefix perms hdr buf in
state := state' ;
process state out >>= fun () ->
loop ()
in
loop ()
| `Close socks ->
Logs.debug (fun m -> m "closing session with %d active ones" (List.length socks)) ;
Lwt_list.iter_s (fun (t, _) -> Tls_lwt.Unix.close t) socks >>= fun () ->
Tls_lwt.Unix.close (fst t)
end
| Error (`Msg e) ->
Logs.err (fun m -> m "VMM %a %s" pp_sockaddr t e) ;
let err = Vmm_wire.fail ~msg:e 0 !state.Vmm_engine.client_version in

View file

@ -57,16 +57,10 @@ let rec mkfifo name =
try Unix.mkfifo (Fpath.to_string name) 0o640 with
| Unix.Unix_error (Unix.EINTR, _, _) -> mkfifo name
let image_fn = Fpath.add_ext "img"
let fifo_fn = Fpath.add_ext "fifo"
let tmpfile vm =
let random =
let cs = Nocrypto.Rng.generate 8 in
match Hex.of_cstruct cs with `Hex str -> str
in
let baseid = filename vm in
Fpath.(v (Filename.get_temp_dir_name ()) / "albatross" + baseid + random)
let image_file, fifo_file =
let tmp = Fpath.v (Filename.get_temp_dir_name ()) in
((fun vm -> Fpath.(tmp / (vm_id vm) + "img")),
(fun vm -> Fpath.(tmp / (vm_id vm) + "fifo")))
let rec fifo_exists file =
try Ok (Unix.((stat @@ Fpath.to_string file).st_kind = S_FIFO)) with
@ -122,7 +116,6 @@ let create_bridge bname =
| x -> Error (`Msg ("unsupported operating system " ^ x))
let prepare vm =
let tmpfile = tmpfile vm in
(match vm.vmimage with
| `Ukvm_amd64, blob -> Ok blob
| `Ukvm_amd64_compressed, blob ->
@ -131,8 +124,8 @@ let prepare vm =
| Error () -> Error (`Msg "failed to uncompress")
end
| `Ukvm_arm64, _ -> Error (`Msg "no amd64 ukvm image found")) >>= fun image ->
Bos.OS.File.write (image_fn tmpfile) (Cstruct.to_string image) >>= fun () ->
let fifo = fifo_fn tmpfile in
Bos.OS.File.write (image_file vm) (Cstruct.to_string image) >>= fun () ->
let fifo = fifo_file vm in
(match fifo_exists fifo with
| Ok true -> Ok ()
| Ok false -> Error (`Msg ("file " ^ Fpath.to_string fifo ^ " exists and is not a fifo"))
@ -146,12 +139,12 @@ let prepare vm =
create_tap b >>= fun tap ->
Ok (tap :: acc))
(Ok []) vm.network >>= fun taps ->
Ok (tmpfile, List.rev taps)
Ok (List.rev taps)
let shutdown vm =
(* same order as prepare! *)
Bos.OS.File.delete (image_fn vm.tmpfile) >>= fun () ->
Bos.OS.File.delete (fifo_fn vm.tmpfile) >>= fun () ->
Bos.OS.File.delete (image_file vm.config) >>= fun () ->
Bos.OS.File.delete (fifo_file vm.config) >>= fun () ->
List.fold_left (fun r n -> r >>= fun () -> destroy_tap n) (Ok ()) vm.taps
let cpuset cpu =
@ -164,7 +157,7 @@ let cpuset cpu =
Ok ([ "taskset" ; "-c" ; cpustring ])
| x -> Error (`Msg ("unsupported operating system " ^ x))
let exec dir vm tmpfile taps =
let exec dir vm taps =
(* TODO: --net-mac=xx *)
let net = List.map (fun t -> "--net=" ^ t) taps in
let argv = match vm.argv with None -> [] | Some xs -> xs in
@ -174,11 +167,14 @@ let exec dir vm tmpfile taps =
| _ -> Error (`Msg "cannot handle multiple network interfaces")) >>= fun bin ->
cpuset vm.cpuid >>= fun cpuset ->
let mem = "--mem=" ^ string_of_int vm.requested_memory in
let cmd = Bos.Cmd.(of_list cpuset % p bin % mem %% of_list net % "--" % p (image_fn tmpfile) %% of_list argv) in
let cmd =
Bos.Cmd.(of_list cpuset % p bin % mem %% of_list net %
"--" % p (image_file vm) %% of_list argv)
in
let line = Bos.Cmd.to_list cmd in
let prog = try List.hd line with Failure _ -> failwith err_empty_line in
let line = Array.of_list line in
let fifo = fifo_fn tmpfile in
let fifo = fifo_file vm in
Logs.debug (fun m -> m "write fd for fifo %a" Fpath.pp fifo);
write_fd_for_file fifo >>= fun stdout ->
Logs.debug (fun m -> m "opened file descriptor!");
@ -188,7 +184,7 @@ let exec dir vm tmpfile taps =
Logs.debug (fun m -> m "created process %d: %a" pid Bos.Cmd.pp cmd) ;
(* this should get rid of the vmimage from vmmd's memory! *)
let config = { vm with vmimage = (fst vm.vmimage, Cstruct.create 0) } in
Ok { config ; cmd ; pid ; taps ; stdout ; tmpfile }
Ok { config ; cmd ; pid ; taps ; stdout }
with
Unix.Unix_error (e, _, _) ->
close_no_err stdout;

View file

@ -4,11 +4,11 @@ open Rresult
open Vmm_core
val prepare : vm_config -> (Fpath.t * string list, [> R.msg ]) result
val prepare : vm_config -> (string list, [> R.msg ]) result
val shutdown : vm -> (unit, [> R.msg ]) result
val exec : Fpath.t -> vm_config -> Fpath.t -> string list -> (vm, [> R.msg ]) result
val exec : Fpath.t -> vm_config -> string list -> (vm, [> R.msg ]) result
val destroy : vm -> unit

View file

@ -178,7 +178,7 @@ type vm_config = {
let fullname vm = vm.prefix @ [ vm.vname ]
let filename vm = string_of_id (fullname vm)
let vm_id vm = string_of_id (fullname vm)
(* used for block devices *)
let location vm = match vm.prefix with
@ -222,14 +222,13 @@ type vm = {
cmd : Bos.Cmd.t ;
pid : int ;
taps : string list ;
stdout : Unix.file_descr ; (* ringbuffer thingy *)
tmpfile : Fpath.t
stdout : Unix.file_descr (* ringbuffer thingy *)
}
let pp_vm ppf vm =
Fmt.pf ppf "pid %d@ taps %a cmdline %a tmpfile %a"
Fmt.pf ppf "pid %d@ taps %a cmdline %a"
vm.pid Fmt.(list ~sep:(unit ", ") string) vm.taps
Bos.Cmd.pp vm.cmd Fpath.pp vm.tmpfile
Bos.Cmd.pp vm.cmd
let translate_tap vm tap =
match List.filter (fun (t, _) -> tap = t) (List.combine vm.taps vm.config.network) with

View file

@ -7,12 +7,12 @@ open Vmm_core
open Rresult
open R.Infix
type ('a, 'b) t = {
type ('a, 'b, 'c) t = {
dir : Fpath.t ;
cmp : 'b -> 'b -> bool ;
console_socket : 'a ;
console_counter : int ;
console_requests : (('a, 'b) t -> ('a, 'b) t * [ `Raw of 'a * string | `Tls of 'b * string ] list) IM.t ;
console_requests : (('a, 'b, 'c) t -> ('a, 'b, 'c) t * [ `Raw of 'a * string | `Tls of 'b * string ] list) IM.t ;
console_attached : 'b String.Map.t ; (* vm_name -> socket *)
console_version : Vmm_wire.version ;
stats_socket : 'a option ;
@ -30,6 +30,7 @@ type ('a, 'b) t = {
used_bridges : String.Set.t String.Map.t ;
(* TODO: used block devices (since each may only be active once) *)
resources : Vmm_resources.t ;
tasks : 'c String.Map.t ;
crls : X509.CRL.c list ;
}
@ -61,6 +62,7 @@ let init dir cmp console_socket stats_socket log_socket =
client_version = `WV0 ;
used_bridges = String.Map.empty ;
resources = Vmm_resources.empty ;
tasks = String.Map.empty ;
crls
}
@ -109,46 +111,20 @@ let handle_disconnect state t =
in
{ state with console_attached ; console_counter ; log_attached }, out
let handle_create t prefix chain cert force =
(* convert certificate to vm_config *)
Vmm_asn.vm_of_cert prefix cert >>= fun vm_config ->
Logs.debug (fun m -> m "vm %a" pp_vm_config vm_config) ;
(* check whether vm with same name is already running *)
let handle_create t vm_config policies =
let full = fullname vm_config in
(* get names and static resources *)
List.fold_left (fun acc ca ->
acc >>= fun acc ->
Vmm_asn.delegation_of_cert asn_version ca >>= fun res ->
let name = id ca in
Ok ((name, res) :: acc))
(Ok []) chain >>= fun res ->
(* check static policies *)
Logs.debug (fun m -> m "now checking static policies") ;
check_policies vm_config (List.map snd res) >>= fun () ->
(* may retract currently running vm to evaluate force-create! *)
(if force then
match Vmm_resources.find_vm t.resources full with
| None -> Ok (t.resources, None)
| Some vm ->
Vmm_resources.remove t.resources full vm >>= fun r -> Ok (r, Some vm)
else if Vmm_resources.exists t.resources full then
(if Vmm_resources.exists t.resources full then
Error (`Msg "VM with same name is already running")
else
Ok (t.resources, None)) >>= fun (resources, vm) ->
(* check dynamic usage *)
Ok ()) >>= fun () ->
Logs.debug (fun m -> m "now checking dynamic policies") ;
Vmm_resources.check_dynamic resources vm_config res >>= fun () ->
(* need to kill *)
(match vm with
| Some vm -> Vmm_commands.destroy vm
| None -> ()) ;
Vmm_resources.check_dynamic t.resources vm_config policies >>= fun () ->
(* prepare VM: save VM image to disk, create fifo, ... *)
Vmm_commands.prepare vm_config >>= fun (tmpfile, taps) ->
Logs.debug (fun m -> m "prepared vm %a" Fpath.pp tmpfile) ;
Ok (Fpath.basename tmpfile,
fun t s ->
Vmm_commands.prepare vm_config >>= fun taps ->
Logs.debug (fun m -> m "prepared vm with taps %a" Fmt.(list ~sep:(unit ",@ ") string) taps) ;
Ok (fun t s ->
(* actually execute the vm *)
Vmm_commands.exec t.dir vm_config tmpfile taps >>= fun vm ->
Vmm_commands.exec t.dir vm_config taps >>= fun vm ->
Logs.debug (fun m -> m "exec()ed vm") ;
Vmm_resources.insert t.resources full vm >>= fun resources ->
let used_bridges =
@ -161,7 +137,7 @@ let handle_create t prefix chain cert force =
t.used_bridges vm_config.network taps
in
let t = { t with resources ; used_bridges } in
let t, out = log t (Log.hdr prefix vm_config.vname, `VM_start (vm.pid, vm.taps, None)) in
let t, out = log t (Log.hdr vm_config.prefix vm_config.vname, `VM_start (vm.pid, vm.taps, None)) in
let tls_out = Vmm_wire.success ~msg:"VM started" 0 t.client_version in
Ok (t, `Tls (s, tls_out) :: out, vm))
@ -192,7 +168,8 @@ let handle_shutdown t vm r =
t.used_bridges vm.config.network vm.taps
in
let stat_out = Vmm_wire.Stats.remove t.stats_counter t.stats_version vm.pid in
let t = { t with stats_counter = succ t.stats_counter ; resources ; used_bridges } in
let tasks = String.Map.remove (vm_id vm.config) t.tasks in
let t = { t with stats_counter = succ t.stats_counter ; resources ; used_bridges ; tasks } in
let t, outs = log t (Log.hdr vm.config.prefix vm.config.vname,
`VM_stop (vm.pid, r))
in
@ -299,12 +276,13 @@ let handle_single_revocation t prefix serial =
(* also revoke all active sessions!? *)
(* TODO: maybe we need a vmm_resources like structure for sessions as well!? *)
let log_attached, kill =
match String.Map.find (string_of_id prefix) t.log_attached with
let pid = string_of_id prefix in
match String.Map.find pid t.log_attached with
| None -> t.log_attached, []
| Some xs ->
(* those where snd v = serial: drop *)
let drop, keep = List.partition (fun (_, s) -> String.equal s id) xs in
String.Map.add (string_of_id prefix) keep t.log_attached, drop
String.Map.add pid keep t.log_attached, drop
in
(* two things:
1 revoked LEAF certs need to go (k = prefix, snd v = serial) [see above]
@ -389,12 +367,50 @@ let handle_initial t s addr chain ca =
let initial_out = `Tls (s, Vmm_wire.Client.log login_hdr login_ev t.client_version) in
Vmm_asn.permissions_of_cert asn_version leaf >>= fun perms ->
(if (List.mem `Create perms || List.mem `Force_create perms) && Vmm_asn.contains_vm leaf then
let force = List.mem `Force_create perms in
handle_create t prefix chain leaf force >>= fun (file, cont) ->
let cons = Vmm_wire.Console.add t.console_counter t.console_version file in
Ok ({ t with console_counter = succ t.console_counter },
[ `Raw (t.console_socket, cons) ],
`Create cont)
(* convert certificate to vm_config *)
Vmm_asn.vm_of_cert prefix leaf >>= fun vm_config ->
Logs.debug (fun m -> m "vm %a" pp_vm_config vm_config) ;
(* get names and static resources *)
List.fold_left (fun acc ca ->
acc >>= fun acc ->
Vmm_asn.delegation_of_cert asn_version ca >>= fun res ->
let name = id ca in
Ok ((name, res) :: acc))
(Ok []) chain >>= fun policies ->
(* check static policies *)
Logs.debug (fun m -> m "now checking static policies") ;
check_policies vm_config (List.map snd policies) >>= fun () ->
let t, task =
let force = List.mem `Force_create perms in
if force then
let fid = vm_id vm_config in
match String.Map.find fid t.tasks with
| None -> t, None
| Some task ->
let kill () =
match Vmm_resources.find_vm t.resources (fullname vm_config) with
| None ->
Logs.err (fun m -> m "found a task, but no vm for %a (%s)"
pp_id (fullname vm_config) fid)
| Some vm ->
Logs.debug (fun m -> m "killing %a now" pp_vm vm) ;
Vmm_commands.destroy vm
in
let tasks = String.Map.remove fid t.tasks in
({ t with tasks }, Some (kill, task))
else
t, None
in
let next t sleeper =
handle_create t vm_config policies >>= fun cont ->
let id = vm_id vm_config in
let cons = Vmm_wire.Console.add t.console_counter t.console_version id in
let tasks = String.Map.add id sleeper t.tasks in
Ok ({ t with console_counter = succ t.console_counter ; tasks },
[ `Raw (t.console_socket, cons) ],
cont)
in
Ok (t, [], `Create (task, next))
else if List.mem `Crl perms && Vmm_asn.contains_crl leaf then
handle_revocation t s leaf chain ca prefix
else

View file

@ -112,12 +112,7 @@ let insert m name v =
let remove m name vm =
let rec del m = function
| [] -> Error (`Msg "should not happen: empty labels in remove")
| [l] ->
(match String.Map.find l m with
| None -> Ok m
| Some (Leaf vm') when vm'.pid = vm.pid -> Ok (String.Map.remove l m)
| Some (Leaf _) -> Ok m
| Some (Subtree _) -> Ok (String.Map.remove l m)) (* TODO: not sure about this case *)
| [l] -> Ok (String.Map.remove l m)
| l::ls -> match String.Map.find l m with
| None -> Error (`Msg "should not happen: found nothing in remove while still had some labels")
| Some (Subtree (r, m')) ->