From 51d1dc63dc0e75e671834a6cadbd602aaab44883 Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Sat, 24 Mar 2018 22:26:10 +0100 Subject: [PATCH] vmm_stats: fix fd leaks (fixes #10) - vm_open only once per vm (vmmapi_open), returning vmctx - get stats descriptions only once in lifetime (vmmapi_statnames) - close file descriptor on `remove_pid` (vmmapi_close) vmm_stats_once: command line utility (for debugging) for stats gathering --- _tags | 3 +- pkg/pkg.ml | 1 + stats/vmm_stats.ml | 106 ++++++++++++++++++++++++++-------------- stats/vmm_stats_lwt.ml | 4 +- stats/vmm_stats_once.ml | 71 +++++++++++++++++++++++++++ stats/vmm_stats_stubs.c | 75 ++++++++++++++++++++++------ 6 files changed, 207 insertions(+), 53 deletions(-) create mode 100644 stats/vmm_stats_once.ml diff --git a/_tags b/_tags index f3fe1ff..4253201 100644 --- a/_tags +++ b/_tags @@ -16,4 +16,5 @@ true : package(rresult logs ipaddr x509 tls bos hex ptime ptime.clock.os astring : package(cmdliner logs.fmt fmt.cli logs.cli fmt.tty asn1-combinators nocrypto.unix lwt decompress) -: link_vmm_stats, package(cmdliner logs.fmt fmt.cli logs.cli fmt.tty lwt.unix lwt decompress) \ No newline at end of file +: link_vmm_stats, package(cmdliner logs.fmt fmt.cli logs.cli fmt.tty lwt.unix lwt decompress) +: link_vmm_stats, package(cmdliner logs.fmt fmt.cli logs.cli fmt.tty lwt.unix lwt decompress) diff --git a/pkg/pkg.ml b/pkg/pkg.ml index e65233e..2a71d0b 100644 --- a/pkg/pkg.ml +++ b/pkg/pkg.ml @@ -18,5 +18,6 @@ let () = Pkg.bin "provision/vmm_gen_ca" ; Pkg.clib "stats/libvmm_stats_stubs.clib" ; Pkg.bin "stats/vmm_stats_lwt" ; + Pkg.bin "stats/vmm_stats_once" ; Pkg.bin "app/vmm_prometheus_stats" ; ] diff --git a/stats/vmm_stats.ml b/stats/vmm_stats.ml index 4bb39a6..95fb92f 100644 --- a/stats/vmm_stats.ml +++ b/stats/vmm_stats.ml @@ -1,4 +1,4 @@ -(* (c) 2017 Hannes Mehnert, all rights reserved *) +(* (c) 2017, 2018 Hannes Mehnert, all rights reserved *) open Astring @@ -7,12 +7,20 @@ open Vmm_core external sysctl_rusage : int -> rusage = "vmmanage_sysctl_rusage" external sysctl_ifcount : unit -> int = "vmmanage_sysctl_ifcount" external sysctl_ifdata : int -> ifdata = "vmmanage_sysctl_ifdata" -external vmmapi_stats : string -> (string * int64) list = "vmmanage_vmmapi_stats" + +type vmctx + +external vmmapi_open : string -> vmctx = "vmmanage_vmmapi_open" +external vmmapi_close : vmctx -> unit = "vmmanage_vmmapi_close" +external vmmapi_statnames : vmctx -> string list = "vmmanage_vmmapi_statnames" +external vmmapi_stats : vmctx -> int64 list = "vmmanage_vmmapi_stats" let my_version = `WV0 +let descr = ref [] + type t = { - pid_nic : (int * string) list IM.t ; + pid_nic : (vmctx * (int * string) list) IM.t ; pid_rusage : rusage IM.t ; pid_vmmapi : (string * int64) list IM.t ; nic_ifdata : ifdata String.Map.t ; @@ -21,60 +29,81 @@ type t = { let empty () = { pid_nic = IM.empty ; pid_rusage = IM.empty ; pid_vmmapi = IM.empty ; nic_ifdata = String.Map.empty } -let rec safe_sysctl f arg = +let rec wrap f arg = try Some (f arg) with - | Unix.Unix_error (Unix.EINTR, _, _) -> safe_sysctl f arg + | Unix.Unix_error (Unix.EINTR, _, _) -> wrap f arg | _ -> None -let vm_vmmapi_stats pid = - let name = "ukvm" ^ string_of_int pid in - try Some (vmmapi_stats name) with _ -> None - -let gather pid nics = - safe_sysctl sysctl_rusage pid, - vm_vmmapi_stats pid, - List.fold_left (fun ifd (nic, _) -> - match safe_sysctl sysctl_ifdata nic with - | None -> ifd +let gather pid vmctx nics = + wrap sysctl_rusage pid, + wrap vmmapi_stats vmctx, + List.fold_left (fun ifd (nic, nname) -> + match wrap sysctl_ifdata nic with + | None -> + Logs.warn (fun m -> m "failed to get ifdata for %s" nname) ; + ifd | Some data -> String.Map.add data.name data ifd) String.Map.empty nics let tick t = Logs.debug (fun m -> m "tick with %d vms" (IM.cardinal t.pid_nic)) ; let pid_rusage, pid_vmmapi, nic_ifdata = - IM.fold (fun pid nics (rus, vmms, ifds) -> - let ru, vmm, ifd = gather pid nics in + IM.fold (fun pid (vmctx, nics) (rus, vmms, ifds) -> + let ru, vmm, ifd = gather pid vmctx nics in (match ru with - | None -> rus + | None -> + Logs.warn (fun m -> m "failed to get rusage for %d" pid) ; + rus | Some ru -> IM.add pid ru rus), (match vmm with - | None -> vmms - | Some vmm -> IM.add pid vmm vmms), + | None -> + Logs.warn (fun m -> m "failed to get vmmapi_stats for %d" pid) ; + vmms + | Some vmm -> IM.add pid (List.combine !descr vmm) vmms), String.Map.union (fun _k a _b -> Some a) ifd ifds) t.pid_nic (IM.empty, IM.empty, String.Map.empty) in { t with pid_rusage ; pid_vmmapi ; nic_ifdata } +let fill_descr ctx = + match !descr with + | [] -> + begin match wrap vmmapi_statnames ctx with + | None -> Error (`Msg "vmmapi_statnames failed, shouldn't happen") + | Some d -> + Logs.info (fun m -> m "descr are %a" Fmt.(list ~sep:(unit ",@ ") string) d) ; + descr := d ; + Ok () + end + | ds -> + Logs.info (fun m -> m "descr are already %a" Fmt.(list ~sep:(unit ",@ ") string) ds) ; + Ok () + let add_pid t pid nics = - match safe_sysctl sysctl_ifcount () with - | None -> Error (`Msg "sysctl ifcount failed") - | Some max_nic -> - let rec go cnt acc id = - if id > 0 && cnt > 0 then - match safe_sysctl sysctl_ifdata id with - | Some ifd when List.mem ifd.name nics -> + let name = "ukvm" ^ string_of_int pid in + match wrap sysctl_ifcount (), wrap vmmapi_open name with + | None, _ -> Error (`Msg "sysctl ifcount failed") + | _, None -> Error (`Msg "vmmapi_open failed") + | Some max_nic, Some vmctx -> + match fill_descr vmctx with + | Error e -> Error e + | Ok () -> + let rec go cnt acc id = + if id > 0 && cnt > 0 then + match wrap sysctl_ifdata id with + | Some ifd when List.mem ifd.name nics -> go (pred cnt) ((id, ifd.name) :: acc) (pred id) - | _ -> go cnt acc (pred id) - else - List.rev acc - in - let nic_ids = go (List.length nics) [] max_nic in - let pid_nic = IM.add pid nic_ids t.pid_nic in - Ok { t with pid_nic } + | _ -> go cnt acc (pred id) + else + List.rev acc + in + let nic_ids = go (List.length nics) [] max_nic in + let pid_nic = IM.add pid (vmctx, nic_ids) t.pid_nic in + Ok { t with pid_nic } let stats t pid = try - let nics = IM.find pid t.pid_nic + let _, nics = IM.find pid t.pid_nic and ru = IM.find pid t.pid_rusage and vmm = IM.find pid t.pid_vmmapi in @@ -92,7 +121,12 @@ let stats t pid = | _ -> Error (`Msg "failed to find resource usage") let remove_pid t pid = - (* can this err? -- do I care? *) + (try + let vmctx, _ = IM.find pid t.pid_nic in + let _ = wrap vmmapi_close vmctx in + () + with + _ -> ()) ; let pid_nic = IM.remove pid t.pid_nic in { t with pid_nic } diff --git a/stats/vmm_stats_lwt.ml b/stats/vmm_stats_lwt.ml index f79f8a4..36c23aa 100644 --- a/stats/vmm_stats_lwt.ml +++ b/stats/vmm_stats_lwt.ml @@ -1,4 +1,4 @@ -(* (c) 2017 Hannes Mehnert, all rights reserved *) +(* (c) 2017, 2018 Hannes Mehnert, all rights reserved *) (* the process responsible for gathering statistics (CPU + mem + network) *) @@ -9,7 +9,7 @@ - remove pid - statistics pid - every 5 minutes, statistics of all registered pids are recorded. `statistics` + every 15 seconds, statistics of all registered pids are recorded. `statistics` reports last recorded stats *) open Lwt.Infix diff --git a/stats/vmm_stats_once.ml b/stats/vmm_stats_once.ml new file mode 100644 index 0000000..629b20e --- /dev/null +++ b/stats/vmm_stats_once.ml @@ -0,0 +1,71 @@ +(* (c) 2017, 2018 Hannes Mehnert, all rights reserved *) + +(* the process responsible for gathering statistics (CPU + mem + network) *) + +open Lwt.Infix + +let t = ref (Vmm_stats.empty ()) + +let rec timer pids () = + t := Vmm_stats.tick !t ; + List.iter (fun pid -> + match Vmm_stats.stats !t pid with + | Ok (ru, vmm, ifd) -> + Logs.info (fun m -> m "stats %d@.%a@.%a@.%a@." + pid Vmm_core.pp_rusage ru + Fmt.(list ~sep:(unit "@.") (pair ~sep:(unit ": ") string int64)) vmm + Fmt.(list ~sep:(unit "@.") Vmm_core.pp_ifdata) ifd) + | Error (`Msg e) -> + Logs.err (fun m -> m "error %s while getting stats of %d" e pid)) + pids ; + Lwt_unix.sleep Duration.(to_f (of_sec 1)) >>= fun () -> + timer pids () + +let split_pid xs = + List.fold_left (fun acc str -> + match Astring.String.cuts ~sep:":" str with + | pid :: taps -> (int_of_string pid, taps) :: acc + | [] -> invalid_arg "invalid pid") [] xs + +let jump _ pids = + Sys.(set_signal sigpipe Signal_ignore) ; + let pid_taps = split_pid pids in + let st = + List.fold_left (fun t (pid, taps) -> + match Vmm_stats.add_pid t pid taps with + | Ok t -> + Logs.info (fun m -> m "added pid %d taps %a" + pid Fmt.(list ~sep:(unit ", ") string) taps) ; + t + | Error (`Msg ms) -> + Logs.err (fun m -> m "error %s while adding pid %d taps %a" + ms pid Fmt.(list ~sep:(unit ", ") string) taps); + invalid_arg "broken") + !t pid_taps + in + t := st ; + let pids = fst (List.split pid_taps) in + Lwt_main.run (timer pids ()) ; + `Ok () + +let setup_log style_renderer level = + Fmt_tty.setup_std_outputs ?style_renderer (); + Logs.set_level level; + Logs.set_reporter (Logs_fmt.reporter ~dst:Format.std_formatter ()) + +open Cmdliner + +let setup_log = + Term.(const setup_log + $ Fmt_cli.style_renderer () + $ Logs_cli.level ()) + +let pids = + let doc = "pids" in + Arg.(value & opt_all string [] & info [ "pid" ] ~doc) + +let cmd = + Term.(ret (const jump $ setup_log $ pids)), + Term.info "vmm_stats_once" ~version:"%%VERSION_NUM%%" + +let () = match Term.eval cmd with `Ok () -> exit 0 | _ -> exit 1 diff --git a/stats/vmm_stats_stubs.c b/stats/vmm_stats_stubs.c index 4db9a63..b6d8b5b 100644 --- a/stats/vmm_stats_stubs.c +++ b/stats/vmm_stats_stubs.c @@ -1,3 +1,5 @@ +// (c) 2017, 2018 Hannes Mehnert, all rights reserved + #include #include #include @@ -70,12 +72,8 @@ CAMLprim value vmmanage_sysctl_rusage (value pid_r) { CAMLreturn(res); } -CAMLprim value vmmanage_vmmapi_stats (value name) { +CAMLprim value vmmanage_vmmapi_open (value name) { CAMLparam1(name); - CAMLlocal3(res, tmp, t); - int i, num_stats; - uint64_t *stats; - const char *desc; struct vmctx *ctx; const char *devname; @@ -84,23 +82,56 @@ CAMLprim value vmmanage_vmmapi_stats (value name) { devname = String_val(name); ctx = vm_open(devname); if (ctx == NULL) uerror("vm_open", Nothing); + CAMLreturn((value)ctx); +} - stats = vm_get_stats(ctx, 0, NULL, &num_stats); - if (stats != NULL) { +CAMLprim value vmmanage_vmmapi_close (value octx) { + struct vmctx *ctx = (struct vmctx*)octx; + + close(vm_get_device_fd(ctx)); + free(ctx); + return Val_unit; +} + +CAMLprim value vmmanage_vmmapi_statnames (value octx) { + CAMLparam0(); + CAMLlocal2(res, tmp); + struct vmctx *ctx = (struct vmctx*)octx; + int i, num_stats; + uint64_t *s; + const char *desc; + + s = vm_get_stats(ctx, 0, NULL, &num_stats); + if (s != NULL) { for (i = 0; i < num_stats; i++) { - tmp = caml_alloc(2, 0); desc = vm_get_stat_desc(ctx, i); + tmp = caml_alloc(2, 0); Store_field (tmp, 0, caml_copy_string(desc)); - Store_field (tmp, 1, Val64(stats[i])); - t = caml_alloc(2, 0); - Store_field (t, 0, tmp); - Store_field (t, 1, res); - res = t; + Store_field (tmp, 1, res); + res = tmp; } } CAMLreturn(res); } +CAMLprim value vmmanage_vmmapi_stats (value octx) { + CAMLparam0(); + CAMLlocal2(res, tmp); + int i, num_stats; + uint64_t *stats; + struct vmctx *ctx = (struct vmctx*)octx; + + stats = vm_get_stats(ctx, 0, NULL, &num_stats); + if (stats != NULL) { + for (i = 0; i < num_stats; i++) { + tmp = caml_alloc(2, 0); + Store_field (tmp, 0, Val64(stats[i])); + Store_field (tmp, 1, res); + res = tmp; + } + } + CAMLreturn(res); +} CAMLprim value vmmanage_sysctl_ifcount (value unit) { CAMLparam1(unit); @@ -180,8 +211,24 @@ CAMLprim value vmmanage_sysctl_ifdata (value num) { uerror("sysctl_ifdata", Nothing); } +CAMLprim value vmmanage_vmmapi_open (value name) { + CAMLparam1(name); + uerror("vmmapi_open", Nothing); +} + +CAMLprim value vmmanage_vmmapi_close (value name) { + CAMLparam1(name); + uerror("vmmapi_close", Nothing); +} + CAMLprim value vmmanage_vmmapi_stats (value name) { CAMLparam1(name); - uerror("vm_stat", Nothing); + uerror("vmmapi_stats", Nothing); } + +CAMLprim value vmmanage_vmmapi_statnames (value name) { + CAMLparam1(name); + uerror("vmmapi_statnames", Nothing); +} + #endif