vmm_stats: fix fd leaks (fixes #10)

- vm_open only once per vm (vmmapi_open), returning vmctx
 - get stats descriptions only once in lifetime (vmmapi_statnames)
 - close file descriptor on `remove_pid` (vmmapi_close)

vmm_stats_once: command line utility (for debugging) for stats gathering
This commit is contained in:
Hannes Mehnert 2018-03-24 22:26:10 +01:00
parent 26f8734235
commit 51d1dc63dc
6 changed files with 207 additions and 53 deletions

3
_tags
View file

@ -16,4 +16,5 @@ true : package(rresult logs ipaddr x509 tls bos hex ptime ptime.clock.os astring
<provision/*>: package(cmdliner logs.fmt fmt.cli logs.cli fmt.tty asn1-combinators nocrypto.unix lwt decompress) <provision/*>: package(cmdliner logs.fmt fmt.cli logs.cli fmt.tty asn1-combinators nocrypto.unix lwt decompress)
<stats/vmm_stats_lwt.{ml,native,byte}>: link_vmm_stats, package(cmdliner logs.fmt fmt.cli logs.cli fmt.tty lwt.unix lwt decompress) <stats/vmm_stats_lwt.{ml,native,byte}>: link_vmm_stats, package(cmdliner logs.fmt fmt.cli logs.cli fmt.tty lwt.unix lwt decompress)
<stats/vmm_stats_once.{ml,native,byte}>: link_vmm_stats, package(cmdliner logs.fmt fmt.cli logs.cli fmt.tty lwt.unix lwt decompress)

View file

@ -18,5 +18,6 @@ let () =
Pkg.bin "provision/vmm_gen_ca" ; Pkg.bin "provision/vmm_gen_ca" ;
Pkg.clib "stats/libvmm_stats_stubs.clib" ; Pkg.clib "stats/libvmm_stats_stubs.clib" ;
Pkg.bin "stats/vmm_stats_lwt" ; Pkg.bin "stats/vmm_stats_lwt" ;
Pkg.bin "stats/vmm_stats_once" ;
Pkg.bin "app/vmm_prometheus_stats" ; Pkg.bin "app/vmm_prometheus_stats" ;
] ]

View file

@ -1,4 +1,4 @@
(* (c) 2017 Hannes Mehnert, all rights reserved *) (* (c) 2017, 2018 Hannes Mehnert, all rights reserved *)
open Astring open Astring
@ -7,12 +7,20 @@ open Vmm_core
external sysctl_rusage : int -> rusage = "vmmanage_sysctl_rusage" external sysctl_rusage : int -> rusage = "vmmanage_sysctl_rusage"
external sysctl_ifcount : unit -> int = "vmmanage_sysctl_ifcount" external sysctl_ifcount : unit -> int = "vmmanage_sysctl_ifcount"
external sysctl_ifdata : int -> ifdata = "vmmanage_sysctl_ifdata" external sysctl_ifdata : int -> ifdata = "vmmanage_sysctl_ifdata"
external vmmapi_stats : string -> (string * int64) list = "vmmanage_vmmapi_stats"
type vmctx
external vmmapi_open : string -> vmctx = "vmmanage_vmmapi_open"
external vmmapi_close : vmctx -> unit = "vmmanage_vmmapi_close"
external vmmapi_statnames : vmctx -> string list = "vmmanage_vmmapi_statnames"
external vmmapi_stats : vmctx -> int64 list = "vmmanage_vmmapi_stats"
let my_version = `WV0 let my_version = `WV0
let descr = ref []
type t = { type t = {
pid_nic : (int * string) list IM.t ; pid_nic : (vmctx * (int * string) list) IM.t ;
pid_rusage : rusage IM.t ; pid_rusage : rusage IM.t ;
pid_vmmapi : (string * int64) list IM.t ; pid_vmmapi : (string * int64) list IM.t ;
nic_ifdata : ifdata String.Map.t ; nic_ifdata : ifdata String.Map.t ;
@ -21,60 +29,81 @@ type t = {
let empty () = let empty () =
{ pid_nic = IM.empty ; pid_rusage = IM.empty ; pid_vmmapi = IM.empty ; nic_ifdata = String.Map.empty } { pid_nic = IM.empty ; pid_rusage = IM.empty ; pid_vmmapi = IM.empty ; nic_ifdata = String.Map.empty }
let rec safe_sysctl f arg = let rec wrap f arg =
try Some (f arg) with try Some (f arg) with
| Unix.Unix_error (Unix.EINTR, _, _) -> safe_sysctl f arg | Unix.Unix_error (Unix.EINTR, _, _) -> wrap f arg
| _ -> None | _ -> None
let vm_vmmapi_stats pid = let gather pid vmctx nics =
let name = "ukvm" ^ string_of_int pid in wrap sysctl_rusage pid,
try Some (vmmapi_stats name) with _ -> None wrap vmmapi_stats vmctx,
List.fold_left (fun ifd (nic, nname) ->
let gather pid nics = match wrap sysctl_ifdata nic with
safe_sysctl sysctl_rusage pid, | None ->
vm_vmmapi_stats pid, Logs.warn (fun m -> m "failed to get ifdata for %s" nname) ;
List.fold_left (fun ifd (nic, _) -> ifd
match safe_sysctl sysctl_ifdata nic with
| None -> ifd
| Some data -> String.Map.add data.name data ifd) | Some data -> String.Map.add data.name data ifd)
String.Map.empty nics String.Map.empty nics
let tick t = let tick t =
Logs.debug (fun m -> m "tick with %d vms" (IM.cardinal t.pid_nic)) ; Logs.debug (fun m -> m "tick with %d vms" (IM.cardinal t.pid_nic)) ;
let pid_rusage, pid_vmmapi, nic_ifdata = let pid_rusage, pid_vmmapi, nic_ifdata =
IM.fold (fun pid nics (rus, vmms, ifds) -> IM.fold (fun pid (vmctx, nics) (rus, vmms, ifds) ->
let ru, vmm, ifd = gather pid nics in let ru, vmm, ifd = gather pid vmctx nics in
(match ru with (match ru with
| None -> rus | None ->
Logs.warn (fun m -> m "failed to get rusage for %d" pid) ;
rus
| Some ru -> IM.add pid ru rus), | Some ru -> IM.add pid ru rus),
(match vmm with (match vmm with
| None -> vmms | None ->
| Some vmm -> IM.add pid vmm vmms), Logs.warn (fun m -> m "failed to get vmmapi_stats for %d" pid) ;
vmms
| Some vmm -> IM.add pid (List.combine !descr vmm) vmms),
String.Map.union (fun _k a _b -> Some a) ifd ifds) String.Map.union (fun _k a _b -> Some a) ifd ifds)
t.pid_nic (IM.empty, IM.empty, String.Map.empty) t.pid_nic (IM.empty, IM.empty, String.Map.empty)
in in
{ t with pid_rusage ; pid_vmmapi ; nic_ifdata } { t with pid_rusage ; pid_vmmapi ; nic_ifdata }
let fill_descr ctx =
match !descr with
| [] ->
begin match wrap vmmapi_statnames ctx with
| None -> Error (`Msg "vmmapi_statnames failed, shouldn't happen")
| Some d ->
Logs.info (fun m -> m "descr are %a" Fmt.(list ~sep:(unit ",@ ") string) d) ;
descr := d ;
Ok ()
end
| ds ->
Logs.info (fun m -> m "descr are already %a" Fmt.(list ~sep:(unit ",@ ") string) ds) ;
Ok ()
let add_pid t pid nics = let add_pid t pid nics =
match safe_sysctl sysctl_ifcount () with let name = "ukvm" ^ string_of_int pid in
| None -> Error (`Msg "sysctl ifcount failed") match wrap sysctl_ifcount (), wrap vmmapi_open name with
| Some max_nic -> | None, _ -> Error (`Msg "sysctl ifcount failed")
let rec go cnt acc id = | _, None -> Error (`Msg "vmmapi_open failed")
if id > 0 && cnt > 0 then | Some max_nic, Some vmctx ->
match safe_sysctl sysctl_ifdata id with match fill_descr vmctx with
| Some ifd when List.mem ifd.name nics -> | Error e -> Error e
| Ok () ->
let rec go cnt acc id =
if id > 0 && cnt > 0 then
match wrap sysctl_ifdata id with
| Some ifd when List.mem ifd.name nics ->
go (pred cnt) ((id, ifd.name) :: acc) (pred id) go (pred cnt) ((id, ifd.name) :: acc) (pred id)
| _ -> go cnt acc (pred id) | _ -> go cnt acc (pred id)
else else
List.rev acc List.rev acc
in in
let nic_ids = go (List.length nics) [] max_nic in let nic_ids = go (List.length nics) [] max_nic in
let pid_nic = IM.add pid nic_ids t.pid_nic in let pid_nic = IM.add pid (vmctx, nic_ids) t.pid_nic in
Ok { t with pid_nic } Ok { t with pid_nic }
let stats t pid = let stats t pid =
try try
let nics = IM.find pid t.pid_nic let _, nics = IM.find pid t.pid_nic
and ru = IM.find pid t.pid_rusage and ru = IM.find pid t.pid_rusage
and vmm = IM.find pid t.pid_vmmapi and vmm = IM.find pid t.pid_vmmapi
in in
@ -92,7 +121,12 @@ let stats t pid =
| _ -> Error (`Msg "failed to find resource usage") | _ -> Error (`Msg "failed to find resource usage")
let remove_pid t pid = let remove_pid t pid =
(* can this err? -- do I care? *) (try
let vmctx, _ = IM.find pid t.pid_nic in
let _ = wrap vmmapi_close vmctx in
()
with
_ -> ()) ;
let pid_nic = IM.remove pid t.pid_nic in let pid_nic = IM.remove pid t.pid_nic in
{ t with pid_nic } { t with pid_nic }

View file

@ -1,4 +1,4 @@
(* (c) 2017 Hannes Mehnert, all rights reserved *) (* (c) 2017, 2018 Hannes Mehnert, all rights reserved *)
(* the process responsible for gathering statistics (CPU + mem + network) *) (* the process responsible for gathering statistics (CPU + mem + network) *)
@ -9,7 +9,7 @@
- remove pid - remove pid
- statistics pid - statistics pid
every 5 minutes, statistics of all registered pids are recorded. `statistics` every 15 seconds, statistics of all registered pids are recorded. `statistics`
reports last recorded stats *) reports last recorded stats *)
open Lwt.Infix open Lwt.Infix

71
stats/vmm_stats_once.ml Normal file
View file

@ -0,0 +1,71 @@
(* (c) 2017, 2018 Hannes Mehnert, all rights reserved *)
(* the process responsible for gathering statistics (CPU + mem + network) *)
open Lwt.Infix
let t = ref (Vmm_stats.empty ())
let rec timer pids () =
t := Vmm_stats.tick !t ;
List.iter (fun pid ->
match Vmm_stats.stats !t pid with
| Ok (ru, vmm, ifd) ->
Logs.info (fun m -> m "stats %d@.%a@.%a@.%a@."
pid Vmm_core.pp_rusage ru
Fmt.(list ~sep:(unit "@.") (pair ~sep:(unit ": ") string int64)) vmm
Fmt.(list ~sep:(unit "@.") Vmm_core.pp_ifdata) ifd)
| Error (`Msg e) ->
Logs.err (fun m -> m "error %s while getting stats of %d" e pid))
pids ;
Lwt_unix.sleep Duration.(to_f (of_sec 1)) >>= fun () ->
timer pids ()
let split_pid xs =
List.fold_left (fun acc str ->
match Astring.String.cuts ~sep:":" str with
| pid :: taps -> (int_of_string pid, taps) :: acc
| [] -> invalid_arg "invalid pid") [] xs
let jump _ pids =
Sys.(set_signal sigpipe Signal_ignore) ;
let pid_taps = split_pid pids in
let st =
List.fold_left (fun t (pid, taps) ->
match Vmm_stats.add_pid t pid taps with
| Ok t ->
Logs.info (fun m -> m "added pid %d taps %a"
pid Fmt.(list ~sep:(unit ", ") string) taps) ;
t
| Error (`Msg ms) ->
Logs.err (fun m -> m "error %s while adding pid %d taps %a"
ms pid Fmt.(list ~sep:(unit ", ") string) taps);
invalid_arg "broken")
!t pid_taps
in
t := st ;
let pids = fst (List.split pid_taps) in
Lwt_main.run (timer pids ()) ;
`Ok ()
let setup_log style_renderer level =
Fmt_tty.setup_std_outputs ?style_renderer ();
Logs.set_level level;
Logs.set_reporter (Logs_fmt.reporter ~dst:Format.std_formatter ())
open Cmdliner
let setup_log =
Term.(const setup_log
$ Fmt_cli.style_renderer ()
$ Logs_cli.level ())
let pids =
let doc = "pids" in
Arg.(value & opt_all string [] & info [ "pid" ] ~doc)
let cmd =
Term.(ret (const jump $ setup_log $ pids)),
Term.info "vmm_stats_once" ~version:"%%VERSION_NUM%%"
let () = match Term.eval cmd with `Ok () -> exit 0 | _ -> exit 1

View file

@ -1,3 +1,5 @@
// (c) 2017, 2018 Hannes Mehnert, all rights reserved
#include <caml/mlvalues.h> #include <caml/mlvalues.h>
#include <caml/alloc.h> #include <caml/alloc.h>
#include <caml/memory.h> #include <caml/memory.h>
@ -70,12 +72,8 @@ CAMLprim value vmmanage_sysctl_rusage (value pid_r) {
CAMLreturn(res); CAMLreturn(res);
} }
CAMLprim value vmmanage_vmmapi_stats (value name) { CAMLprim value vmmanage_vmmapi_open (value name) {
CAMLparam1(name); CAMLparam1(name);
CAMLlocal3(res, tmp, t);
int i, num_stats;
uint64_t *stats;
const char *desc;
struct vmctx *ctx; struct vmctx *ctx;
const char *devname; const char *devname;
@ -84,23 +82,56 @@ CAMLprim value vmmanage_vmmapi_stats (value name) {
devname = String_val(name); devname = String_val(name);
ctx = vm_open(devname); ctx = vm_open(devname);
if (ctx == NULL) uerror("vm_open", Nothing); if (ctx == NULL) uerror("vm_open", Nothing);
CAMLreturn((value)ctx);
}
stats = vm_get_stats(ctx, 0, NULL, &num_stats); CAMLprim value vmmanage_vmmapi_close (value octx) {
if (stats != NULL) { struct vmctx *ctx = (struct vmctx*)octx;
close(vm_get_device_fd(ctx));
free(ctx);
return Val_unit;
}
CAMLprim value vmmanage_vmmapi_statnames (value octx) {
CAMLparam0();
CAMLlocal2(res, tmp);
struct vmctx *ctx = (struct vmctx*)octx;
int i, num_stats;
uint64_t *s;
const char *desc;
s = vm_get_stats(ctx, 0, NULL, &num_stats);
if (s != NULL) {
for (i = 0; i < num_stats; i++) { for (i = 0; i < num_stats; i++) {
tmp = caml_alloc(2, 0);
desc = vm_get_stat_desc(ctx, i); desc = vm_get_stat_desc(ctx, i);
tmp = caml_alloc(2, 0);
Store_field (tmp, 0, caml_copy_string(desc)); Store_field (tmp, 0, caml_copy_string(desc));
Store_field (tmp, 1, Val64(stats[i])); Store_field (tmp, 1, res);
t = caml_alloc(2, 0); res = tmp;
Store_field (t, 0, tmp);
Store_field (t, 1, res);
res = t;
} }
} }
CAMLreturn(res); CAMLreturn(res);
} }
CAMLprim value vmmanage_vmmapi_stats (value octx) {
CAMLparam0();
CAMLlocal2(res, tmp);
int i, num_stats;
uint64_t *stats;
struct vmctx *ctx = (struct vmctx*)octx;
stats = vm_get_stats(ctx, 0, NULL, &num_stats);
if (stats != NULL) {
for (i = 0; i < num_stats; i++) {
tmp = caml_alloc(2, 0);
Store_field (tmp, 0, Val64(stats[i]));
Store_field (tmp, 1, res);
res = tmp;
}
}
CAMLreturn(res);
}
CAMLprim value vmmanage_sysctl_ifcount (value unit) { CAMLprim value vmmanage_sysctl_ifcount (value unit) {
CAMLparam1(unit); CAMLparam1(unit);
@ -180,8 +211,24 @@ CAMLprim value vmmanage_sysctl_ifdata (value num) {
uerror("sysctl_ifdata", Nothing); uerror("sysctl_ifdata", Nothing);
} }
CAMLprim value vmmanage_vmmapi_open (value name) {
CAMLparam1(name);
uerror("vmmapi_open", Nothing);
}
CAMLprim value vmmanage_vmmapi_close (value name) {
CAMLparam1(name);
uerror("vmmapi_close", Nothing);
}
CAMLprim value vmmanage_vmmapi_stats (value name) { CAMLprim value vmmanage_vmmapi_stats (value name) {
CAMLparam1(name); CAMLparam1(name);
uerror("vm_stat", Nothing); uerror("vmmapi_stats", Nothing);
} }
CAMLprim value vmmanage_vmmapi_statnames (value name) {
CAMLparam1(name);
uerror("vmmapi_statnames", Nothing);
}
#endif #endif