albatross/stats/albatross_stats.ml

96 lines
2.9 KiB
OCaml
Raw Normal View History

(* (c) 2017, 2018 Hannes Mehnert, all rights reserved *)
2017-05-26 14:30:34 +00:00
(* the process responsible for gathering statistics (CPU + mem + network) *)
(* a shared unix domain socket between vmmd and vmm_stats is used as
communication channel, where the vmmd can issue commands:
- add pid taps
- remove pid
- statistics pid
every 10 seconds, statistics of all registered pids are recorded. `statistics`
2017-05-26 14:30:34 +00:00
reports last recorded stats *)
open Lwt.Infix
2019-03-27 23:11:43 +00:00
open Albatross_stats_pure
2018-10-25 14:55:54 +00:00
let t = ref (empty ())
2017-05-26 14:30:34 +00:00
let pp_sockaddr ppf = function
| Lwt_unix.ADDR_UNIX str -> Fmt.pf ppf "unix domain socket %s" str
| Lwt_unix.ADDR_INET (addr, port) -> Fmt.pf ppf "TCP %s:%d"
(Unix.string_of_inet_addr addr) port
2019-10-10 20:26:36 +00:00
let handle s addr =
2017-05-26 14:30:34 +00:00
Logs.info (fun m -> m "handling stats connection %a" pp_sockaddr addr) ;
let rec loop () =
Vmm_lwt.read_wire s >>= function
| Error _ ->
Logs.err (fun m -> m "exception while reading") ;
Lwt.return_unit
2018-10-22 22:02:05 +00:00
| Ok wire ->
2018-10-25 14:55:54 +00:00
match handle !t s wire with
| Error (`Msg msg) ->
Vmm_lwt.write_wire s (fst wire, `Failure msg) >>= fun _ ->
Lwt.return_unit
| Ok (t', close, out) ->
t := t' ;
Vmm_lwt.write_wire s (fst wire, `Success (`String out)) >>= function
| Ok () ->
(match close with
| Some s' ->
Vmm_lwt.safe_close s' >>= fun () ->
(* read the next *)
loop ()
| None -> loop ())
| Error _ ->
Logs.err (fun m -> m "error while writing") ;
Lwt.return_unit
2017-05-26 14:30:34 +00:00
in
loop () >>= fun () ->
Vmm_lwt.safe_close s
2017-05-26 14:30:34 +00:00
let timer () =
2018-10-25 14:55:54 +00:00
let t', outs = tick !t in
t := t' ;
Lwt_list.iter_p (fun (s, id, stat) ->
Vmm_lwt.write_wire s stat >>= function
| Ok () -> Lwt.return_unit
| Error `Exception ->
Logs.debug (fun m -> m "removing entry %a" Vmm_core.Name.pp id) ;
t := remove_entry !t id ;
Vmm_lwt.safe_close s)
outs
2017-05-26 14:30:34 +00:00
2019-10-10 20:26:36 +00:00
let m = Albatross_cli.conn_metrics "unix"
let jump _ interval influx =
2017-05-26 14:30:34 +00:00
Sys.(set_signal sigpipe Signal_ignore) ;
let interval = Duration.(to_f (of_sec interval)) in
2017-05-26 14:30:34 +00:00
Lwt_main.run
2019-10-10 20:26:36 +00:00
(Albatross_cli.init_influx "albatross_stats" influx;
Vmm_lwt.server_socket `Stats >>= fun s ->
let _ev = Lwt_engine.on_timer interval true (fun _e -> Lwt.async timer) in
2017-05-26 14:30:34 +00:00
let rec loop () =
Lwt_unix.accept s >>= fun (cs, addr) ->
2019-10-10 20:26:36 +00:00
m `Open;
Lwt.async (fun () -> handle cs addr >|= fun () -> m `Close);
2017-05-26 14:30:34 +00:00
loop ()
in
loop ())
open Cmdliner
2019-03-27 23:11:43 +00:00
open Albatross_cli
2017-05-26 14:30:34 +00:00
let interval =
let doc = "Interval between statistics gatherings (in seconds)" in
2018-10-26 19:35:40 +00:00
Arg.(value & opt int 10 & info [ "interval" ] ~doc)
2017-05-26 14:30:34 +00:00
let cmd =
2019-10-10 20:26:36 +00:00
Term.(term_result (const jump $ setup_log $ interval $ influx)),
2019-03-27 23:11:43 +00:00
Term.info "albatross_stats" ~version:"%%VERSION_NUM%%"
2017-05-26 14:30:34 +00:00
let () = match Term.eval cmd with `Ok () -> exit 0 | _ -> exit 1