From db8ae1ee37ca73493c94f06d3ab66bb4a874bc80 Mon Sep 17 00:00:00 2001 From: Hannes Mehnert Date: Sun, 18 Mar 2018 18:07:14 +0000 Subject: [PATCH] compression, fixes #6 --- .merlin | 2 +- _tags | 7 ++- opam | 1 + pkg/META | 2 +- provision/vmm_req_vm.ml | 20 +++++-- src/vmm_asn.ml | 7 ++- src/vmm_commands.ml | 7 ++- src/vmm_compress.ml | 128 ++++++++++++++++++++++++++++++++++++++++ src/vmm_core.ml | 3 +- 9 files changed, 163 insertions(+), 14 deletions(-) create mode 100644 src/vmm_compress.ml diff --git a/.merlin b/.merlin index 09b0e65..aaed434 100644 --- a/.merlin +++ b/.merlin @@ -6,4 +6,4 @@ S provision B _build/** PKG topkg logs ipaddr x509 tls rresult bos lwt cmdliner hex cstruct.ppx duration -PKG ptime ptime.clock.os ipaddr.unix \ No newline at end of file +PKG ptime ptime.clock.os ipaddr.unix decompress \ No newline at end of file diff --git a/_tags b/_tags index 0c98ee7..f3fe1ff 100644 --- a/_tags +++ b/_tags @@ -3,16 +3,17 @@ true : warn(+A-44) true : package(rresult logs ipaddr x509 tls bos hex ptime ptime.clock.os astring duration cstruct) "src" : include +: package(decompress) : package(ppx_cstruct) : package(asn1-combinators) : package(lwt) : package(lwt tls.lwt) -: package(lwt.unix cmdliner logs.fmt fmt.cli logs.cli fmt.tty lwt ipaddr.unix) +: package(lwt.unix cmdliner logs.fmt fmt.cli logs.cli fmt.tty lwt ipaddr.unix decompress) : package(nocrypto tls.lwt nocrypto.lwt) : package(tls.lwt) : package(nocrypto tls.lwt nocrypto.lwt) -: package(cmdliner logs.fmt fmt.cli logs.cli fmt.tty asn1-combinators nocrypto.unix lwt) +: package(cmdliner logs.fmt fmt.cli logs.cli fmt.tty asn1-combinators nocrypto.unix lwt decompress) -: link_vmm_stats, package(cmdliner logs.fmt fmt.cli logs.cli fmt.tty lwt.unix lwt) \ No newline at end of file +: link_vmm_stats, package(cmdliner logs.fmt fmt.cli logs.cli fmt.tty lwt.unix lwt decompress) \ No newline at end of file diff --git a/opam b/opam index 22ad851..3ace94e 100644 --- a/opam +++ b/opam @@ -27,6 +27,7 @@ depends: [ "nocrypto" "asn1-combinators" {>= "0.2.0"} "duration" + "decompress" {>= "0.7"} ] build: [ diff --git a/pkg/META b/pkg/META index aa06bc0..9f42198 100644 --- a/pkg/META +++ b/pkg/META @@ -1,6 +1,6 @@ description = "VM Manager" version = "%%VERSION_NUM%%" -requires = "rresult logs ipaddr x509 tls bos hex ptime ptime.clock.os astring duration asn1-combinators lwt tls.lwt" +requires = "rresult logs ipaddr x509 tls bos hex ptime ptime.clock.os astring duration asn1-combinators lwt tls.lwt decompress" archive(byte) = "vmm.cma" archive(native) = "vmm.cmxa" plugin(byte) = "vmm.cma" diff --git a/provision/vmm_req_vm.ml b/provision/vmm_req_vm.ml index 1a7cc37..512abd4 100644 --- a/provision/vmm_req_vm.ml +++ b/provision/vmm_req_vm.ml @@ -6,7 +6,7 @@ open Rresult.R.Infix open Vmm_asn -let vm_csr key name image cpu mem args block net force = +let vm_csr key name image cpu mem args block net force compression = let block = match block with | None -> [] | Some x -> [ (false, `Unsupported (Oid.block_device, string_to_cstruct x)) ] @@ -18,24 +18,30 @@ let vm_csr key name image cpu mem args block net force = | xs -> [ (false, `Unsupported (Oid.network, strings_to_cstruct xs)) ] and cmd = if force then `Force_create else `Create in + let image = match compression with + | 0 -> image_to_cstruct (`Ukvm_amd64, image) + | level -> + let img = Vmm_compress.compress ~level (Cstruct.to_string image) in + image_to_cstruct (`Ukvm_amd64_compressed, Cstruct.of_string img) + in let exts = [ (false, `Unsupported (Oid.version, version_to_cstruct asn_version)) ; (false, `Unsupported (Oid.cpuid, int_to_cstruct cpu)) ; (false, `Unsupported (Oid.memory, int_to_cstruct mem)) ; - (false, `Unsupported (Oid.vmimage, image_to_cstruct (`Ukvm_amd64, image))) ; + (false, `Unsupported (Oid.vmimage, image)) ; (false, `Unsupported (Oid.permissions, permissions_to_cstruct [ cmd ])) ; ] @ block @ arg @ net and name = [ `CN name ] in X509.CA.request name ~extensions:[`Extensions exts] key -let jump _ name key image mem cpu args block net force = +let jump _ name key image mem cpu args block net force compression = Nocrypto_entropy_unix.initialize () ; match priv_key key name >>= fun key -> (Bos.OS.File.read (Fpath.v image) >>= fun s -> Ok (Cstruct.of_string s)) >>= fun image -> - let csr = vm_csr key name image cpu mem args block net force in + let csr = vm_csr key name image cpu mem args block net force compression in let enc = X509.Encoding.Pem.Certificate_signing_request.to_pem_cstruct1 csr in Bos.OS.File.write Fpath.(v name + ".req") (Cstruct.to_string enc) with @@ -68,8 +74,12 @@ let force = let doc = "Force creation (destroy VM with same name if it exists)" in Arg.(value & flag & info [ "force" ] ~doc) +let compress_level = + let doc = "Compression level (0 for no compression)" in + Arg.(value & opt int 4 & info [ "compression-level" ] ~doc) + let cmd = - Term.(ret (const jump $ setup_log $ nam $ key $ image $ mem $ cpu $ args $ block $ net $ force)), + Term.(ret (const jump $ setup_log $ nam $ key $ image $ mem $ cpu $ args $ block $ net $ force $ compress_level)), Term.info "vmm_req_vm" ~version:"%%VERSION_NUM%%" let () = match Term.eval cmd with `Ok () -> exit 0 | _ -> exit 1 diff --git a/src/vmm_asn.ml b/src/vmm_asn.ml index 59ba520..da067e0 100644 --- a/src/vmm_asn.ml +++ b/src/vmm_asn.ml @@ -103,14 +103,17 @@ let image = let f = function | `C1 x -> `Ukvm_amd64, x | `C2 x -> `Ukvm_arm64, x + | `C3 x -> `Ukvm_amd64_compressed, x and g = function | `Ukvm_amd64, x -> `C1 x | `Ukvm_arm64, x -> `C2 x + | `Ukvm_amd64_compressed, x -> `C3 x in Asn.S.map f g @@ - Asn.S.(choice2 + Asn.S.(choice3 (explicit 0 octet_string) - (explicit 1 octet_string)) + (explicit 1 octet_string) + (explicit 2 octet_string)) let image_of_cstruct, image_to_cstruct = projections_of image diff --git a/src/vmm_commands.ml b/src/vmm_commands.ml index 4379e2d..d036c4e 100644 --- a/src/vmm_commands.ml +++ b/src/vmm_commands.ml @@ -125,7 +125,12 @@ let prepare vm = let tmpfile = tmpfile vm in (match vm.vmimage with | `Ukvm_amd64, blob -> Ok blob - | _ -> Error (`Msg "no amd64 ukvm image found")) >>= fun image -> + | `Ukvm_amd64_compressed, blob -> + begin match Vmm_compress.uncompress (Cstruct.to_string blob) with + | Ok blob -> Ok (Cstruct.of_string blob) + | Error () -> Error (`Msg "failed to uncompress") + end + | `Ukvm_arm64, _ -> Error (`Msg "no amd64 ukvm image found")) >>= fun image -> Bos.OS.File.write (image_fn tmpfile) (Cstruct.to_string image) >>= fun () -> let fifo = fifo_fn tmpfile in (match fifo_exists fifo with diff --git a/src/vmm_compress.ml b/src/vmm_compress.ml new file mode 100644 index 0000000..ffa5bc7 --- /dev/null +++ b/src/vmm_compress.ml @@ -0,0 +1,128 @@ +(* copied n 2018-03-18 from github.com:mirage/decompress.git (MIT licensed) at + db86cf8a57ab1b4fb21e10f99093bdae425d48db by Hannes Mehnert + + TODO: should use Deflate/Inflate.bigstring instead of bytes (to avoid + unnecessary copies) +*) + +open Decompress + +(* Keep in your mind, this is an easy example of Decompress but not efficient. + Don't copy/paste this code in a productive environment. +*) + +let compress ?(level = 4) data = + let input_buffer = Bytes.create 0xFFFF in + (* We need to allocate an input buffer, the size of this buffer is important. + In fact, the Lz77 algorithm can find a pattern (and compress) only on + this input buffer. So if the input buffer is small, the algorithm has no + chance to find many patterns. + + If it is big, the algorithm can find a far pattern and keep this pattern + as long as it tries to compress. The optimal size seems to be [1 << 15] + bytes (a bigger buffer is not necessary because the distance can be upper + than [1 << 15]). + *) + let output_buffer = Bytes.create 0xFFFF in + (* We need to allocate an output buffer, is like you can. it's depends your + capabilities of your writing. + *) + + let pos = ref 0 in + let res = Buffer.create (String.length data) in + (* The buffer is not a good idea. In fact, we can have a memory problem with + that (like if the output is too big). You need to keep in your mind that is + insecure to let a buffer to grow automatically (an attacker can use this + behaviour). + *) + + (* This is the same interface as [caml-zip]. A refiller and a flusher. The + refiller send you the maximum byte than you can [blit] inside the input + buffer. + + So, if the second argument is [Some max], it's mandatory to respect that, + otherwise, you lost something. In the other case, you can blit the maximum + that what you can. + + The flusher send you the output buffer and how many byte Decompress wrote + inside. The offset for this buffer is always [0]. Then, you need to send + how many bytes are free in the output buffer (and the common is that all + is free). + + One argument (optionnal) is missing, it's the [meth]. This argument is + used to limit the memory used by the state internally. In fact, Decompress + (and `zlib`) need to keep all of your input to calculate at the end the + frequencies and the dictionarie. So if you want to compress a big file, + may be you will have a memory problem (because, all your file will be + present in the memory). So you can specify a method to flush the internal + memory (with SYNC, PARTIAL or FULL - see the documentation about that) at + each [n] bytes, like: ~meth:(PARTIAL, 4096) flushes the internal memory + when we compute 4096 bytes of your input. + + If [meth] is specified, the refiller has a [Some] as the second parameter. + Otherwise, it is [None]. + *) + match + Deflate.bytes + input_buffer output_buffer + (fun input_buffer -> function + | Some max -> + let n = min max (min 0xFFFF (String.length data - !pos)) in + Bytes.blit_string data !pos input_buffer 0 n; + pos := !pos + n; + n + | None -> + let n = min 0xFFFF (String.length data - !pos) in + Bytes.blit_string data !pos input_buffer 0 n; + pos := !pos + n; + n) + (fun output_buffer len -> + Buffer.add_subbytes res output_buffer 0 len; + 0xFFFF) + (Deflate.default ~proof:B.proof_bytes level) + (* We can specify the level of the compression, see the documentation to know + what we use for each level. The default is 4. + *) + with + | Ok _ -> Buffer.contents res + | Error e -> + Logs.err (fun m -> m "error %a while compressing" Deflate.pp_error e) ; + invalid_arg "cannot compress" + +let uncompress data = + let input_buffer = Bytes.create 0xFFFF in + (* We need to allocate an input buffer. it's depends your capabilities of + your reading. + *) + let output_buffer = Bytes.create 0xFFFF in + (* Same as [compress]. *) + let window = Window.create ~proof:B.proof_bytes in + (* We allocate a window. We let the user to do that to reuse the window if + it's needed. In fact, the window is a big buffer ([size = (1 << 15)]) and + allocate this buffer costs. + + So in this case, we decompress only one time but if you want to decompress + some flows, you can reuse this window after a [Window.reset]. + *) + + let pos = ref 0 in + let res = Buffer.create (String.length data) in + + match + Inflate.bytes + input_buffer output_buffer + (* Same logic as [compress]. *) + (fun input_buffer -> + let n = min 0xFFFF (String.length data - !pos) in + Bytes.blit_string data !pos input_buffer 0 n; + pos := !pos + n; + n) + (fun output_buffer len -> + Buffer.add_subbytes res output_buffer 0 len; + 0xFFFF) + (Inflate.default window) + with + | Ok _ -> Ok (Buffer.contents res) + | Error exn -> + Logs.err (fun m -> m "error %a while uncompressing" Inflate.pp_error exn) ; + Error () diff --git a/src/vmm_core.ml b/src/vmm_core.ml index 40b0c5e..fa17695 100644 --- a/src/vmm_core.ml +++ b/src/vmm_core.ml @@ -84,10 +84,11 @@ let cmd_allowed permissions cmd = in List.mem perm permissions -type vmtype = [ `Ukvm_amd64 | `Ukvm_arm64 ] +type vmtype = [ `Ukvm_amd64 | `Ukvm_arm64 | `Ukvm_amd64_compressed ] let pp_vmtype ppf = function | `Ukvm_amd64 -> Fmt.pf ppf "ukvm-amd64" + | `Ukvm_amd64_compressed -> Fmt.pf ppf "ukvm-amd64-compressed" | `Ukvm_arm64 -> Fmt.pf ppf "ukvm-arm64" type id = string list