upgrade to recent decompress

This commit is contained in:
Hannes Mehnert 2018-10-29 00:05:55 +01:00
parent 0ce16cbf6b
commit a124b3eb30
3 changed files with 57 additions and 74 deletions

7
_tags
View file

@ -1,19 +1,20 @@
true : bin_annot, safe_string, principal, color(always) true : bin_annot, safe_string, principal, color(always)
true : warn(+A-4-44-48) true : warn(+A-4-44-48)
true : package(rresult logs ipaddr bos hex ptime astring duration cstruct decompress asn1-combinators) true : package(rresult logs ipaddr bos hex ptime astring duration cstruct decompress)
"src" : include "src" : include
<src/vmm_lwt.{ml,mli}>: package(lwt lwt.unix) <src/vmm_lwt.{ml,mli}>: package(lwt lwt.unix)
<src/vmm_tls_lwt.{ml,mli}>: package(lwt tls.lwt) <src/vmm_tls_lwt.{ml,mli}>: package(lwt tls.lwt)
<src/vmm_tls.{ml,mli}>: package(x509) <src/vmm_tls.{ml,mli}>: package(x509)
<src/vmm_vmmd.{ml,mli}>: package(ptime.clock.os) <src/vmm_vmmd.{ml,mli}>: package(ptime.clock.os)
<src/vmm_asn.{ml,mli}>: package(asn1-combinators)
<app/*>: package(lwt.unix cmdliner logs.fmt fmt.cli logs.cli fmt.tty lwt ipaddr.unix) <app/*>: package(checkseum.c lwt.unix cmdliner logs.fmt fmt.cli logs.cli fmt.tty lwt ipaddr.unix asn1-combinators)
<app/vmmd.{ml,native,byte}>: package(ptime.clock.os) <app/vmmd.{ml,native,byte}>: package(ptime.clock.os)
<app/vmmd_console.{ml,native,byte}>: package(ptime.clock.os) <app/vmmd_console.{ml,native,byte}>: package(ptime.clock.os)
<app/vmmd_log.{ml,native,byte}>: package(ptime.clock.os) <app/vmmd_log.{ml,native,byte}>: package(ptime.clock.os)
<app/vmmd_tls.{ml,native,byte}>: package(tls.lwt ptime.clock.os) <app/vmmd_tls.{ml,native,byte}>: package(tls.lwt ptime.clock.os)
<app/vmmd_stats.{ml,native,byte}>: link_vmm_stats, package(asn1-combinators) <app/vmmd_stats.{ml,native,byte}>: link_vmm_stats
<app/vmmc_remote.{ml,native,byte}>: package(nocrypto tls.lwt nocrypto.lwt) <app/vmmc_remote.{ml,native,byte}>: package(nocrypto tls.lwt nocrypto.lwt)
<app/vmmc_bistro.{ml,native,byte}>: package(nocrypto tls.lwt nocrypto.lwt) <app/vmmc_bistro.{ml,native,byte}>: package(nocrypto tls.lwt nocrypto.lwt)

2
opam
View file

@ -26,7 +26,7 @@ depends: [
"nocrypto" "nocrypto"
"asn1-combinators" {>= "0.2.0"} "asn1-combinators" {>= "0.2.0"}
"duration" "duration"
"decompress" {= "0.7"} "decompress" {>= "0.8.1"}
] ]
build: [ build: [

View file

@ -1,40 +1,32 @@
(* copied on 2018-03-18 from github.com:mirage/decompress.git (MIT licensed) at (* copied on 2018-03-18 from github.com:mirage/decompress.git (bin/easy.ml)
db86cf8a57ab1b4fb21e10f99093bdae425d48db by Hannes Mehnert (MIT licensed) at fa1551b19165503fc77da6da99411fa59b6a7f6a by Hannes Mehnert
TODO: should use Deflate/Inflate.bigstring instead of bytes (to avoid
unnecessary copies)
*) *)
open Decompress open Decompress
(* Keep in mind that this is a simple example of Decompress, not an efficient one. (* Keep in mind that this is a simple example of Decompress, not an efficient one.
Don't copy/paste this code into a production environment. Don't copy/paste this code into a production environment. *)
*)
let compress ?(level = 4) data = let compress ?(level = 4) data =
let input_buffer = Bytes.create 0xFFFF in let input_buffer = Bytes.create 0xFFFF in
(* We need to allocate an input buffer, the size of this buffer is important. (* We need to allocate an input buffer, the size of this buffer is important.
In fact, the Lz77 algorithm can find a pattern (and compress) only on In fact, the Lz77 algorithm can find a pattern (and compress) only on this
this input buffer. So if the input buffer is small, the algorithm has no input buffer. So if the input buffer is small, the algorithm has no chance
chance to find many patterns. to find many patterns.
If it is big, the algorithm can find a far pattern and keep this pattern If it is big, the algorithm can find a far pattern and keep this pattern
as long as it tries to compress. The optimal size seems to be [1 << 15] as long as it tries to compress. The optimal size seems to be [1 << 15]
bytes (a bigger buffer is not necessary because the distance cannot exceed bytes (a bigger buffer is not necessary because the distance cannot exceed
[1 << 15]). [1 << 15]). *)
*)
let output_buffer = Bytes.create 0xFFFF in let output_buffer = Bytes.create 0xFFFF in
(* We need to allocate an output buffer; make it as large as you can. It (* We need to allocate an output buffer; make it as large as you can. It
depends on your writing capabilities. depends on your writing capabilities. *)
*)
let pos = ref 0 in let pos = ref 0 in
let res = Buffer.create (String.length data) in let res = Buffer.create (String.length data) in
(* The buffer is not a good idea. In fact, we can have a memory problem with (* The buffer is not a good idea. In fact, we can have a memory problem with
that (like if the output is too big). You need to keep in your mind that is that (like if the output is too big). You need to keep in your mind that
insecure to let a buffer to grow automatically (an attacker can use this is insecure to let a buffer to grow automatically (an attacker can use
behaviour). this behaviour). *)
*)
(* This is the same interface as [caml-zip]. A refiller and a flusher. The (* This is the same interface as [caml-zip]. A refiller and a flusher. The
refiller sends you the maximum number of bytes that you can [blit] inside the input refiller sends you the maximum number of bytes that you can [blit] inside the input
@ -52,77 +44,67 @@ let compress ?(level = 4) data =
One argument (optional) is missing, it's the [meth]. This argument is One argument (optional) is missing, it's the [meth]. This argument is
used to limit the memory used by the state internally. In fact, Decompress used to limit the memory used by the state internally. In fact, Decompress
(and `zlib`) need to keep all of your input to calculate at the end the (and `zlib`) need to keep all of your input to calculate at the end the
frequencies and the dictionarie. So if you want to compress a big file, frequencies and the dictionary. So if you want to compress a big file, may
may be you will have a memory problem (because, all your file will be be you will have a memory problem (because, all your file will be present
present in the memory). So you can specify a method to flush the internal in the memory). So you can specify a method to flush the internal memory
memory (with SYNC, PARTIAL or FULL - see the documentation about that) at (with SYNC, PARTIAL or FULL - see the documentation about that) at each
each [n] bytes, like: ~meth:(PARTIAL, 4096) flushes the internal memory [n] bytes, like: ~meth:(PARTIAL, 4096) flushes the internal memory when we
when we compute 4096 bytes of your input. compute 4096 bytes of your input.
If [meth] is specified, the refiller has a [Some] as the second parameter. If [meth] is specified, the refiller has a [Some] as the second parameter.
Otherwise, it is [None]. Otherwise, it is [None]. *)
*) Zlib_deflate.bytes input_buffer output_buffer
match (fun input_buffer -> function
Deflate.bytes | Some max ->
input_buffer output_buffer let n = min max (min 0xFFFF (String.length data - !pos)) in
(fun input_buffer -> function Bytes.blit_string data !pos input_buffer 0 n ;
| Some max -> pos := !pos + n ;
let n = min max (min 0xFFFF (String.length data - !pos)) in n
Bytes.blit_string data !pos input_buffer 0 n; | None ->
pos := !pos + n; let n = min 0xFFFF (String.length data - !pos) in
n Bytes.blit_string data !pos input_buffer 0 n ;
| None -> pos := !pos + n ;
let n = min 0xFFFF (String.length data - !pos) in n )
Bytes.blit_string data !pos input_buffer 0 n; (fun output_buffer len ->
pos := !pos + n; Buffer.add_subbytes res output_buffer 0 len ;
n) 0xFFFF )
(fun output_buffer len -> (Zlib_deflate.default ~witness:B.bytes level)
Buffer.add_subbytes res output_buffer 0 len;
0xFFFF)
(Deflate.default ~proof:B.proof_bytes level)
(* We can specify the level of the compression, see the documentation to know (* We can specify the level of the compression, see the documentation to know
what we use for each level. The default is 4. what we use for each level. The default is 4. *)
*) |> function
with
| Ok _ -> Buffer.contents res | Ok _ -> Buffer.contents res
| Error e -> | Error e ->
Logs.err (fun m -> m "error %a while compressing" Deflate.pp_error e) ; Logs.err (fun m -> m "error %a while compressing" Zlib_deflate.pp_error e) ;
invalid_arg "cannot compress" invalid_arg "cannot compress"
let uncompress data = let uncompress data =
let input_buffer = Bytes.create 0xFFFF in let input_buffer = Bytes.create 0xFFFF in
(* We need to allocate an input buffer. Its size depends on your reading (* We need to allocate an input buffer. Its size depends on your reading
capabilities. capabilities. *)
*)
let output_buffer = Bytes.create 0xFFFF in let output_buffer = Bytes.create 0xFFFF in
(* Same as [compress]. *) (* Same as [compress]. *)
let window = Window.create ~proof:B.proof_bytes in let window = Window.create ~witness:B.bytes in
(* We allocate a window. We let the user do that so the window can be reused if (* We allocate a window. We let the user do that so the window can be reused if
needed. In fact, the window is a big buffer ([size = (1 << 15)]) and needed. In fact, the window is a big buffer ([size = (1 << 15)]) and
allocating this buffer is costly. allocating this buffer is costly.
So in this case, we decompress only one time but if you want to decompress So in this case, we decompress only one time but if you want to decompress
some flows, you can reuse this window after a [Window.reset]. some flows, you can reuse this window after a [Window.reset]. *)
*)
let pos = ref 0 in let pos = ref 0 in
let res = Buffer.create (String.length data) in let res = Buffer.create (String.length data) in
Zlib_inflate.bytes input_buffer output_buffer
match (* Same logic as [compress]. *)
Inflate.bytes
input_buffer output_buffer
(* Same logic as [compress]. *)
(fun input_buffer -> (fun input_buffer ->
let n = min 0xFFFF (String.length data - !pos) in let n = min 0xFFFF (String.length data - !pos) in
Bytes.blit_string data !pos input_buffer 0 n; Bytes.blit_string data !pos input_buffer 0 n ;
pos := !pos + n; pos := !pos + n ;
n) n )
(fun output_buffer len -> (fun output_buffer len ->
Buffer.add_subbytes res output_buffer 0 len; Buffer.add_subbytes res output_buffer 0 len ;
0xFFFF) 0xFFFF )
(Inflate.default window) (Zlib_inflate.default ~witness:B.bytes window)
with |> function
| Ok _ -> Ok (Buffer.contents res) | Ok _ -> Ok (Buffer.contents res)
| Error exn -> | Error exn ->
Logs.err (fun m -> m "error %a while uncompressing" Inflate.pp_error exn) ; Logs.err (fun m -> m "error %a while uncompressing" Zlib_inflate.pp_error exn) ;
Error () Error ()