albatross/src/vmm_compress.ml

112 lines
5 KiB
OCaml
Raw Normal View History

2018-10-28 23:05:55 +00:00
(* copied on 2018-03-18 from github.com:mirage/decompress.git (bin/easy.ml)
(MIT licensed) at fa1551b19165503fc77da6da99411fa59b6a7f6a by Hannes Mehnert
*)
(* edited to reflect later API (v0.9.0) changes on 2019-07-16 *)
module Stdlib_buffer = Buffer
2018-03-18 18:07:14 +00:00
open Decompress
(* Keep in mind that this is a simple example of using Decompress, not an
efficient one. Do not copy/paste this code into a production environment. *)
2018-03-18 18:07:14 +00:00
(** [compress ?level data] deflates the string [data] with
    [Zlib_deflate] and returns the compressed bytes as a string.

    @param level compression level handed to [Zlib_deflate.default];
      defaults to [4].
    @raise Invalid_argument if [Zlib_deflate.bytes] reports an error; the
      error is logged with [Logs.err] before raising. *)
let compress ?(level = 4) data =
  (* Size of both scratch buffers handed to Decompress. The size of the input
     buffer matters: the Lz77 algorithm can only find (and compress) patterns
     inside this buffer, so a small buffer leaves it little chance of finding
     many patterns. The upstream example notes that the optimal size seems to
     be [1 lsl 15] bytes, and that a bigger buffer is not necessary because a
     match distance cannot exceed [1 lsl 15]. The output buffer size is a
     free choice that only depends on how the caller wants to write. *)
  let chunk_size = 0xFFFF in
  let input_buffer = Bytes.create chunk_size in
  let output_buffer = Bytes.create chunk_size in
  let data_len = String.length data in
  (* Offset of the first byte of [data] not yet handed to the compressor. *)
  let pos = ref 0 in
  (* NOTE: accumulating into an automatically growing buffer is convenient
     but not ideal: nothing bounds the memory it may take if the output is
     very large, which an attacker could exploit. *)
  let res = Stdlib_buffer.create data_len in
  (* Refiller (same interface as [caml-zip]): copy the next slice of [data]
     to the start of the input buffer and return how many bytes were copied.
     When the limit is [Some cap] it is mandatory not to provide more than
     [cap] bytes, otherwise input is lost; with [None] we provide as much as
     fits in the buffer.

     The optional [meth] argument of Decompress is not used here. According
     to the upstream example, Decompress (like zlib) keeps the whole input in
     memory to compute the frequencies and the dictionary at the end, and
     [meth] (SYNC, PARTIAL or FULL, see the Decompress documentation) flushes
     that internal state every [n] input bytes, e.g. [~meth:(PARTIAL, 4096)].
     The refiller receives [Some _] when [meth] is given and [None]
     otherwise. *)
  let refill input_buffer limit =
    let available = min chunk_size (data_len - !pos) in
    let n =
      match limit with
      | Some cap -> min cap available
      | None -> available
    in
    Bytes.blit_string data !pos input_buffer 0 n ;
    pos := !pos + n ;
    n
  in
  (* Flusher: Decompress wrote [len] bytes at offset 0 of the output buffer.
     Save them, then report how many bytes of the output buffer are free
     again (all of it). *)
  let flush output_buffer len =
    Stdlib_buffer.add_subbytes res output_buffer 0 len ;
    chunk_size
  in
  match
    Zlib_deflate.bytes input_buffer output_buffer refill flush
      (Zlib_deflate.default ~witness:Buffer.bytes level)
  with
  | Ok _ -> Stdlib_buffer.contents res
  | Error e ->
      Logs.err (fun m ->
          m "error %a while compressing" Zlib_deflate.pp_error e) ;
      invalid_arg "cannot compress"
(** [uncompress data] inflates the string [data] with [Zlib_inflate].

    Returns [Ok contents] with the decompressed bytes on success. If
    [Zlib_inflate.bytes] reports an error, the error is logged with
    [Logs.err] and [Error ()] is returned. *)
let uncompress data =
  (* Size of both scratch buffers handed to Decompress; the sizes only depend
     on how the caller wants to read and write. *)
  let chunk_size = 0xFFFF in
  let input_buffer = Bytes.create chunk_size in
  let output_buffer = Bytes.create chunk_size in
  (* The window is allocated by the caller so that it can be reused: per the
     upstream example it is a big buffer ([1 lsl 15] bytes) that is costly to
     allocate, and it can be reused for another stream after a
     [Window.reset]. Here we decompress a single stream, so it is created
     once per call. *)
  let window = Window.create ~crc:Window.adler32 ~witness:Buffer.bytes in
  let data_len = String.length data in
  (* Offset of the first byte of [data] not yet handed to the decompressor. *)
  let pos = ref 0 in
  (* NOTE: [res] grows automatically and nothing here bounds the size of the
     decompressed output. *)
  let res = Stdlib_buffer.create data_len in
  (* Refiller: copy the next slice of [data] to the start of the input buffer
     and return how many bytes were copied. *)
  let refill input_buffer =
    let n = min chunk_size (data_len - !pos) in
    Bytes.blit_string data !pos input_buffer 0 n ;
    pos := !pos + n ;
    n
  in
  (* Flusher: save the [len] bytes written at offset 0 of the output buffer,
     then report the whole output buffer as free again. *)
  let flush output_buffer len =
    Stdlib_buffer.add_subbytes res output_buffer 0 len ;
    chunk_size
  in
  match
    Zlib_inflate.bytes input_buffer output_buffer refill flush
      (Zlib_inflate.default ~witness:Buffer.bytes window)
  with
  | Ok _ -> Ok (Stdlib_buffer.contents res)
  | Error e ->
      Logs.err (fun m ->
          m "error %a while uncompressing" Zlib_inflate.pp_error e) ;
      Error ()