bloom/Bloom.hs

161 lines
4.8 KiB
Haskell

{-# language RoleAnnotations #-}
{-# language GADTs #-}
{-# language DataKinds #-}
{-# language InstanceSigs #-}
{-# language ScopedTypeVariables #-}
{-# language StandaloneDeriving #-}
{-# language DerivingStrategies #-}
{-# language TypeApplications #-}
{-# language TypeFamilies #-}
{-# options_ghc -Wall -Werror #-}
module Bloom (singleton, insert, Bloom, elem, assocs, hexdump) where
import Prelude hiding (elem)
import Data.Singletons
import Data.Kind (Type)
import Data.Array.Unboxed (UArray, listArray, (!))
import qualified Data.Array.Unboxed as Array
import GHC.Types (Nat)
import GHC.TypeLits (KnownNat, natVal)
import Data.Hashable (Hashable)
import qualified Data.Hashable as Hashable
import GHC.Exts (IsList(..))
import Data.List (sort)
import Data.Binary
import Control.Monad (when)
import Data.Bits
import Data.Foldable (foldl')
import Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as ByteString
type role Bloom nominal nominal
data Bloom :: Nat -> Type -> Type where
Bloom :: forall a n . Sing n -> UArray Int Bool -> Bloom n a
deriving stock instance Show (Bloom n a)
instance Eq (Bloom n a) where
Bloom _ a == Bloom _ b = a == b
instance KnownNat n => Semigroup (Bloom n a) where
(<>) :: Bloom n a -> Bloom n a -> Bloom n a
Bloom _ a <> Bloom _ b = Bloom Sing c
where
c :: UArray Int Bool
c = listArray (0, pred n) $ f <$> [0..]
n :: Int
n = fromIntegral $ natVal @n Proxy
f :: Int -> Bool
f i = a ! i || b ! i
instance KnownNat n => Monoid (Bloom n a) where
mempty :: Bloom n a
mempty = Bloom Sing c
where
c :: UArray Int Bool
c = listArray (0, pred n) $ repeat False
n :: Int
n = fromIntegral $ natVal @n Proxy
instance KnownNat n => Binary (Bloom n a) where
-- This is not really a valid instance, because we don't know what
-- `n` and `a` was used to encode what we're `get`ing.
get = do
n <- get @Integer
let n' = natVal @n Proxy
when (n /= n') $ fail $ "Bloom.get: Wrong bucket size. Expected " <> show n' <> ". Got: " <> show n <> "."
Bloom Sing . listArray (0, fromInteger (pred n)) . unpack <$> get @ByteString
put b = do
put $ natVal @n Proxy
put @ByteString $ pack $ snd <$> assocs b
singleton :: KnownNat n => Hashable a => a -> Bloom n a
singleton a = insert a mempty
insert :: forall n a . KnownNat n => Hashable a => a -> Bloom n a -> Bloom n a
insert a (Bloom _ b) = Bloom Sing c
where
c :: UArray Int Bool
c = listArray (0, pred n) $ f <$> [0..n]
n :: Int
n = fromIntegral $ natVal @n Proxy
h :: Int
h = hash n a
f :: Int -> Bool
f i = b ! i || h == i
hash :: Hashable a => Int -> a -> Int
hash n = (`mod` n) . Hashable.hash
instance (KnownNat n, Hashable a) => IsList (Bloom n a) where
type Item (Bloom n a) = a
fromList :: [] a -> Bloom n a
fromList = Bloom Sing . indicesToArray (pred n) . fmap (hash n)
where
n :: Int
n = fromIntegral $ natVal @n Proxy
toList = error "Unimplemented"
indicesToArray :: Int -> [] Int -> UArray Int Bool
indicesToArray n = listArray (0, n) . indicesToList
indicesToList :: [] Int -> [] Bool
indicesToList xs0 = take (succ $ maximum xs0) $ sort xs0 <| [0..]
where
(<|) :: [] Int -> [] Int -> [] Bool
[] <| _ = repeat False
xs@(x:xss) <| (y:yss) = if x <= y then True : (xss <| yss) else False : (xs<|yss)
_ <| _ = error "IMPOSSIBLE"
elem :: forall a n . KnownNat n => Hashable a => a -> Bloom n a -> Bool
elem a (Bloom _ r) = r ! hash n a
where
n = fromIntegral $ natVal @n Proxy
assocs :: Bloom n a -> [(Int, Bool)]
assocs (Bloom _ b) = Array.assocs b
pack :: [] Bool -> ByteString
pack = ByteString.pack . toWords
unpack :: ByteString -> [] Bool
unpack = (>>= fromWord) . ByteString.unpack
toWords :: [] Bool -> [] Word8
toWords = fmap toWord . chunksOf 8
chunksOf :: Int -> [] a -> [[a]]
chunksOf n xs = a : case b of [] -> [] ; _ -> chunksOf n b
where
a = take n xs
b = drop n xs
toWord :: Bits b => [] Bool -> b
toWord xs = foldl' (.|.) zeroBits $ zipWith shift (fmap conv xs) [0..]
where
conv b = if b then bit 0 else zeroBits
fromWord :: Word8 -> [] Bool
fromWord w = testBit w <$> [0..7]
hexdump :: Bloom n a -> String
hexdump = hexdump' . toWords . fmap snd . assocs
hexdump' :: [] Word8 -> String
hexdump' = unlines . zipWith (\n s -> step16 n <> s) [0..] . ls
where
ls :: [Word8] -> [String]
ls = fmap unwords . chunksOf 16 . fmap step8
step16 :: Word16 -> [] Char
step16 w0 = [r ! fromIntegral d0, r ! fromIntegral d1, r ! fromIntegral d2, r ! fromIntegral d3]
where
(d0, w1) = w0 `divMod` 0x10
(d1, w2) = w1 `divMod` 0x10
(d2, w3) = w2 `divMod` 0x10
(d3, _ ) = w3 `divMod` 0x10
step8 :: Word8 -> [] Char
step8 w = [r ! a, r ! b]
where
(a, b) = w `divMod` 0x10
r :: UArray Word8 Char
r = listArray (0, 15) (['0'..'9'] <> ['a'..'f'])