161 lines
4.8 KiB
Haskell
161 lines
4.8 KiB
Haskell
|
{-# language RoleAnnotations #-}
|
||
|
{-# language GADTs #-}
|
||
|
{-# language DataKinds #-}
|
||
|
{-# language InstanceSigs #-}
|
||
|
{-# language ScopedTypeVariables #-}
|
||
|
{-# language StandaloneDeriving #-}
|
||
|
{-# language DerivingStrategies #-}
|
||
|
{-# language TypeApplications #-}
|
||
|
{-# language TypeFamilies #-}
|
||
|
{-# options_ghc -Wall -Werror #-}
|
||
|
module Bloom (singleton, insert, Bloom, elem, assocs, hexdump) where
|
||
|
|
||
|
import Prelude hiding (elem)
|
||
|
import Data.Singletons
|
||
|
import Data.Kind (Type)
|
||
|
import Data.Array.Unboxed (UArray, listArray, (!))
|
||
|
import qualified Data.Array.Unboxed as Array
|
||
|
import GHC.Types (Nat)
|
||
|
import GHC.TypeLits (KnownNat, natVal)
|
||
|
import Data.Hashable (Hashable)
|
||
|
import qualified Data.Hashable as Hashable
|
||
|
import GHC.Exts (IsList(..))
|
||
|
import Data.List (sort)
|
||
|
import Data.Binary
|
||
|
import Control.Monad (when)
|
||
|
import Data.Bits
|
||
|
import Data.Foldable (foldl')
|
||
|
import Data.ByteString.Lazy (ByteString)
|
||
|
import qualified Data.ByteString.Lazy as ByteString
|
||
|
|
||
|
type role Bloom nominal nominal
|
||
|
data Bloom :: Nat -> Type -> Type where
|
||
|
Bloom :: forall a n . Sing n -> UArray Int Bool -> Bloom n a
|
||
|
|
||
|
deriving stock instance Show (Bloom n a)
|
||
|
instance Eq (Bloom n a) where
|
||
|
Bloom _ a == Bloom _ b = a == b
|
||
|
|
||
|
instance KnownNat n => Semigroup (Bloom n a) where
|
||
|
(<>) :: Bloom n a -> Bloom n a -> Bloom n a
|
||
|
Bloom _ a <> Bloom _ b = Bloom Sing c
|
||
|
where
|
||
|
c :: UArray Int Bool
|
||
|
c = listArray (0, pred n) $ f <$> [0..]
|
||
|
n :: Int
|
||
|
n = fromIntegral $ natVal @n Proxy
|
||
|
f :: Int -> Bool
|
||
|
f i = a ! i || b ! i
|
||
|
|
||
|
instance KnownNat n => Monoid (Bloom n a) where
|
||
|
mempty :: Bloom n a
|
||
|
mempty = Bloom Sing c
|
||
|
where
|
||
|
c :: UArray Int Bool
|
||
|
c = listArray (0, pred n) $ repeat False
|
||
|
n :: Int
|
||
|
n = fromIntegral $ natVal @n Proxy
|
||
|
|
||
|
instance KnownNat n => Binary (Bloom n a) where
|
||
|
-- This is not really a valid instance, because we don't know what
|
||
|
-- `n` and `a` was used to encode what we're `get`ing.
|
||
|
get = do
|
||
|
n <- get @Integer
|
||
|
let n' = natVal @n Proxy
|
||
|
when (n /= n') $ fail $ "Bloom.get: Wrong bucket size. Expected " <> show n' <> ". Got: " <> show n <> "."
|
||
|
Bloom Sing . listArray (0, fromInteger (pred n)) . unpack <$> get @ByteString
|
||
|
put b = do
|
||
|
put $ natVal @n Proxy
|
||
|
put @ByteString $ pack $ snd <$> assocs b
|
||
|
|
||
|
singleton :: KnownNat n => Hashable a => a -> Bloom n a
|
||
|
singleton a = insert a mempty
|
||
|
|
||
|
insert :: forall n a . KnownNat n => Hashable a => a -> Bloom n a -> Bloom n a
|
||
|
insert a (Bloom _ b) = Bloom Sing c
|
||
|
where
|
||
|
c :: UArray Int Bool
|
||
|
c = listArray (0, pred n) $ f <$> [0..n]
|
||
|
n :: Int
|
||
|
n = fromIntegral $ natVal @n Proxy
|
||
|
h :: Int
|
||
|
h = hash n a
|
||
|
f :: Int -> Bool
|
||
|
f i = b ! i || h == i
|
||
|
|
||
|
hash :: Hashable a => Int -> a -> Int
|
||
|
hash n = (`mod` n) . Hashable.hash
|
||
|
|
||
|
instance (KnownNat n, Hashable a) => IsList (Bloom n a) where
|
||
|
type Item (Bloom n a) = a
|
||
|
fromList :: [] a -> Bloom n a
|
||
|
fromList = Bloom Sing . indicesToArray (pred n) . fmap (hash n)
|
||
|
where
|
||
|
n :: Int
|
||
|
n = fromIntegral $ natVal @n Proxy
|
||
|
toList = error "Unimplemented"
|
||
|
|
||
|
indicesToArray :: Int -> [] Int -> UArray Int Bool
|
||
|
indicesToArray n = listArray (0, n) . indicesToList
|
||
|
|
||
|
indicesToList :: [] Int -> [] Bool
|
||
|
indicesToList xs0 = take (succ $ maximum xs0) $ sort xs0 <| [0..]
|
||
|
where
|
||
|
(<|) :: [] Int -> [] Int -> [] Bool
|
||
|
[] <| _ = repeat False
|
||
|
xs@(x:xss) <| (y:yss) = if x <= y then True : (xss <| yss) else False : (xs<|yss)
|
||
|
_ <| _ = error "IMPOSSIBLE"
|
||
|
|
||
|
elem :: forall a n . KnownNat n => Hashable a => a -> Bloom n a -> Bool
|
||
|
elem a (Bloom _ r) = r ! hash n a
|
||
|
where
|
||
|
n = fromIntegral $ natVal @n Proxy
|
||
|
|
||
|
assocs :: Bloom n a -> [(Int, Bool)]
|
||
|
assocs (Bloom _ b) = Array.assocs b
|
||
|
|
||
|
pack :: [] Bool -> ByteString
|
||
|
pack = ByteString.pack . toWords
|
||
|
|
||
|
unpack :: ByteString -> [] Bool
|
||
|
unpack = (>>= fromWord) . ByteString.unpack
|
||
|
|
||
|
toWords :: [] Bool -> [] Word8
|
||
|
toWords = fmap toWord . chunksOf 8
|
||
|
|
||
|
chunksOf :: Int -> [] a -> [[a]]
|
||
|
chunksOf n xs = a : case b of [] -> [] ; _ -> chunksOf n b
|
||
|
where
|
||
|
a = take n xs
|
||
|
b = drop n xs
|
||
|
|
||
|
toWord :: Bits b => [] Bool -> b
|
||
|
toWord xs = foldl' (.|.) zeroBits $ zipWith shift (fmap conv xs) [0..]
|
||
|
where
|
||
|
conv b = if b then bit 0 else zeroBits
|
||
|
|
||
|
fromWord :: Word8 -> [] Bool
|
||
|
fromWord w = testBit w <$> [0..7]
|
||
|
|
||
|
hexdump :: Bloom n a -> String
|
||
|
hexdump = hexdump' . toWords . fmap snd . assocs
|
||
|
|
||
|
hexdump' :: [] Word8 -> String
|
||
|
hexdump' = unlines . zipWith (\n s -> step16 n <> s) [0..] . ls
|
||
|
where
|
||
|
ls :: [Word8] -> [String]
|
||
|
ls = fmap unwords . chunksOf 16 . fmap step8
|
||
|
step16 :: Word16 -> [] Char
|
||
|
step16 w0 = [r ! fromIntegral d0, r ! fromIntegral d1, r ! fromIntegral d2, r ! fromIntegral d3]
|
||
|
where
|
||
|
(d0, w1) = w0 `divMod` 0x10
|
||
|
(d1, w2) = w1 `divMod` 0x10
|
||
|
(d2, w3) = w2 `divMod` 0x10
|
||
|
(d3, _ ) = w3 `divMod` 0x10
|
||
|
step8 :: Word8 -> [] Char
|
||
|
step8 w = [r ! a, r ! b]
|
||
|
where
|
||
|
(a, b) = w `divMod` 0x10
|
||
|
r :: UArray Word8 Char
|
||
|
r = listArray (0, 15) (['0'..'9'] <> ['a'..'f'])
|