{-# language RoleAnnotations #-} {-# language GADTs #-} {-# language DataKinds #-} {-# language InstanceSigs #-} {-# language ScopedTypeVariables #-} {-# language StandaloneDeriving #-} {-# language DerivingStrategies #-} {-# language TypeApplications #-} {-# language TypeFamilies #-} {-# options_ghc -Wall -Werror #-} module Bloom (singleton, insert, Bloom, elem, assocs, hexdump) where import Prelude hiding (elem) import Data.Singletons import Data.Kind (Type) import Data.Array.Unboxed (UArray, listArray, (!)) import qualified Data.Array.Unboxed as Array import GHC.Types (Nat) import GHC.TypeLits (KnownNat, natVal) import Data.Hashable (Hashable) import qualified Data.Hashable as Hashable import GHC.Exts (IsList(..)) import Data.List (sort) import Data.Binary import Control.Monad (when) import Data.Bits import Data.Foldable (foldl') import Data.ByteString.Lazy (ByteString) import qualified Data.ByteString.Lazy as ByteString type role Bloom nominal nominal data Bloom :: Nat -> Type -> Type where Bloom :: forall a n . Sing n -> UArray Int Bool -> Bloom n a deriving stock instance Show (Bloom n a) instance Eq (Bloom n a) where Bloom _ a == Bloom _ b = a == b instance KnownNat n => Semigroup (Bloom n a) where (<>) :: Bloom n a -> Bloom n a -> Bloom n a Bloom _ a <> Bloom _ b = Bloom Sing c where c :: UArray Int Bool c = listArray (0, pred n) $ f <$> [0..] n :: Int n = fromIntegral $ natVal @n Proxy f :: Int -> Bool f i = a ! i || b ! i instance KnownNat n => Monoid (Bloom n a) where mempty :: Bloom n a mempty = Bloom Sing c where c :: UArray Int Bool c = listArray (0, pred n) $ repeat False n :: Int n = fromIntegral $ natVal @n Proxy instance KnownNat n => Binary (Bloom n a) where -- This is not really a valid instance, because we don't know what -- `n` and `a` was used to encode what we're `get`ing. get = do n <- get @Integer let n' = natVal @n Proxy when (n /= n') $ fail $ "Bloom.get: Wrong bucket size. Expected " <> show n' <> ". Got: " <> show n <> "." Bloom Sing . listArray (0, fromInteger (pred n)) . unpack <$> get @ByteString put b = do put $ natVal @n Proxy put @ByteString $ pack $ snd <$> assocs b singleton :: KnownNat n => Hashable a => a -> Bloom n a singleton a = insert a mempty insert :: forall n a . KnownNat n => Hashable a => a -> Bloom n a -> Bloom n a insert a (Bloom _ b) = Bloom Sing c where c :: UArray Int Bool c = listArray (0, pred n) $ f <$> [0..n] n :: Int n = fromIntegral $ natVal @n Proxy h :: Int h = hash n a f :: Int -> Bool f i = b ! i || h == i hash :: Hashable a => Int -> a -> Int hash n = (`mod` n) . Hashable.hash instance (KnownNat n, Hashable a) => IsList (Bloom n a) where type Item (Bloom n a) = a fromList :: [] a -> Bloom n a fromList = Bloom Sing . indicesToArray (pred n) . fmap (hash n) where n :: Int n = fromIntegral $ natVal @n Proxy toList = error "Unimplemented" indicesToArray :: Int -> [] Int -> UArray Int Bool indicesToArray n = listArray (0, n) . indicesToList indicesToList :: [] Int -> [] Bool indicesToList xs0 = take (succ $ maximum xs0) $ sort xs0 <| [0..] where (<|) :: [] Int -> [] Int -> [] Bool [] <| _ = repeat False xs@(x:xss) <| (y:yss) = if x <= y then True : (xss <| yss) else False : (xs<|yss) _ <| _ = error "IMPOSSIBLE" elem :: forall a n . KnownNat n => Hashable a => a -> Bloom n a -> Bool elem a (Bloom _ r) = r ! hash n a where n = fromIntegral $ natVal @n Proxy assocs :: Bloom n a -> [(Int, Bool)] assocs (Bloom _ b) = Array.assocs b pack :: [] Bool -> ByteString pack = ByteString.pack . toWords unpack :: ByteString -> [] Bool unpack = (>>= fromWord) . ByteString.unpack toWords :: [] Bool -> [] Word8 toWords = fmap toWord . chunksOf 8 chunksOf :: Int -> [] a -> [[a]] chunksOf n xs = a : case b of [] -> [] ; _ -> chunksOf n b where a = take n xs b = drop n xs toWord :: Bits b => [] Bool -> b toWord xs = foldl' (.|.) zeroBits $ zipWith shift (fmap conv xs) [0..] where conv b = if b then bit 0 else zeroBits fromWord :: Word8 -> [] Bool fromWord w = testBit w <$> [0..7] hexdump :: Bloom n a -> String hexdump = hexdump' . toWords . fmap snd . assocs hexdump' :: [] Word8 -> String hexdump' = unlines . zipWith (\n s -> step16 n <> s) [0..] . ls where ls :: [Word8] -> [String] ls = fmap unwords . chunksOf 16 . fmap step8 step16 :: Word16 -> [] Char step16 w0 = [r ! fromIntegral d0, r ! fromIntegral d1, r ! fromIntegral d2, r ! fromIntegral d3] where (d0, w1) = w0 `divMod` 0x10 (d1, w2) = w1 `divMod` 0x10 (d2, w3) = w2 `divMod` 0x10 (d3, _ ) = w3 `divMod` 0x10 step8 :: Word8 -> [] Char step8 w = [r ! a, r ! b] where (a, b) = w `divMod` 0x10 r :: UArray Word8 Char r = listArray (0, 15) (['0'..'9'] <> ['a'..'f'])