Hello world
This commit is contained in:
commit
7f0b3a166a
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
|
@ -0,0 +1,4 @@
|
|||
*.hi
|
||||
*.o
|
||||
*.bin
|
||||
Main
|
160
Bloom.hs
Normal file
160
Bloom.hs
Normal file
|
@ -0,0 +1,160 @@
|
|||
{-# language RoleAnnotations #-}
|
||||
{-# language GADTs #-}
|
||||
{-# language DataKinds #-}
|
||||
{-# language InstanceSigs #-}
|
||||
{-# language ScopedTypeVariables #-}
|
||||
{-# language StandaloneDeriving #-}
|
||||
{-# language DerivingStrategies #-}
|
||||
{-# language TypeApplications #-}
|
||||
{-# language TypeFamilies #-}
|
||||
{-# options_ghc -Wall -Werror #-}
|
||||
module Bloom (singleton, insert, Bloom, elem, assocs, hexdump) where
|
||||
|
||||
import Prelude hiding (elem)
|
||||
import Data.Singletons
|
||||
import Data.Kind (Type)
|
||||
import Data.Array.Unboxed (UArray, listArray, (!))
|
||||
import qualified Data.Array.Unboxed as Array
|
||||
import GHC.Types (Nat)
|
||||
import GHC.TypeLits (KnownNat, natVal)
|
||||
import Data.Hashable (Hashable)
|
||||
import qualified Data.Hashable as Hashable
|
||||
import GHC.Exts (IsList(..))
|
||||
import Data.List (sort)
|
||||
import Data.Binary
|
||||
import Control.Monad (when)
|
||||
import Data.Bits
|
||||
import Data.Foldable (foldl')
|
||||
import Data.ByteString.Lazy (ByteString)
|
||||
import qualified Data.ByteString.Lazy as ByteString
|
||||
|
||||
type role Bloom nominal nominal
|
||||
data Bloom :: Nat -> Type -> Type where
|
||||
Bloom :: forall a n . Sing n -> UArray Int Bool -> Bloom n a
|
||||
|
||||
deriving stock instance Show (Bloom n a)
|
||||
instance Eq (Bloom n a) where
|
||||
Bloom _ a == Bloom _ b = a == b
|
||||
|
||||
instance KnownNat n => Semigroup (Bloom n a) where
|
||||
(<>) :: Bloom n a -> Bloom n a -> Bloom n a
|
||||
Bloom _ a <> Bloom _ b = Bloom Sing c
|
||||
where
|
||||
c :: UArray Int Bool
|
||||
c = listArray (0, pred n) $ f <$> [0..]
|
||||
n :: Int
|
||||
n = fromIntegral $ natVal @n Proxy
|
||||
f :: Int -> Bool
|
||||
f i = a ! i || b ! i
|
||||
|
||||
instance KnownNat n => Monoid (Bloom n a) where
|
||||
mempty :: Bloom n a
|
||||
mempty = Bloom Sing c
|
||||
where
|
||||
c :: UArray Int Bool
|
||||
c = listArray (0, pred n) $ repeat False
|
||||
n :: Int
|
||||
n = fromIntegral $ natVal @n Proxy
|
||||
|
||||
instance KnownNat n => Binary (Bloom n a) where
|
||||
-- This is not really a valid instance, because we don't know what
|
||||
-- `n` and `a` was used to encode what we're `get`ing.
|
||||
get = do
|
||||
n <- get @Integer
|
||||
let n' = natVal @n Proxy
|
||||
when (n /= n') $ fail $ "Bloom.get: Wrong bucket size. Expected " <> show n' <> ". Got: " <> show n <> "."
|
||||
Bloom Sing . listArray (0, fromInteger (pred n)) . unpack <$> get @ByteString
|
||||
put b = do
|
||||
put $ natVal @n Proxy
|
||||
put @ByteString $ pack $ snd <$> assocs b
|
||||
|
||||
singleton :: KnownNat n => Hashable a => a -> Bloom n a
|
||||
singleton a = insert a mempty
|
||||
|
||||
insert :: forall n a . KnownNat n => Hashable a => a -> Bloom n a -> Bloom n a
|
||||
insert a (Bloom _ b) = Bloom Sing c
|
||||
where
|
||||
c :: UArray Int Bool
|
||||
c = listArray (0, pred n) $ f <$> [0..n]
|
||||
n :: Int
|
||||
n = fromIntegral $ natVal @n Proxy
|
||||
h :: Int
|
||||
h = hash n a
|
||||
f :: Int -> Bool
|
||||
f i = b ! i || h == i
|
||||
|
||||
hash :: Hashable a => Int -> a -> Int
|
||||
hash n = (`mod` n) . Hashable.hash
|
||||
|
||||
instance (KnownNat n, Hashable a) => IsList (Bloom n a) where
|
||||
type Item (Bloom n a) = a
|
||||
fromList :: [] a -> Bloom n a
|
||||
fromList = Bloom Sing . indicesToArray (pred n) . fmap (hash n)
|
||||
where
|
||||
n :: Int
|
||||
n = fromIntegral $ natVal @n Proxy
|
||||
toList = error "Unimplemented"
|
||||
|
||||
indicesToArray :: Int -> [] Int -> UArray Int Bool
|
||||
indicesToArray n = listArray (0, n) . indicesToList
|
||||
|
||||
indicesToList :: [] Int -> [] Bool
|
||||
indicesToList xs0 = take (succ $ maximum xs0) $ sort xs0 <| [0..]
|
||||
where
|
||||
(<|) :: [] Int -> [] Int -> [] Bool
|
||||
[] <| _ = repeat False
|
||||
xs@(x:xss) <| (y:yss) = if x <= y then True : (xss <| yss) else False : (xs<|yss)
|
||||
_ <| _ = error "IMPOSSIBLE"
|
||||
|
||||
elem :: forall a n . KnownNat n => Hashable a => a -> Bloom n a -> Bool
|
||||
elem a (Bloom _ r) = r ! hash n a
|
||||
where
|
||||
n = fromIntegral $ natVal @n Proxy
|
||||
|
||||
assocs :: Bloom n a -> [(Int, Bool)]
|
||||
assocs (Bloom _ b) = Array.assocs b
|
||||
|
||||
pack :: [] Bool -> ByteString
|
||||
pack = ByteString.pack . toWords
|
||||
|
||||
unpack :: ByteString -> [] Bool
|
||||
unpack = (>>= fromWord) . ByteString.unpack
|
||||
|
||||
toWords :: [] Bool -> [] Word8
|
||||
toWords = fmap toWord . chunksOf 8
|
||||
|
||||
chunksOf :: Int -> [] a -> [[a]]
|
||||
chunksOf n xs = a : case b of [] -> [] ; _ -> chunksOf n b
|
||||
where
|
||||
a = take n xs
|
||||
b = drop n xs
|
||||
|
||||
toWord :: Bits b => [] Bool -> b
|
||||
toWord xs = foldl' (.|.) zeroBits $ zipWith shift (fmap conv xs) [0..]
|
||||
where
|
||||
conv b = if b then bit 0 else zeroBits
|
||||
|
||||
fromWord :: Word8 -> [] Bool
|
||||
fromWord w = testBit w <$> [0..7]
|
||||
|
||||
hexdump :: Bloom n a -> String
|
||||
hexdump = hexdump' . toWords . fmap snd . assocs
|
||||
|
||||
hexdump' :: [] Word8 -> String
|
||||
hexdump' = unlines . zipWith (\n s -> step16 n <> s) [0..] . ls
|
||||
where
|
||||
ls :: [Word8] -> [String]
|
||||
ls = fmap unwords . chunksOf 16 . fmap step8
|
||||
step16 :: Word16 -> [] Char
|
||||
step16 w0 = [r ! fromIntegral d0, r ! fromIntegral d1, r ! fromIntegral d2, r ! fromIntegral d3]
|
||||
where
|
||||
(d0, w1) = w0 `divMod` 0x10
|
||||
(d1, w2) = w1 `divMod` 0x10
|
||||
(d2, w3) = w2 `divMod` 0x10
|
||||
(d3, _ ) = w3 `divMod` 0x10
|
||||
step8 :: Word8 -> [] Char
|
||||
step8 w = [r ! a, r ! b]
|
||||
where
|
||||
(a, b) = w `divMod` 0x10
|
||||
r :: UArray Word8 Char
|
||||
r = listArray (0, 15) (['0'..'9'] <> ['a'..'f'])
|
48
Main.hs
Normal file
48
Main.hs
Normal file
|
@ -0,0 +1,48 @@
|
|||
{-# language TypeApplications #-}
|
||||
{-# language DataKinds #-}
|
||||
{-# options_ghc -Wall -Werror #-}
|
||||
module Main (main) where
|
||||
|
||||
import GHC.Exts (IsList(..))
|
||||
import Data.Foldable (traverse_)
|
||||
import System.Exit (ExitCode(ExitSuccess, ExitFailure), exitWith)
|
||||
import Bloom (Bloom)
|
||||
import qualified Bloom
|
||||
import qualified Data.Binary as Binary
|
||||
import qualified Data.ByteString.Lazy as ByteString
|
||||
import System.Directory
|
||||
import System.FilePath
|
||||
import System.Environment
|
||||
|
||||
main :: IO ()
|
||||
main = do
|
||||
b <- cachedLoad
|
||||
input <- lines <$> getContents
|
||||
let ws' = filter (not . (`Bloom.elem` b)) input
|
||||
traverse_ putStrLn ws'
|
||||
exitWith (if null ws' then ExitSuccess else ExitFailure 1)
|
||||
|
||||
cachedLoad :: IO (Bloom Size String)
|
||||
cachedLoad = do
|
||||
cache <- getCachePath
|
||||
cacheExists <- doesFileExist cache
|
||||
if cacheExists
|
||||
then load cache
|
||||
else do
|
||||
b <- build
|
||||
ByteString.writeFile cache $ Binary.encode b
|
||||
pure b
|
||||
|
||||
getCachePath :: IO FilePath
|
||||
getCachePath = (</> "bloom-spell/words.bin") <$> getEnv "XDG_DATA_HOME"
|
||||
|
||||
load :: FilePath -> IO (Bloom Size String)
|
||||
load p = Binary.decode @(Bloom Size _) <$> ByteString.readFile p
|
||||
|
||||
build :: IO (Bloom Size String)
|
||||
build = fromList @(Bloom Size _) . lines <$> readFile dict
|
||||
|
||||
type Size = 0xfffff
|
||||
|
||||
dict :: FilePath
|
||||
dict = "/usr/share/dict/words"
|
Loading…
Reference in a new issue