Hello world
This commit is contained in:
commit
7f0b3a166a
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
*.hi
|
||||||
|
*.o
|
||||||
|
*.bin
|
||||||
|
Main
|
160
Bloom.hs
Normal file
160
Bloom.hs
Normal file
|
@ -0,0 +1,160 @@
|
||||||
|
{-# language RoleAnnotations #-}
|
||||||
|
{-# language GADTs #-}
|
||||||
|
{-# language DataKinds #-}
|
||||||
|
{-# language InstanceSigs #-}
|
||||||
|
{-# language ScopedTypeVariables #-}
|
||||||
|
{-# language StandaloneDeriving #-}
|
||||||
|
{-# language DerivingStrategies #-}
|
||||||
|
{-# language TypeApplications #-}
|
||||||
|
{-# language TypeFamilies #-}
|
||||||
|
{-# options_ghc -Wall -Werror #-}
|
||||||
|
module Bloom (singleton, insert, Bloom, elem, assocs, hexdump) where
|
||||||
|
|
||||||
|
import Prelude hiding (elem)
|
||||||
|
import Data.Singletons
|
||||||
|
import Data.Kind (Type)
|
||||||
|
import Data.Array.Unboxed (UArray, listArray, (!))
|
||||||
|
import qualified Data.Array.Unboxed as Array
|
||||||
|
import GHC.Types (Nat)
|
||||||
|
import GHC.TypeLits (KnownNat, natVal)
|
||||||
|
import Data.Hashable (Hashable)
|
||||||
|
import qualified Data.Hashable as Hashable
|
||||||
|
import GHC.Exts (IsList(..))
|
||||||
|
import Data.List (sort)
|
||||||
|
import Data.Binary
|
||||||
|
import Control.Monad (when)
|
||||||
|
import Data.Bits
|
||||||
|
import Data.Foldable (foldl')
|
||||||
|
import Data.ByteString.Lazy (ByteString)
|
||||||
|
import qualified Data.ByteString.Lazy as ByteString
|
||||||
|
|
||||||
|
type role Bloom nominal nominal
|
||||||
|
data Bloom :: Nat -> Type -> Type where
|
||||||
|
Bloom :: forall a n . Sing n -> UArray Int Bool -> Bloom n a
|
||||||
|
|
||||||
|
deriving stock instance Show (Bloom n a)
|
||||||
|
instance Eq (Bloom n a) where
|
||||||
|
Bloom _ a == Bloom _ b = a == b
|
||||||
|
|
||||||
|
instance KnownNat n => Semigroup (Bloom n a) where
|
||||||
|
(<>) :: Bloom n a -> Bloom n a -> Bloom n a
|
||||||
|
Bloom _ a <> Bloom _ b = Bloom Sing c
|
||||||
|
where
|
||||||
|
c :: UArray Int Bool
|
||||||
|
c = listArray (0, pred n) $ f <$> [0..]
|
||||||
|
n :: Int
|
||||||
|
n = fromIntegral $ natVal @n Proxy
|
||||||
|
f :: Int -> Bool
|
||||||
|
f i = a ! i || b ! i
|
||||||
|
|
||||||
|
instance KnownNat n => Monoid (Bloom n a) where
|
||||||
|
mempty :: Bloom n a
|
||||||
|
mempty = Bloom Sing c
|
||||||
|
where
|
||||||
|
c :: UArray Int Bool
|
||||||
|
c = listArray (0, pred n) $ repeat False
|
||||||
|
n :: Int
|
||||||
|
n = fromIntegral $ natVal @n Proxy
|
||||||
|
|
||||||
|
instance KnownNat n => Binary (Bloom n a) where
|
||||||
|
-- This is not really a valid instance, because we don't know what
|
||||||
|
-- `n` and `a` was used to encode what we're `get`ing.
|
||||||
|
get = do
|
||||||
|
n <- get @Integer
|
||||||
|
let n' = natVal @n Proxy
|
||||||
|
when (n /= n') $ fail $ "Bloom.get: Wrong bucket size. Expected " <> show n' <> ". Got: " <> show n <> "."
|
||||||
|
Bloom Sing . listArray (0, fromInteger (pred n)) . unpack <$> get @ByteString
|
||||||
|
put b = do
|
||||||
|
put $ natVal @n Proxy
|
||||||
|
put @ByteString $ pack $ snd <$> assocs b
|
||||||
|
|
||||||
|
singleton :: KnownNat n => Hashable a => a -> Bloom n a
|
||||||
|
singleton a = insert a mempty
|
||||||
|
|
||||||
|
insert :: forall n a . KnownNat n => Hashable a => a -> Bloom n a -> Bloom n a
|
||||||
|
insert a (Bloom _ b) = Bloom Sing c
|
||||||
|
where
|
||||||
|
c :: UArray Int Bool
|
||||||
|
c = listArray (0, pred n) $ f <$> [0..n]
|
||||||
|
n :: Int
|
||||||
|
n = fromIntegral $ natVal @n Proxy
|
||||||
|
h :: Int
|
||||||
|
h = hash n a
|
||||||
|
f :: Int -> Bool
|
||||||
|
f i = b ! i || h == i
|
||||||
|
|
||||||
|
hash :: Hashable a => Int -> a -> Int
|
||||||
|
hash n = (`mod` n) . Hashable.hash
|
||||||
|
|
||||||
|
instance (KnownNat n, Hashable a) => IsList (Bloom n a) where
|
||||||
|
type Item (Bloom n a) = a
|
||||||
|
fromList :: [] a -> Bloom n a
|
||||||
|
fromList = Bloom Sing . indicesToArray (pred n) . fmap (hash n)
|
||||||
|
where
|
||||||
|
n :: Int
|
||||||
|
n = fromIntegral $ natVal @n Proxy
|
||||||
|
toList = error "Unimplemented"
|
||||||
|
|
||||||
|
indicesToArray :: Int -> [] Int -> UArray Int Bool
|
||||||
|
indicesToArray n = listArray (0, n) . indicesToList
|
||||||
|
|
||||||
|
indicesToList :: [] Int -> [] Bool
|
||||||
|
indicesToList xs0 = take (succ $ maximum xs0) $ sort xs0 <| [0..]
|
||||||
|
where
|
||||||
|
(<|) :: [] Int -> [] Int -> [] Bool
|
||||||
|
[] <| _ = repeat False
|
||||||
|
xs@(x:xss) <| (y:yss) = if x <= y then True : (xss <| yss) else False : (xs<|yss)
|
||||||
|
_ <| _ = error "IMPOSSIBLE"
|
||||||
|
|
||||||
|
elem :: forall a n . KnownNat n => Hashable a => a -> Bloom n a -> Bool
|
||||||
|
elem a (Bloom _ r) = r ! hash n a
|
||||||
|
where
|
||||||
|
n = fromIntegral $ natVal @n Proxy
|
||||||
|
|
||||||
|
assocs :: Bloom n a -> [(Int, Bool)]
|
||||||
|
assocs (Bloom _ b) = Array.assocs b
|
||||||
|
|
||||||
|
pack :: [] Bool -> ByteString
|
||||||
|
pack = ByteString.pack . toWords
|
||||||
|
|
||||||
|
unpack :: ByteString -> [] Bool
|
||||||
|
unpack = (>>= fromWord) . ByteString.unpack
|
||||||
|
|
||||||
|
toWords :: [] Bool -> [] Word8
|
||||||
|
toWords = fmap toWord . chunksOf 8
|
||||||
|
|
||||||
|
chunksOf :: Int -> [] a -> [[a]]
|
||||||
|
chunksOf n xs = a : case b of [] -> [] ; _ -> chunksOf n b
|
||||||
|
where
|
||||||
|
a = take n xs
|
||||||
|
b = drop n xs
|
||||||
|
|
||||||
|
toWord :: Bits b => [] Bool -> b
|
||||||
|
toWord xs = foldl' (.|.) zeroBits $ zipWith shift (fmap conv xs) [0..]
|
||||||
|
where
|
||||||
|
conv b = if b then bit 0 else zeroBits
|
||||||
|
|
||||||
|
fromWord :: Word8 -> [] Bool
|
||||||
|
fromWord w = testBit w <$> [0..7]
|
||||||
|
|
||||||
|
hexdump :: Bloom n a -> String
|
||||||
|
hexdump = hexdump' . toWords . fmap snd . assocs
|
||||||
|
|
||||||
|
hexdump' :: [] Word8 -> String
|
||||||
|
hexdump' = unlines . zipWith (\n s -> step16 n <> s) [0..] . ls
|
||||||
|
where
|
||||||
|
ls :: [Word8] -> [String]
|
||||||
|
ls = fmap unwords . chunksOf 16 . fmap step8
|
||||||
|
step16 :: Word16 -> [] Char
|
||||||
|
step16 w0 = [r ! fromIntegral d0, r ! fromIntegral d1, r ! fromIntegral d2, r ! fromIntegral d3]
|
||||||
|
where
|
||||||
|
(d0, w1) = w0 `divMod` 0x10
|
||||||
|
(d1, w2) = w1 `divMod` 0x10
|
||||||
|
(d2, w3) = w2 `divMod` 0x10
|
||||||
|
(d3, _ ) = w3 `divMod` 0x10
|
||||||
|
step8 :: Word8 -> [] Char
|
||||||
|
step8 w = [r ! a, r ! b]
|
||||||
|
where
|
||||||
|
(a, b) = w `divMod` 0x10
|
||||||
|
r :: UArray Word8 Char
|
||||||
|
r = listArray (0, 15) (['0'..'9'] <> ['a'..'f'])
|
48
Main.hs
Normal file
48
Main.hs
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
{-# language TypeApplications #-}
|
||||||
|
{-# language DataKinds #-}
|
||||||
|
{-# options_ghc -Wall -Werror #-}
|
||||||
|
module Main (main) where
|
||||||
|
|
||||||
|
import GHC.Exts (IsList(..))
|
||||||
|
import Data.Foldable (traverse_)
|
||||||
|
import System.Exit (ExitCode(ExitSuccess, ExitFailure), exitWith)
|
||||||
|
import Bloom (Bloom)
|
||||||
|
import qualified Bloom
|
||||||
|
import qualified Data.Binary as Binary
|
||||||
|
import qualified Data.ByteString.Lazy as ByteString
|
||||||
|
import System.Directory
|
||||||
|
import System.FilePath
|
||||||
|
import System.Environment
|
||||||
|
|
||||||
|
main :: IO ()
|
||||||
|
main = do
|
||||||
|
b <- cachedLoad
|
||||||
|
input <- lines <$> getContents
|
||||||
|
let ws' = filter (not . (`Bloom.elem` b)) input
|
||||||
|
traverse_ putStrLn ws'
|
||||||
|
exitWith (if null ws' then ExitSuccess else ExitFailure 1)
|
||||||
|
|
||||||
|
cachedLoad :: IO (Bloom Size String)
|
||||||
|
cachedLoad = do
|
||||||
|
cache <- getCachePath
|
||||||
|
cacheExists <- doesFileExist cache
|
||||||
|
if cacheExists
|
||||||
|
then load cache
|
||||||
|
else do
|
||||||
|
b <- build
|
||||||
|
ByteString.writeFile cache $ Binary.encode b
|
||||||
|
pure b
|
||||||
|
|
||||||
|
getCachePath :: IO FilePath
|
||||||
|
getCachePath = (</> "bloom-spell/words.bin") <$> getEnv "XDG_DATA_HOME"
|
||||||
|
|
||||||
|
load :: FilePath -> IO (Bloom Size String)
|
||||||
|
load p = Binary.decode @(Bloom Size _) <$> ByteString.readFile p
|
||||||
|
|
||||||
|
build :: IO (Bloom Size String)
|
||||||
|
build = fromList @(Bloom Size _) . lines <$> readFile dict
|
||||||
|
|
||||||
|
type Size = 0xfffff
|
||||||
|
|
||||||
|
dict :: FilePath
|
||||||
|
dict = "/usr/share/dict/words"
|
Loading…
Reference in a new issue