diff --git a/.travis.yml b/.travis.yml index e37059f..e9553de 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,9 +2,13 @@ # # haskell-ci 'base16-bytestring.cabal' # +# To regenerate the script (for example after adjusting tested-with) run +# +# haskell-ci regenerate +# # For more information, see https://github.com/haskell-CI/haskell-ci # -# version: 0.9.20200406 +# version: 0.10.1 # version: ~> 1.0 language: c @@ -162,5 +166,5 @@ script: - rm -f cabal.project.local - ${CABAL} v2-build $WITHCOMPILER --disable-tests --disable-benchmarks all -# REGENDATA ("0.9.20200406",["base16-bytestring.cabal"]) +# REGENDATA ("0.10.1",["base16-bytestring.cabal"]) # EOF diff --git a/Data/ByteString/Base16.hs b/Data/ByteString/Base16.hs index bdcd935..42a7c83 100644 --- a/Data/ByteString/Base16.hs +++ b/Data/ByteString/Base16.hs @@ -1,32 +1,39 @@ -{-# LANGUAGE BangPatterns, MagicHash #-} - +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE CPP #-} +{-# LANGUAGE MagicHash #-} -- | -- Module : Data.ByteString.Base16 -- Copyright : (c) 2011 MailRank, Inc. -- -- License : BSD --- Maintainer : bos@serpentine.com +-- Maintainer : Herbert Valerio Riedel , +-- Mikhail Glushenkov , +-- Emily Pillmore -- Stability : experimental -- Portability : GHC -- -- Fast and efficient encoding and decoding of base16-encoded strings. - +-- module Data.ByteString.Base16 - ( - encode - , decode - ) where +( encode +, decode +, decodeLenient +) where + +import Data.ByteString (empty) +import Data.ByteString.Base16.Internal +import Data.ByteString.Internal + +import Foreign.ForeignPtr +import Foreign.Ptr + +import GHC.ForeignPtr +#if __GLASGOW_HASKELL__ >= 702 +import System.IO.Unsafe (unsafeDupablePerformIO) +#else +import GHC.IO (unsafeDupablePerformIO) +#endif -import Data.ByteString.Char8 (empty) -import Data.ByteString.Internal (ByteString(..), createAndTrim', unsafeCreate) -import Data.Bits (shiftL) -import Foreign.ForeignPtr (ForeignPtr, withForeignPtr) -import Foreign.Ptr (Ptr, minusPtr, plusPtr) -import Foreign.Storable (peek, poke) -import System.IO.Unsafe (unsafePerformIO) -import GHC.Prim -import GHC.Types -import GHC.Word -- | Encode a string into base16 form. The result will always be a -- multiple of 2 bytes in length. @@ -37,108 +44,46 @@ import GHC.Word encode :: ByteString -> ByteString encode (PS sfp soff slen) | slen > maxBound `div` 2 = - error "Data.ByteString.Base16.encode: input too long" - | otherwise = unsafeCreate (slen*2) $ \dptr -> - withForeignPtr sfp $ \sptr -> - enc (sptr `plusPtr` soff) dptr - where - enc sptr = go sptr where - e = sptr `plusPtr` slen - go s d | s == e = return () - | otherwise = do - x <- peek8 s - poke d (tlookup tableHi x) - poke (d `plusPtr` 1) (tlookup tableLo x) - go (s `plusPtr` 1) (d `plusPtr` 2) - tlookup :: Addr# -> Int -> Word8 - tlookup table (I# index) = W8# (indexWord8OffAddr# table index) - !tableLo = - "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\ - \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\ - \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\ - \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\ - \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\ - \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\ - \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\ - \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\ - \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\ - \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\ - \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\ - \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\ - \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\ - \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\ - \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\ - \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66"# - !tableHi = - "\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\ - \\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\ - \\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\ - \\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\ - \\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\ - \\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\ - \\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\ - \\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\ - \\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\ - \\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\ - \\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\ - \\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\ - \\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\ - \\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\ - \\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\ - \\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66"# + error "Data.ByteString.Base16.encode: input too long" + | otherwise = unsafeCreate (slen * 2) $ \dptr -> + withForeignPtr sfp $ \sptr -> + encodeLoop dptr + (sptr `plusPtr` soff) + (sptr `plusPtr` (soff + slen)) --- | Decode a string from base16 form. The first element of the --- returned tuple contains the decoded data. The second element starts --- at the first invalid base16 sequence in the original string. +-- | Decode a base16-encoded 'ByteString' value. -- --- Examples: +-- If errors are encountered during the decoding process, +-- then they will be returned in the @Left@ clause of the +-- coproduct. -- --- > decode "666f6f" == ("foo", "") --- > decode "66quux" == ("f", "quux") --- > decode "666quux" == ("f", "6quux") -decode :: ByteString -> (ByteString, ByteString) -decode (PS sfp soff slen) = - unsafePerformIO . createAndTrim' (slen `div` 2) $ \dptr -> - withForeignPtr sfp $ \sptr -> - dec (sptr `plusPtr` soff) dptr - where - dec sptr = go sptr where - e = sptr `plusPtr` if odd slen then slen - 1 else slen - go s d | s == e = let len = e `minusPtr` sptr - in return (0, len `div` 2, ps sfp (soff+len) (slen-len)) - | otherwise = do - hi <- hex `fmap` peek8 s - lo <- hex `fmap` peek8 (s `plusPtr` 1) - if lo == 0xff || hi == 0xff - then let len = s `minusPtr` sptr - in return (0, len `div` 2, ps sfp (soff+len) (slen-len)) - else do - poke d . fromIntegral $ lo + (hi `shiftL` 4) - go (s `plusPtr` 2) (d `plusPtr` 1) - - hex (I# index) = W8# (indexWord8OffAddr# table index) - !table = - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ - \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ - \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ - \\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff\ - \\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff\ - \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ - \\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff\ - \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ - \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ - \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ - \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ - \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ - \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ - \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ - \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ - \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"# +decode :: ByteString -> Either String ByteString +decode (PS sfp soff slen) + | slen == 0 = Right empty + | r /= 0 = Left "invalid bytestring size" + | otherwise = unsafeDupablePerformIO $ do + dfp <- mallocPlainForeignPtrBytes q + withForeignPtr dfp $ \dptr -> + withForeignPtr sfp $ \sptr -> + decodeLoop dfp dptr + (plusPtr sptr soff) + (plusPtr sptr (soff + slen)) + where + !q = slen `quot` 2 + !r = slen `rem` 2 -peek8 :: Ptr Word8 -> IO Int -peek8 p = fromIntegral `fmap` peek p - -ps :: ForeignPtr Word8 -> Int -> Int -> ByteString -ps fp off len - | len <= 0 = empty - | otherwise = PS fp off len +decodeLenient :: ByteString -> ByteString +decodeLenient (PS !sfp !soff !slen) + | slen == 0 = empty + | otherwise = unsafeDupablePerformIO $ do + dfp <- mallocPlainForeignPtrBytes dlen + withForeignPtr dfp $ \dptr -> + withForeignPtr sfp $ \sptr -> + lenientLoop + dfp + dptr + (plusPtr sptr soff) + (plusPtr sptr (soff + slen)) + where + !q = slen `quot` 2 + !dlen = q * 2 diff --git a/Data/ByteString/Base16/Internal.hs b/Data/ByteString/Base16/Internal.hs new file mode 100644 index 0000000..ccd591d --- /dev/null +++ b/Data/ByteString/Base16/Internal.hs @@ -0,0 +1,158 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE MagicHash #-} +module Data.ByteString.Base16.Internal +( -- * worker loops + encodeLoop +, decodeLoop +, lenientLoop + -- * utils +, c2w +, aix +, reChunk +, unsafeShiftR +) where + + +import Data.Bits ((.&.), (.|.)) +import qualified Data.ByteString as B +import Data.ByteString.Internal (ByteString(..)) +import Data.Char (ord) + +import Foreign.ForeignPtr +import Foreign.Ptr +import Foreign.Storable + +import GHC.Word +import GHC.Exts + (Int(I#), Addr#, indexWord8OffAddr#, word2Int#, uncheckedShiftRL#) + + +-- ------------------------------------------------------------------ -- +-- Loops + +encodeLoop + :: Ptr Word8 + -> Ptr Word8 + -> Ptr Word8 + -> IO () +encodeLoop !dptr !sptr !end = go dptr sptr + where + !hex = "0123456789abcdef"# + + go !dst !src + | src == end = return () + | otherwise = do + !t <- peek src + + poke dst (aix (unsafeShiftR t 4) hex) + poke (plusPtr dst 1) (aix (t .&. 0x0f) hex) + + go (plusPtr dst 2) (plusPtr src 1) +{-# INLINE encodeLoop #-} + +decodeLoop + :: ForeignPtr Word8 + -> Ptr Word8 + -> Ptr Word8 + -> Ptr Word8 + -> IO (Either String ByteString) +decodeLoop !dfp !dptr !sptr !end = go dptr sptr + where + err !src = return . Left + $ "invalid character at offset: " + ++ show (src `minusPtr` sptr) + + !lo = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"# + + !hi = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00\x10\x20\x30\x40\x50\x60\x70\x80\x90\xff\xff\xff\xff\xff\xff\xff\xa0\xb0\xc0\xd0\xe0\xf0\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xa0\xb0\xc0\xd0\xe0\xf0\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"# + + go !dst !src + | src == end = return (Right (PS dfp 0 (src `minusPtr` sptr))) + | otherwise = do + !x <- peek src + !y <- peek (plusPtr src 1) + + let !a = aix x hi + !b = aix y lo + + if a == 0xff + then err src + else + if b == 0xff + then err (plusPtr src 1) + else do + poke dst (a .|. b) + go (plusPtr dst 1) (plusPtr src 2) +{-# INLINE decodeLoop #-} + +lenientLoop + :: ForeignPtr Word8 + -> Ptr Word8 + -> Ptr Word8 + -> Ptr Word8 + -> IO ByteString +lenientLoop !dfp !dptr !sptr !end = goHi dptr sptr 0 + where + !lo = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"# + + !hi = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00\x10\x20\x30\x40\x50\x60\x70\x80\x90\xff\xff\xff\xff\xff\xff\xff\xa0\xb0\xc0\xd0\xe0\xf0\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xa0\xb0\xc0\xd0\xe0\xf0\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"# + + goHi !dst !src !n + | src == end = return (PS dfp 0 n) + | otherwise = do + !x <- peek src + + let !a = aix x hi + + if a == 0xff + then goHi dst (plusPtr src 1) n + else goLo dst (plusPtr src 1) a n + + goLo !dst !src !a !n + | src == end = return (PS dfp 0 n) + | otherwise = do + !y <- peek src + + let !b = aix y lo + + if b == 0xff + then goLo dst (plusPtr src 1) a n + else do + poke dst (a .|. b) + goHi (plusPtr dst 1) (plusPtr src 1) (n + 1) +{-# LANGUAGE lenientLoop #-} + + +-- ------------------------------------------------------------------ -- +-- Utils + +aix :: Word8 -> Addr# -> Word8 +aix (W8# w) table = W8# (indexWord8OffAddr# table (word2Int# w)) +{-# INLINE aix #-} + +-- | Form a list of chunks, and rechunk the list of bytestrings +-- into length multiples of 2 +-- +reChunk :: [ByteString] -> [ByteString] +reChunk [] = [] +reChunk (c:cs) = case B.length c `divMod` 2 of + (_, 0) -> c : reChunk cs + (n, _) -> case B.splitAt (n * 2) c of + ~(m, q) -> m : cont_ q cs + where + cont_ q [] = [q] + cont_ q (a:as) = case B.splitAt 1 a of + ~(x, y) -> let q' = B.append q x + in if B.length q' == 2 + then + let as' = if B.null y then as else y:as + in q' : reChunk as' + else cont_ q' as + +unsafeShiftR :: Word8 -> Int -> Word8 +unsafeShiftR (W8# x#) (I# i#) = W8# (x# `uncheckedShiftRL#` i#) +{-# INLINE unsafeShiftR #-} + +c2w :: Char -> Word8 +c2w = fromIntegral . ord +{-# INLINE c2w #-} diff --git a/Data/ByteString/Base16/Lazy.hs b/Data/ByteString/Base16/Lazy.hs index a66abbe..9efdba3 100644 --- a/Data/ByteString/Base16/Lazy.hs +++ b/Data/ByteString/Base16/Lazy.hs @@ -1,5 +1,4 @@ {-# LANGUAGE OverloadedStrings #-} - -- | -- Module : Data.ByteString.Base16.Lazy -- Copyright : (c) 2011 MailRank, Inc. @@ -12,17 +11,17 @@ -- Fast and efficient encoding and decoding of base16-encoded strings. module Data.ByteString.Base16.Lazy - ( - encode - , decode - ) where +( encode +, decode +, decodeLenient +) where + -import Data.Word (Word8) +import qualified Data.ByteString as BS +import qualified Data.ByteString.Lazy as LBS import qualified Data.ByteString.Base16 as B16 -import qualified Data.ByteString as B -import qualified Data.ByteString.Unsafe as B -import qualified Data.ByteString.Lazy as BL -import Data.ByteString.Lazy.Internal +import Data.ByteString.Base16.Internal +import Data.ByteString.Lazy.Internal (ByteString(..)) -- | Encode a string into base16 form. The result will always be a -- multiple of 2 bytes in length. @@ -31,8 +30,8 @@ import Data.ByteString.Lazy.Internal -- -- > encode "foo" == "666f6f" encode :: ByteString -> ByteString +encode Empty = Empty encode (Chunk c cs) = Chunk (B16.encode c) (encode cs) -encode Empty = Empty -- | Decode a string from base16 form. The first element of the -- returned tuple contains the decoded data. The second element starts @@ -45,24 +44,24 @@ encode Empty = Empty -- > decode "666f6f" == ("foo", "") -- > decode "66quux" == ("f", "quux") -- > decode "666quux" == ("f", "6quux") -decode :: ByteString -> (ByteString, ByteString) -decode = go Nothing - where - go :: Maybe Word8 -> ByteString -> (ByteString, ByteString) - go Nothing Empty = (Empty, Empty) - go (Just w) Empty = (Empty, BL.singleton w) - go (Just w) (Chunk c z) = - go Nothing (chunk (B.pack [w, B.unsafeHead c]) (chunk (B.unsafeTail c) z)) - go Nothing (Chunk c z) - | len == 0 = - let ~(res,tail') = go Nothing z - in (chunk h res, tail') - | len == 1 && isHex (B.unsafeHead t) = - let ~(res,tail') = go (Just (B.unsafeHead t)) z - in (chunk h res, tail') - | otherwise = (chunk h Empty, chunk t z) - where (h,t) = B16.decode c - len = B.length t +decode :: ByteString -> Either String ByteString +decode Empty = Right Empty +decode (Chunk b bs) = case B16.decode b of + Right b' -> case decode bs of + Left t -> Left t + Right bs' -> Right (Chunk b' bs') + Left t -> Left t -isHex :: Word8 -> Bool -isHex w = (w >= 48 && w <= 57) || (w >= 97 && w <= 102) || (w >= 65 && w <= 70) +-- | Decode a Base16-encoded 'ByteString' value leniently, using a +-- strategy that never fails. +-- +-- N.B.: this is not RFC 4648-compliant +-- +decodeLenient :: ByteString -> ByteString +decodeLenient = LBS.fromChunks + . fmap B16.decodeLenient + . reChunk + . fmap (BS.filter (flip BS.elem hex)) + . LBS.toChunks + where + hex = BS.pack (fmap c2w "0123456789abcdef") diff --git a/README.md b/README.md index 5277eff..c84df11 100644 --- a/README.md +++ b/README.md @@ -5,18 +5,6 @@ This package provides a Haskell library for working with base16-encoded data quickly and efficiently, using the `ByteString` type. - -# Performance - -This library is written in pure Haskell, and it's fast: - -* 250 MB/sec encoding - -* 200 MB/sec strict decoding (per RFC 4648) - -* 100 MB/sec lenient decoding - - # Get involved! Please report bugs via the @@ -29,6 +17,7 @@ Master [Git repository](http://github.com/haskell/base16-bytestring): # Authors -This library is written by [Bryan O'Sullivan](mailto:bos@serpentine.com). It -is maintained by [Herbert Valerio Riedel](mailto:hvr@gnu.org) and [Mikhail +This library is written by [Bryan O'Sullivan](mailto:bos@serpentine.com). + +It is currently maintained by [Emily Pillmore](mailto:emilypi@cohomolo.gy), [Herbert Valerio Riedel](mailto:hvr@gnu.org) and [Mikhail Glushenkov](mailto:mikhail.glushenkov@gmail.com). diff --git a/base16-bytestring.cabal b/base16-bytestring.cabal index 5d18896..2727ea4 100644 --- a/base16-bytestring.cabal +++ b/base16-bytestring.cabal @@ -1,51 +1,87 @@ -cabal-version: >=1.8 -name: base16-bytestring -version: 0.1.1.7 -synopsis: Fast base16 (hex) encoding and decoding for ByteStrings -description: This package provides support for encoding and decoding binary data according - to @base16@ (see also ) for - strict (see "Data.ByteString.Base16") and lazy @ByteString@s (see "Data.ByteString.Base16.Lazy"). - . - See also the package which - provides an uniform API providing conversion paths between more binary and textual types. -homepage: http://github.com/haskell/base16-bytestring -bug-reports: http://github.com/haskell/base16-bytestring/issues -license: BSD3 -license-file: LICENSE -copyright: Copyright 2011 MailRank, Inc.; - Copyright 2010-2020 Bryan O'Sullivan et al. -author: Bryan O'Sullivan -maintainer: Herbert Valerio Riedel , - Mikhail Glushenkov , - Emily Pillmore -category: Data -build-type: Simple -extra-source-files: README.md CHANGELOG.md -tested-with: GHC==8.10.1, GHC==8.8.3, GHC==8.6.5, - GHC==8.4.4, GHC==8.2.2, GHC==8.0.2, - GHC==7.10.3, GHC==7.8.4, GHC==7.6.3, - GHC==7.4.2, GHC==7.2.2, GHC==7.0.4 +cabal-version: >=1.12 +name: base16-bytestring +version: 1.0.0.0 +synopsis: Fast base16 (hex) encoding and decoding for ByteStrings +description: + This package provides support for encoding and decoding binary data according + to @base16@ (see also ) for + strict (see "Data.ByteString.Base16") and lazy @ByteString@s (see "Data.ByteString.Base16.Lazy"). + . + See also the package which + provides an uniform API providing conversion paths between more binary and textual types. + +homepage: http://github.com/haskell/base16-bytestring +bug-reports: http://github.com/haskell/base16-bytestring/issues +license: BSD3 +license-file: LICENSE +copyright: + Copyright 2011 MailRank, Inc.; + Copyright 2010-2020 Bryan O'Sullivan et al. + +author: Bryan O'Sullivan +maintainer: + Herbert Valerio Riedel , + Mikhail Glushenkov , + Emily Pillmore + +category: Data +build-type: Simple +extra-source-files: + README.md + CHANGELOG.md + +tested-with: + GHC ==7.0.4 + || ==7.2.2 + || ==7.4.2 + || ==7.6.3 + || ==7.8.4 + || ==7.10.3 + || ==8.0.2 + || ==8.2.2 + || ==8.4.4 + || ==8.6.5 + || ==8.8.3 + || ==8.10.1 + +source-repository head + type: git + location: http://github.com/haskell/base16-bytestring library + other-modules: Data.ByteString.Base16.Internal exposed-modules: Data.ByteString.Base16 Data.ByteString.Base16.Lazy build-depends: - base == 4.*, - bytestring >= 0.9, - ghc-prim + base >=4 && <5 + , bytestring >=0.9 && <0.11 - ghc-options: -Wall -funbox-strict-fields - -source-repository head - type: git - location: http://github.com/haskell/base16-bytestring + ghc-options: -Wall -funbox-strict-fields + default-language: Haskell2010 test-suite test - type: exitcode-stdio-1.0 - hs-source-dirs: tests - main-is: Tests.hs - build-depends: base - , base16-bytestring - , bytestring + type: exitcode-stdio-1.0 + hs-source-dirs: tests + main-is: Tests.hs + build-depends: + base + , base16-bytestring + , bytestring + + default-language: Haskell2010 + +benchmark bench + type: exitcode-stdio-1.0 + hs-source-dirs: benchmarks + main-is: Benchmarks.hs + build-depends: + base >=4 && <5 + , base16-bytestring + , bytestring + , criterion + , deepseq + , QuickCheck + + default-language: Haskell2010 diff --git a/benchmarks/Benchmarks.hs b/benchmarks/Benchmarks.hs index 6674161..93bdb0a 100644 --- a/benchmarks/Benchmarks.hs +++ b/benchmarks/Benchmarks.hs @@ -1,23 +1,49 @@ +module Main +( main +) where + + +import Criterion import Criterion.Main + import qualified Data.ByteString.Base16 as B16 import qualified Data.ByteString as B generate :: Int -> B.ByteString generate n = B.pack . take n . cycle $ [0..255] -main = defaultMain [ - bgroup "encode" [ - bench "8" $ whnf B16.encode (generate 8) - , bench "32" $ whnf B16.encode (generate 32) - , bench "128" $ whnf B16.encode (generate 128) - , bench "1024" $ whnf B16.encode (generate 1024) - , bench "65536" $ whnf B16.encode (generate 65536) - ] - , bgroup "decode" [ - bench "8" $ whnf (B16.decode . B16.encode) (generate 8) - , bench "32" $ whnf (B16.decode . B16.encode) (generate 32) - , bench "128" $ whnf (B16.decode . B16.encode) (generate 128) - , bench "1024" $ whnf (B16.decode . B16.encode) (generate 1024) - , bench "65536" $ whnf (B16.decode . B16.encode) (generate 65536) - ] - ] +main = defaultMain + [ case bs of + ~(a,b,c,d,e) -> bgroup "encode" + [ bench "25" $ whnf B16.encode a + , bench "100" $ whnf B16.encode b + , bench "1000" $ whnf B16.encode c + , bench "10000" $ whnf B16.encode d + , bench "100000" $ whnf B16.encode e + ] + , case bs of + ~(a,b,c,d,e) -> bgroup "decode" + [ bench "25" $ whnf B16.decode a + , bench "100" $ whnf B16.decode b + , bench "1000" $ whnf B16.decode c + , bench "10000" $ whnf B16.decode d + , bench "100000" $ whnf B16.decode e + ] + ] + where + bs = + let a = generate 25 + b = generate 100 + c = generate 1000 + d = generate 10000 + e = generate 100000 + in (a,b,c,d,e) + + bs' = + let a = generate 25 + b = generate 100 + c = generate 1000 + d = generate 10000 + e = generate 100000 + f = B16.encode + in (f a, f b, f c, f d, f e) diff --git a/benchmarks/base16-bytestring-benchmarks.cabal b/benchmarks/base16-bytestring-benchmarks.cabal deleted file mode 100644 index 534a2ca..0000000 --- a/benchmarks/base16-bytestring-benchmarks.cabal +++ /dev/null @@ -1,14 +0,0 @@ -name: base16-bytestring-benchmarks -version: 0 -cabal-version: >= 1.8 -build-type: Simple - -executable bm - main-is: Benchmarks.hs - - build-depends: - base, - base16-bytestring, - bytestring, - criterion >= 0.5.0.10, - text >= 0.11.0.8 diff --git a/tests/Tests.hs b/tests/Tests.hs index dd9ab0b..4a89ad9 100644 --- a/tests/Tests.hs +++ b/tests/Tests.hs @@ -16,53 +16,5 @@ c2w = fromIntegral . ord main :: IO () main = do - let hexL = map c2w "0123456789abcdef" - hexU = map c2w "0123456789ABCDEF" - hexUL = map c2w "0123456789ABCDEFabcdef" - notHex = [c | c <- [0..255], c `notElem` hexUL] - hexL2 = do a <- hexL; b <- hexL; [a,b] - hexU2 = do a <- hexU; b <- hexU; [a,b] - bytes = B.pack [0..255] - - -- Encode every byte - True <- Base16.encode bytes `shouldBe` B.pack hexL2 - - -- Decode every valid hex pair - True <- Base16.decode (B.pack hexL2) `shouldBe` (bytes, B.empty) - True <- Base16.decode (B.pack hexU2) `shouldBe` (bytes, B.empty) - - -- Decode every invalid byte paired with a correct byte - let bads1 = [B.pack [a,b] | a <- notHex, b <- hexUL] - let bads2 = [B.pack [a,b] | a <- hexUL, b <- notHex] - True <- map Base16.decode bads1 `shouldBe` map (\s -> (B.empty, s)) bads1 - True <- map Base16.decode bads2 `shouldBe` map (\s -> (B.empty, s)) bads2 - - -- Like above, but start with a correct byte - let correctHex = B.pack [97,98] - correctBytes = B.pack [171] - True <- map (Base16.decode . (correctHex `B.append`)) bads1 - `shouldBe` map (\s -> (correctBytes, s)) bads1 - True <- map (Base16.decode . (correctHex `B.append`)) bads2 - `shouldBe` map (\s -> (correctBytes, s)) bads2 - - -- Like above, but end with a correct byte - True <- map (Base16.decode . (`B.append` correctHex)) bads1 - `shouldBe` map (\s -> (B.empty, s `B.append` correctHex)) bads1 - True <- map (Base16.decode . (`B.append` correctHex)) bads2 - `shouldBe` map (\s -> (B.empty, s `B.append` correctHex)) bads2 - - -- Lazy decoding also works with odd length chunks - let encodedLazy = BL.fromChunks $ map (B.pack . map c2w) ["614","239","6","142","39"] - True <- Base16L.decode encodedLazy `shouldBe` (BL.pack . map c2w $ "aB9aB9",BL.empty) - - -- Lazy decoding is lazy on success - let encodedLazy = BL.iterate id 48 - True <- (BL.unpack . BL.take 8 . fst . Base16L.decode $ encodedLazy) - `shouldBe` [0,0,0,0,0,0,0,0] - - -- Lazy decoding is lazy on failure - let encodedLazy = BL.iterate id 47 - True <- (BL.unpack . BL.take 8 . fst . Base16L.decode $ encodedLazy) - `shouldBe` [] return ()