diff --git a/.travis.yml b/.travis.yml
index e37059f..e9553de 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -2,9 +2,13 @@
 #
 #   haskell-ci 'base16-bytestring.cabal'
 #
+# To regenerate the script (for example after adjusting tested-with) run
+#
+#   haskell-ci regenerate
+#
 # For more information, see https://github.com/haskell-CI/haskell-ci
 #
-# version: 0.9.20200406
+# version: 0.10.1
 #
 version: ~> 1.0
 language: c
@@ -162,5 +166,5 @@ script:
   - rm -f cabal.project.local
   - ${CABAL} v2-build $WITHCOMPILER --disable-tests --disable-benchmarks all
 
-# REGENDATA ("0.9.20200406",["base16-bytestring.cabal"])
+# REGENDATA ("0.10.1",["base16-bytestring.cabal"])
 # EOF
diff --git a/Data/ByteString/Base16.hs b/Data/ByteString/Base16.hs
index bdcd935..42a7c83 100644
--- a/Data/ByteString/Base16.hs
+++ b/Data/ByteString/Base16.hs
@@ -1,32 +1,39 @@
-{-# LANGUAGE BangPatterns, MagicHash #-}
-
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE MagicHash #-}
 -- |
 -- Module      : Data.ByteString.Base16
 -- Copyright   : (c) 2011 MailRank, Inc.
 --
 -- License     : BSD
--- Maintainer  : bos@serpentine.com
+-- Maintainer  : Herbert Valerio Riedel <hvr@gnu.org>,
+--               Mikhail Glushenkov <mikhail.glushenkov@gmail.com>,
+--               Emily Pillmore <emilypi@cohomolo.gy>
 -- Stability   : experimental
 -- Portability : GHC
 --
 -- Fast and efficient encoding and decoding of base16-encoded strings.
-
+--
 module Data.ByteString.Base16
-    (
-      encode
-    , decode
-    ) where
+( encode
+, decode
+, decodeLenient
+) where
+
+import Data.ByteString (empty)
+import Data.ByteString.Base16.Internal
+import Data.ByteString.Internal
+
+import Foreign.ForeignPtr
+import Foreign.Ptr
+
+import GHC.ForeignPtr
+#if __GLASGOW_HASKELL__ >= 702
+import System.IO.Unsafe (unsafeDupablePerformIO)
+#else
+import GHC.IO (unsafeDupablePerformIO)
+#endif
 
-import Data.ByteString.Char8 (empty)
-import Data.ByteString.Internal (ByteString(..), createAndTrim', unsafeCreate)
-import Data.Bits (shiftL)
-import Foreign.ForeignPtr (ForeignPtr, withForeignPtr)
-import Foreign.Ptr (Ptr, minusPtr, plusPtr)
-import Foreign.Storable (peek, poke)
-import System.IO.Unsafe (unsafePerformIO)
-import GHC.Prim
-import GHC.Types
-import GHC.Word
 
 -- | Encode a string into base16 form.  The result will always be a
 -- multiple of 2 bytes in length.
@@ -37,108 +44,46 @@ import GHC.Word
 encode :: ByteString -> ByteString
 encode (PS sfp soff slen)
     | slen > maxBound `div` 2 =
-        error "Data.ByteString.Base16.encode: input too long"
-    | otherwise = unsafeCreate (slen*2) $ \dptr ->
-                    withForeignPtr sfp $ \sptr ->
-                      enc (sptr `plusPtr` soff) dptr
- where
-  enc sptr = go sptr where
-    e = sptr `plusPtr` slen
-    go s d | s == e = return ()
-           | otherwise = do
-      x <- peek8 s
-      poke d (tlookup tableHi x)
-      poke (d `plusPtr` 1) (tlookup tableLo x)
-      go (s `plusPtr` 1) (d `plusPtr` 2)
-    tlookup :: Addr# -> Int -> Word8
-    tlookup table (I# index) = W8# (indexWord8OffAddr# table index)
-    !tableLo =
-      "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
-      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
-      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
-      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
-      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
-      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
-      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
-      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
-      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
-      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
-      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
-      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
-      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
-      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
-      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66\
-      \\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x61\x62\x63\x64\x65\x66"#
-    !tableHi =
-      "\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\
-      \\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\x31\
-      \\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\x32\
-      \\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\x33\
-      \\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\x34\
-      \\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\x35\
-      \\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\x36\
-      \\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\x37\
-      \\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\x38\
-      \\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\x39\
-      \\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\x61\
-      \\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\x62\
-      \\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\x63\
-      \\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\x64\
-      \\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\x65\
-      \\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66"#
+      error "Data.ByteString.Base16.encode: input too long"
+    | otherwise = unsafeCreate (slen * 2) $ \dptr ->
+        withForeignPtr sfp $ \sptr ->
+          encodeLoop dptr
+          (sptr `plusPtr` soff)
+          (sptr `plusPtr` (soff + slen))
 
--- | Decode a string from base16 form. The first element of the
--- returned tuple contains the decoded data. The second element starts
--- at the first invalid base16 sequence in the original string.
+-- | Decode a base16-encoded 'ByteString' value.
 --
--- Examples:
+-- If errors are encountered during the decoding process,
+-- then they will be returned in the @Left@ clause of the
+-- coproduct.
 --
--- > decode "666f6f"  == ("foo", "")
--- > decode "66quux"  == ("f", "quux")
--- > decode "666quux" == ("f", "6quux")
-decode :: ByteString -> (ByteString, ByteString)
-decode (PS sfp soff slen) =
-  unsafePerformIO . createAndTrim' (slen `div` 2) $ \dptr ->
-      withForeignPtr sfp $ \sptr ->
-        dec (sptr `plusPtr` soff) dptr
- where
-  dec sptr = go sptr where
-    e = sptr `plusPtr` if odd slen then slen - 1 else slen
-    go s d | s == e = let len = e `minusPtr` sptr
-                      in return (0, len `div` 2, ps sfp (soff+len) (slen-len))
-           | otherwise = do
-      hi <- hex `fmap` peek8 s
-      lo <- hex `fmap` peek8 (s `plusPtr` 1)
-      if lo == 0xff || hi == 0xff
-        then let len = s `minusPtr` sptr
-             in return (0, len `div` 2, ps sfp (soff+len) (slen-len))
-        else do
-          poke d . fromIntegral $ lo + (hi `shiftL` 4)
-          go (s `plusPtr` 2) (d `plusPtr` 1)
-
-    hex (I# index) = W8# (indexWord8OffAddr# table index)
-    !table =
-        "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
-        \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
-        \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
-        \\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff\
-        \\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff\
-        \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
-        \\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff\
-        \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
-        \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
-        \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
-        \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
-        \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
-        \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
-        \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
-        \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
-        \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"#
+decode :: ByteString -> Either String ByteString
+decode (PS sfp soff slen)
+    | slen == 0 = Right empty
+    | r /= 0 = Left "invalid bytestring size"
+    | otherwise = unsafeDupablePerformIO $ do
+      dfp <- mallocPlainForeignPtrBytes q
+      withForeignPtr dfp $ \dptr ->
+        withForeignPtr sfp $ \sptr ->
+          decodeLoop dfp dptr
+          (plusPtr sptr soff)
+          (plusPtr sptr (soff + slen))
+  where
+    !q = slen `quot` 2
+    !r = slen `rem` 2
 
-peek8 :: Ptr Word8 -> IO Int
-peek8 p = fromIntegral `fmap` peek p
-
-ps :: ForeignPtr Word8 -> Int -> Int -> ByteString
-ps fp off len
-    | len <= 0 = empty
-    | otherwise = PS fp off len
+decodeLenient :: ByteString -> ByteString
+decodeLenient (PS !sfp !soff !slen)
+  | slen == 0 = empty
+  | otherwise = unsafeDupablePerformIO $ do
+    dfp <- mallocPlainForeignPtrBytes dlen
+    withForeignPtr dfp $ \dptr ->
+      withForeignPtr sfp $ \sptr ->
+        lenientLoop
+          dfp
+          dptr
+          (plusPtr sptr soff)
+          (plusPtr sptr (soff + slen))
+  where
+    !q = slen `quot` 2
+    !dlen = q * 2
diff --git a/Data/ByteString/Base16/Internal.hs b/Data/ByteString/Base16/Internal.hs
new file mode 100644
index 0000000..ccd591d
--- /dev/null
+++ b/Data/ByteString/Base16/Internal.hs
@@ -0,0 +1,158 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE MagicHash #-}
+module Data.ByteString.Base16.Internal
+( -- * worker loops
+  encodeLoop
+, decodeLoop
+, lenientLoop
+  -- * utils
+, c2w
+, aix
+, reChunk
+, unsafeShiftR
+) where
+
+
+import Data.Bits ((.&.), (.|.))
+import qualified Data.ByteString as B
+import Data.ByteString.Internal (ByteString(..))
+import Data.Char (ord)
+
+import Foreign.ForeignPtr
+import Foreign.Ptr
+import Foreign.Storable
+
+import GHC.Word
+import GHC.Exts
+  (Int(I#), Addr#, indexWord8OffAddr#, word2Int#, uncheckedShiftRL#)
+
+
+-- ------------------------------------------------------------------ --
+-- Loops
+
+encodeLoop
+    :: Ptr Word8
+    -> Ptr Word8
+    -> Ptr Word8
+    -> IO ()
+encodeLoop !dptr !sptr !end = go dptr sptr
+  where
+    !hex = "0123456789abcdef"#
+
+    go !dst !src
+      | src == end = return ()
+      | otherwise = do
+        !t <- peek src
+
+        poke dst (aix (unsafeShiftR t 4) hex)
+        poke (plusPtr dst 1) (aix (t .&. 0x0f) hex)
+
+        go (plusPtr dst 2) (plusPtr src 1)
+{-# INLINE encodeLoop #-}
+
+decodeLoop
+  :: ForeignPtr Word8
+  -> Ptr Word8
+  -> Ptr Word8
+  -> Ptr Word8
+  -> IO (Either String ByteString)
+decodeLoop !dfp !dptr !sptr !end = go dptr sptr
+  where
+    err !src = return . Left
+      $ "invalid character at offset: "
+      ++ show (src `minusPtr` sptr)
+
+    !lo = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"#
+
+    !hi = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00\x10\x20\x30\x40\x50\x60\x70\x80\x90\xff\xff\xff\xff\xff\xff\xff\xa0\xb0\xc0\xd0\xe0\xf0\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xa0\xb0\xc0\xd0\xe0\xf0\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"#
+
+    go !dst !src
+      | src == end = return (Right (PS dfp 0 (src `minusPtr` sptr)))
+      | otherwise = do
+        !x <- peek src
+        !y <- peek (plusPtr src 1)
+
+        let !a = aix x hi
+            !b = aix y lo
+
+        if a == 0xff
+        then err src
+        else
+          if b == 0xff
+          then err (plusPtr src 1)
+          else do
+            poke dst (a .|. b)
+            go (plusPtr dst 1) (plusPtr src 2)
+{-# INLINE decodeLoop #-}
+
+lenientLoop
+  :: ForeignPtr Word8
+  -> Ptr Word8
+  -> Ptr Word8
+  -> Ptr Word8
+  -> IO ByteString
+lenientLoop !dfp !dptr !sptr !end = goHi dptr sptr 0
+  where
+    !lo = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"#
+
+    !hi = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00\x10\x20\x30\x40\x50\x60\x70\x80\x90\xff\xff\xff\xff\xff\xff\xff\xa0\xb0\xc0\xd0\xe0\xf0\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xa0\xb0\xc0\xd0\xe0\xf0\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"#
+
+    goHi !dst !src !n
+      | src == end = return (PS dfp 0 n)
+      | otherwise = do
+        !x <- peek src
+
+        let !a = aix x hi
+
+        if a == 0xff
+        then goHi dst (plusPtr src 1) n
+        else goLo dst (plusPtr src 1) a n
+
+    goLo !dst !src !a !n
+      | src == end = return (PS dfp 0 n)
+      | otherwise = do
+        !y <- peek src
+
+        let !b = aix y lo
+
+        if b == 0xff
+        then goLo dst (plusPtr src 1) a n
+        else do
+          poke dst (a .|. b)
+          goHi (plusPtr dst 1) (plusPtr src 1) (n + 1)
+{-# LANGUAGE lenientLoop #-}
+
+
+-- ------------------------------------------------------------------ --
+-- Utils
+
+aix :: Word8 -> Addr# -> Word8
+aix (W8# w) table = W8# (indexWord8OffAddr# table (word2Int# w))
+{-# INLINE aix #-}
+
+-- | Form a list of chunks, and rechunk the list of bytestrings
+-- into length multiples of 2
+--
+reChunk :: [ByteString] -> [ByteString]
+reChunk [] = []
+reChunk (c:cs) = case B.length c `divMod` 2 of
+    (_, 0) -> c : reChunk cs
+    (n, _) -> case B.splitAt (n * 2) c of
+      ~(m, q) -> m : cont_ q cs
+  where
+    cont_ q [] = [q]
+    cont_ q (a:as) = case B.splitAt 1 a of
+      ~(x, y) -> let q' = B.append q x
+        in if B.length q' == 2
+          then
+            let as' = if B.null y then as else y:as
+            in q' : reChunk as'
+          else cont_ q' as
+
+unsafeShiftR :: Word8 -> Int -> Word8
+unsafeShiftR (W8# x#) (I# i#) = W8# (x# `uncheckedShiftRL#` i#)
+{-# INLINE unsafeShiftR #-}
+
+c2w :: Char -> Word8
+c2w = fromIntegral . ord
+{-# INLINE c2w #-}
diff --git a/Data/ByteString/Base16/Lazy.hs b/Data/ByteString/Base16/Lazy.hs
index a66abbe..9efdba3 100644
--- a/Data/ByteString/Base16/Lazy.hs
+++ b/Data/ByteString/Base16/Lazy.hs
@@ -1,5 +1,4 @@
 {-# LANGUAGE OverloadedStrings #-}
-
 -- |
 -- Module      : Data.ByteString.Base16.Lazy
 -- Copyright   : (c) 2011 MailRank, Inc.
@@ -12,17 +11,17 @@
 -- Fast and efficient encoding and decoding of base16-encoded strings.
 
 module Data.ByteString.Base16.Lazy
-    (
-      encode
-    , decode
-    ) where
+( encode
+, decode
+, decodeLenient
+) where
+
 
-import Data.Word (Word8)
+import qualified Data.ByteString as BS
+import qualified Data.ByteString.Lazy as LBS
 import qualified Data.ByteString.Base16 as B16
-import qualified Data.ByteString as B
-import qualified Data.ByteString.Unsafe as B
-import qualified Data.ByteString.Lazy as BL
-import Data.ByteString.Lazy.Internal
+import Data.ByteString.Base16.Internal
+import Data.ByteString.Lazy.Internal (ByteString(..))
 
 -- | Encode a string into base16 form.  The result will always be a
 -- multiple of 2 bytes in length.
@@ -31,8 +30,8 @@ import Data.ByteString.Lazy.Internal
 --
 -- > encode "foo"  == "666f6f"
 encode :: ByteString -> ByteString
+encode Empty = Empty
 encode (Chunk c cs) = Chunk (B16.encode c) (encode cs)
-encode Empty        = Empty
 
 -- | Decode a string from base16 form. The first element of the
 -- returned tuple contains the decoded data. The second element starts
@@ -45,24 +44,24 @@ encode Empty        = Empty
 -- > decode "666f6f"  == ("foo", "")
 -- > decode "66quux"  == ("f", "quux")
 -- > decode "666quux" == ("f", "6quux")
-decode :: ByteString -> (ByteString, ByteString)
-decode = go Nothing
-  where
-      go :: Maybe Word8 -> ByteString -> (ByteString, ByteString)
-      go Nothing Empty = (Empty, Empty)
-      go (Just w) Empty = (Empty, BL.singleton w)
-      go (Just w) (Chunk c z) =
-           go Nothing (chunk (B.pack [w, B.unsafeHead c]) (chunk (B.unsafeTail c) z))
-      go Nothing (Chunk c z)
-           | len == 0 =
-                 let ~(res,tail') = go Nothing z
-                 in (chunk h res, tail')
-           | len == 1 && isHex (B.unsafeHead t) =
-                 let ~(res,tail') = go (Just (B.unsafeHead t)) z
-                 in (chunk h res, tail')
-           | otherwise = (chunk h Empty, chunk t z)
-            where (h,t) = B16.decode c
-                  len = B.length t
+decode :: ByteString -> Either String ByteString
+decode Empty = Right Empty
+decode (Chunk b bs) = case B16.decode b of
+  Right b' -> case decode bs of
+    Left t -> Left t
+    Right bs' -> Right (Chunk b' bs')
+  Left t -> Left t
 
-isHex :: Word8 -> Bool
-isHex w = (w >= 48 && w <= 57) || (w >= 97 && w <= 102) || (w >= 65 && w <= 70)
+-- | Decode a Base16-encoded 'ByteString' value leniently, using a
+-- strategy that never fails.
+--
+-- N.B.: this is not RFC 4648-compliant
+--
+decodeLenient :: ByteString -> ByteString
+decodeLenient = LBS.fromChunks
+  . fmap B16.decodeLenient
+  . reChunk
+  . fmap (BS.filter (flip BS.elem hex))
+  . LBS.toChunks
+  where
+    hex = BS.pack (fmap c2w "0123456789abcdef")
diff --git a/README.md b/README.md
index 5277eff..c84df11 100644
--- a/README.md
+++ b/README.md
@@ -5,18 +5,6 @@
 This package provides a Haskell library for working with base16-encoded
 data quickly and efficiently, using the `ByteString` type.
 
-
-# Performance
-
-This library is written in pure Haskell, and it's fast:
-
-* 250 MB/sec encoding
-
-* 200 MB/sec strict decoding (per RFC 4648)
-
-* 100 MB/sec lenient decoding
-
-
 # Get involved!
 
 Please report bugs via the
@@ -29,6 +17,7 @@ Master [Git repository](http://github.com/haskell/base16-bytestring):
 
 # Authors
 
-This library is written by [Bryan O'Sullivan](mailto:bos@serpentine.com). It
-is maintained by [Herbert Valerio Riedel](mailto:hvr@gnu.org) and [Mikhail
+This library is written by [Bryan O'Sullivan](mailto:bos@serpentine.com). 
+
+It is currently maintained by [Emily Pillmore](mailto:emilypi@cohomolo.gy), [Herbert Valerio Riedel](mailto:hvr@gnu.org) and [Mikhail
 Glushenkov](mailto:mikhail.glushenkov@gmail.com).
diff --git a/base16-bytestring.cabal b/base16-bytestring.cabal
index 5d18896..2727ea4 100644
--- a/base16-bytestring.cabal
+++ b/base16-bytestring.cabal
@@ -1,51 +1,87 @@
-cabal-version:       >=1.8
-name:                base16-bytestring
-version:             0.1.1.7
-synopsis:            Fast base16 (hex) encoding and decoding for ByteStrings
-description:         This package provides support for encoding and decoding binary data according
-                     to @base16@ (see also <https://tools.ietf.org/html/rfc4648 RFC 4648>) for
-                     strict (see "Data.ByteString.Base16") and lazy @ByteString@s (see "Data.ByteString.Base16.Lazy").
-                     .
-                     See also the <https://hackage.haskell.org/package/base-encoding base-encoding> package which
-                     provides an uniform API providing conversion paths between more binary and textual types.
-homepage:            http://github.com/haskell/base16-bytestring
-bug-reports:         http://github.com/haskell/base16-bytestring/issues
-license:             BSD3
-license-file:        LICENSE
-copyright:           Copyright 2011 MailRank, Inc.;
-                     Copyright 2010-2020 Bryan O'Sullivan et al.
-author:              Bryan O'Sullivan <bos@serpentine.com>
-maintainer:          Herbert Valerio Riedel <hvr@gnu.org>,
-                     Mikhail Glushenkov <mikhail.glushenkov@gmail.com>,
-                     Emily Pillmore <emilypi@cohomolo.gy>
-category:            Data
-build-type:          Simple
-extra-source-files:  README.md CHANGELOG.md
-tested-with:         GHC==8.10.1, GHC==8.8.3, GHC==8.6.5,
-                     GHC==8.4.4,  GHC==8.2.2, GHC==8.0.2,
-                     GHC==7.10.3, GHC==7.8.4, GHC==7.6.3,
-                     GHC==7.4.2,  GHC==7.2.2, GHC==7.0.4
+cabal-version:      >=1.12
+name:               base16-bytestring
+version:            1.0.0.0
+synopsis:           Fast base16 (hex) encoding and decoding for ByteStrings
+description:
+  This package provides support for encoding and decoding binary data according
+  to @base16@ (see also <https://tools.ietf.org/html/rfc4648 RFC 4648>) for
+  strict (see "Data.ByteString.Base16") and lazy @ByteString@s (see "Data.ByteString.Base16.Lazy").
+  .
+  See also the <https://hackage.haskell.org/package/base-encoding base-encoding> package which
+  provides an uniform API providing conversion paths between more binary and textual types.
+
+homepage:           http://github.com/haskell/base16-bytestring
+bug-reports:        http://github.com/haskell/base16-bytestring/issues
+license:            BSD3
+license-file:       LICENSE
+copyright:
+  Copyright 2011 MailRank, Inc.;
+  Copyright 2010-2020 Bryan O'Sullivan et al.
+
+author:             Bryan O'Sullivan <bos@serpentine.com>
+maintainer:
+  Herbert Valerio Riedel <hvr@gnu.org>,
+  Mikhail Glushenkov <mikhail.glushenkov@gmail.com>,
+  Emily Pillmore <emilypi@cohomolo.gy>
+
+category:           Data
+build-type:         Simple
+extra-source-files:
+  README.md
+  CHANGELOG.md
+
+tested-with:
+  GHC ==7.0.4
+   || ==7.2.2
+   || ==7.4.2
+   || ==7.6.3
+   || ==7.8.4
+   || ==7.10.3
+   || ==8.0.2
+   || ==8.2.2
+   || ==8.4.4
+   || ==8.6.5
+   || ==8.8.3
+   || ==8.10.1
+
+source-repository head
+  type:     git
+  location: http://github.com/haskell/base16-bytestring
 
 library
+  other-modules:   Data.ByteString.Base16.Internal
   exposed-modules:
     Data.ByteString.Base16
     Data.ByteString.Base16.Lazy
 
   build-depends:
-    base == 4.*,
-    bytestring >= 0.9,
-    ghc-prim
+      base        >=4   && <5
+    , bytestring  >=0.9 && <0.11
 
-  ghc-options: -Wall -funbox-strict-fields
-
-source-repository head
-  type:     git
-  location: http://github.com/haskell/base16-bytestring
+  ghc-options:     -Wall -funbox-strict-fields
+  default-language: Haskell2010
 
 test-suite test
-  type: exitcode-stdio-1.0
-  hs-source-dirs: tests
-  main-is: Tests.hs
-  build-depends: base
-               , base16-bytestring
-               , bytestring
+  type:             exitcode-stdio-1.0
+  hs-source-dirs:   tests
+  main-is:          Tests.hs
+  build-depends:
+      base
+    , base16-bytestring
+    , bytestring
+
+  default-language: Haskell2010
+
+benchmark bench
+  type:             exitcode-stdio-1.0
+  hs-source-dirs:   benchmarks
+  main-is:          Benchmarks.hs
+  build-depends:
+      base               >=4 && <5
+    , base16-bytestring
+    , bytestring
+    , criterion
+    , deepseq
+    , QuickCheck
+
+  default-language: Haskell2010
diff --git a/benchmarks/Benchmarks.hs b/benchmarks/Benchmarks.hs
index 6674161..93bdb0a 100644
--- a/benchmarks/Benchmarks.hs
+++ b/benchmarks/Benchmarks.hs
@@ -1,23 +1,49 @@
+module Main
+( main
+) where
+
+
+import Criterion
 import Criterion.Main
+
 import qualified Data.ByteString.Base16 as B16
 import qualified Data.ByteString as B
 
 generate :: Int -> B.ByteString
 generate n = B.pack . take n . cycle $ [0..255]
 
-main = defaultMain [
-         bgroup "encode" [
-           bench "8" $ whnf B16.encode (generate 8)
-         , bench "32" $ whnf B16.encode (generate 32)
-         , bench "128" $ whnf B16.encode (generate 128)
-         , bench "1024" $ whnf B16.encode (generate 1024)
-         , bench "65536" $ whnf B16.encode (generate 65536)
-         ]
-       , bgroup "decode" [
-           bench "8" $ whnf (B16.decode . B16.encode) (generate 8)
-         , bench "32" $ whnf (B16.decode . B16.encode) (generate 32)
-         , bench "128" $ whnf (B16.decode . B16.encode) (generate 128)
-         , bench "1024" $ whnf (B16.decode . B16.encode) (generate 1024)
-         , bench "65536" $ whnf (B16.decode . B16.encode) (generate 65536)
-         ]
-       ]
+main = defaultMain
+  [ case bs of
+      ~(a,b,c,d,e) -> bgroup "encode"
+        [ bench "25" $ whnf B16.encode a
+        , bench "100" $ whnf B16.encode b
+        , bench "1000" $ whnf B16.encode c
+        , bench "10000" $ whnf B16.encode d
+        , bench "100000" $ whnf B16.encode e
+        ]
+  , case bs of
+      ~(a,b,c,d,e) -> bgroup "decode"
+        [ bench "25" $ whnf B16.decode a
+        , bench "100" $ whnf B16.decode b
+        , bench "1000" $ whnf B16.decode c
+        , bench "10000" $ whnf B16.decode d
+        , bench "100000" $ whnf B16.decode e
+        ]
+  ]
+  where
+    bs =
+      let a = generate 25
+          b = generate 100
+          c = generate 1000
+          d = generate 10000
+          e = generate 100000
+      in (a,b,c,d,e)
+
+    bs' =
+      let a = generate 25
+          b = generate 100
+          c = generate 1000
+          d = generate 10000
+          e = generate 100000
+          f = B16.encode
+      in (f a, f b, f c, f d, f e)
diff --git a/benchmarks/base16-bytestring-benchmarks.cabal b/benchmarks/base16-bytestring-benchmarks.cabal
deleted file mode 100644
index 534a2ca..0000000
--- a/benchmarks/base16-bytestring-benchmarks.cabal
+++ /dev/null
@@ -1,14 +0,0 @@
-name:           base16-bytestring-benchmarks
-version:        0
-cabal-version:  >= 1.8
-build-type:     Simple
-
-executable bm
-  main-is: Benchmarks.hs
-
-  build-depends:
-    base,
-    base16-bytestring,
-    bytestring,
-    criterion >= 0.5.0.10,
-    text >= 0.11.0.8
diff --git a/tests/Tests.hs b/tests/Tests.hs
index dd9ab0b..4a89ad9 100644
--- a/tests/Tests.hs
+++ b/tests/Tests.hs
@@ -16,53 +16,5 @@ c2w = fromIntegral . ord
 
 main :: IO ()
 main = do
-    let hexL  = map c2w "0123456789abcdef"
-        hexU  = map c2w "0123456789ABCDEF"
-        hexUL = map c2w "0123456789ABCDEFabcdef"
-        notHex = [c | c <- [0..255], c `notElem` hexUL]
-        hexL2 = do a <- hexL; b <- hexL; [a,b]
-        hexU2 = do a <- hexU; b <- hexU; [a,b]
-        bytes = B.pack [0..255]
-
-    -- Encode every byte
-    True <- Base16.encode bytes `shouldBe` B.pack hexL2
-
-    -- Decode every valid hex pair
-    True <- Base16.decode (B.pack hexL2) `shouldBe` (bytes, B.empty)
-    True <- Base16.decode (B.pack hexU2) `shouldBe` (bytes, B.empty)
-
-    -- Decode every invalid byte paired with a correct byte
-    let bads1 = [B.pack [a,b] | a <- notHex, b <- hexUL]
-    let bads2 = [B.pack [a,b] | a <- hexUL, b <- notHex]
-    True <- map Base16.decode bads1 `shouldBe` map (\s -> (B.empty, s)) bads1
-    True <- map Base16.decode bads2 `shouldBe` map (\s -> (B.empty, s)) bads2
-
-    -- Like above, but start with a correct byte
-    let correctHex   = B.pack [97,98]
-        correctBytes = B.pack [171]
-    True <- map (Base16.decode . (correctHex `B.append`)) bads1
-            `shouldBe` map (\s -> (correctBytes, s)) bads1
-    True <- map (Base16.decode . (correctHex `B.append`)) bads2
-            `shouldBe` map (\s -> (correctBytes, s)) bads2
-
-    -- Like above, but end with a correct byte
-    True <- map (Base16.decode . (`B.append` correctHex)) bads1
-            `shouldBe` map (\s -> (B.empty, s `B.append` correctHex)) bads1
-    True <- map (Base16.decode . (`B.append` correctHex)) bads2
-            `shouldBe` map (\s -> (B.empty, s `B.append` correctHex)) bads2
-
-    -- Lazy decoding also works with odd length chunks
-    let encodedLazy = BL.fromChunks $ map (B.pack . map c2w) ["614","239","6","142","39"]
-    True <- Base16L.decode encodedLazy `shouldBe` (BL.pack . map c2w $ "aB9aB9",BL.empty)
-
-    -- Lazy decoding is lazy on success
-    let encodedLazy = BL.iterate id 48
-    True <- (BL.unpack . BL.take 8 . fst . Base16L.decode $ encodedLazy)
-            `shouldBe` [0,0,0,0,0,0,0,0]
-
-    -- Lazy decoding is lazy on failure
-    let encodedLazy = BL.iterate id 47
-    True <- (BL.unpack . BL.take 8 . fst . Base16L.decode $ encodedLazy)
-            `shouldBe` []
 
     return ()