Skip to content

Commit

Permalink
Optimize Loops for 32 and 64-bit Words (#8)
Browse files Browse the repository at this point in the history
optimize loops for Word32 and Word64, restructure project
  • Loading branch information
emilypi authored Feb 3, 2020
1 parent 2cd339c commit ff8f481
Show file tree
Hide file tree
Showing 11 changed files with 517 additions and 2,974 deletions.
5 changes: 5 additions & 0 deletions base64.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ library
, Data.Text.Encoding.Base64.URL

other-modules: Data.ByteString.Base64.Internal
Data.ByteString.Base64.Internal.Tail
Data.ByteString.Base64.Internal.Utils
Data.ByteString.Base64.Internal.W32.Loop
Data.ByteString.Base64.Internal.W64.Loop
Data.ByteString.Base64.Internal.W8.Loop

build-depends: base >=4.10 && <5
, bytestring ^>=0.10
Expand Down
7 changes: 5 additions & 2 deletions benchmarks/Base64Bench.hs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import Data.ByteString.Random (random)
main :: IO ()
main =
defaultMain
[ env bs $ \ ~(bs25,bs100,bs1k,bs10k,bs100k) ->
[ env bs $ \ ~(bs25,bs100,bs1k,bs10k,bs100k,bs1mm) ->
bgroup "encode"
[ bgroup "memory"
[ bench "25" $ whnf ctob bs25
Expand All @@ -45,13 +45,15 @@ main =
, bench "1000" $ whnf Bos.encode bs1k
, bench "10000" $ whnf Bos.encode bs10k
, bench "100000" $ whnf Bos.encode bs100k
, bench "1000000" $ whnf Bos.encode bs1mm
]
, bgroup "base64"
[ bench "25" $ whnf B64.encodeBase64' bs25
, bench "100" $ whnf B64.encodeBase64' bs100
, bench "1000" $ whnf B64.encodeBase64' bs1k
, bench "10000" $ whnf B64.encodeBase64' bs10k
, bench "100000" $ whnf B64.encodeBase64' bs100k
, bench "1000000" $ whnf B64.encodeBase64' bs1mm
]
]
]
Expand All @@ -65,4 +67,5 @@ main =
c <- random 1000
d <- random 10000
e <- random 100000
return (a,b,c,d,e)
f <- random 1000000
return (a,b,c,d,e,f)
1,401 changes: 0 additions & 1,401 deletions benchmarks/output-decode.html

This file was deleted.

1,401 changes: 0 additions & 1,401 deletions benchmarks/output-encode.html

This file was deleted.

15 changes: 8 additions & 7 deletions default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,21 @@ let

inherit (nixpkgs) pkgs;

f = { mkDerivation, base, base64-bytestring, bytestring
, criterion, deepseq, lens, memory, random-bytestring, stdenv
, tasty, tasty-hunit, text
f = { mkDerivation, base, base64-bytestring, bytestring, deepseq
, gauge, memory, random-bytestring, stdenv, tasty, tasty-hunit
, text
}:
mkDerivation {
pname = "base64";
version = "0.1.0.0";
version = "0.4.0";
src = ./.;
libraryHaskellDepends = [ base bytestring deepseq lens text ];
libraryHaskellDepends = [ base bytestring text ];
testHaskellDepends = [
base base64-bytestring random-bytestring tasty tasty-hunit text
base base64-bytestring bytestring random-bytestring tasty
tasty-hunit text
];
benchmarkHaskellDepends = [
base base64-bytestring bytestring criterion deepseq memory
base base64-bytestring bytestring deepseq gauge memory
random-bytestring text
];
homepage = "https://github.com/emilypi/base64";
Expand Down
185 changes: 22 additions & 163 deletions src/Data/ByteString/Base64/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,20 @@ module Data.ByteString.Base64.Internal
) where


#include "MachDeps.h"

import Data.Bits
import qualified Data.ByteString as BS
import Data.ByteString.Base64.Internal.Tail
import Data.ByteString.Base64.Internal.Utils
#if WORD_SIZE_IN_BITS == 32
import Data.ByteString.Base64.Internal.W32.Loop
#elif WORD_SIZE_IN_BITS == 64
import Data.ByteString.Base64.Internal.W64.Loop
#else
import Data.ByteString.Base64.Internal.W8.Loop
#endif

import Data.ByteString.Internal
import Data.Text (Text)
import qualified Data.Text as T
Expand Down Expand Up @@ -79,23 +91,6 @@ data Padding
-- ^ Do we not pad out the bytestring?
deriving Eq

-- | Allocate and fill @n@ bytes with some data
--
writeNPlainForeignPtrBytes
:: ( Storable a
, Storable b
)
=> Int
-> [a]
-> ForeignPtr b
writeNPlainForeignPtrBytes !n as = unsafeDupablePerformIO $ do
fp <- mallocPlainForeignPtrBytes n
withForeignPtr fp $ \p -> go p as
return (castForeignPtr fp)
where
go !_ [] = return ()
go !p (x:xs) = poke p x >> go (plusPtr p 1) xs

-- | Pack an 'Addr#' into an encoding table of 'Word16's
--
packTable :: Addr# -> EncodingTable
Expand Down Expand Up @@ -150,174 +145,38 @@ validateBase64 !alphabet (PS fp off l) =
-- -------------------------------------------------------------------------- --
-- Encode Base64

-- | Read 'Word8' index off alphabet addr
--
aix :: Word8 -> Addr# -> Word8
aix (W8# i) alpha = W8# (indexWord8OffAddr# alpha (word2Int# i))
{-# INLINE aix #-}

-- | Encoding inner loop. Packs 3 bytes from src pointer into
-- the first 6 bytes of 4 'Word8''s (using the encoding table,
-- as 2 'Word12''s ), writing these to the dst pointer.
--
innerLoop
:: Ptr Word16
-> Ptr Word8
-> Ptr Word16
-> Ptr Word8
-> (Ptr Word8 -> Ptr Word8 -> IO ())
-> IO ()
innerLoop etable sptr dptr end finalize = go (castPtr sptr) dptr
where
go !src !dst
| plusPtr src 2 >= end = finalize (castPtr src) (castPtr dst)
| otherwise = do
#ifdef WORDS_BIGENDIAN
w <- peek @Word32 src
#else
w <- byteSwap32 <$> peek @Word32 src
#endif

!x <- peekElemOff etable (fromIntegral (unsafeShiftR w 20))
!y <- peekElemOff etable (fromIntegral ((unsafeShiftR w 8) .&. 0xfff))

poke dst x
poke (plusPtr dst 2) y

go (plusPtr src 3) (plusPtr dst 4)

-- | Unpadded encoding loop, finalized as a bytestring using the
-- resultant length count.
--
innerLoopNopad
:: Ptr Word16
-> Ptr Word8
-> Ptr Word16
-> Ptr Word8
-> (Ptr Word8 -> Ptr Word8 -> Int -> IO ByteString)
-> IO ByteString
innerLoopNopad etable sptr dptr end finalize = go (castPtr sptr) dptr 0
where
go !src !dst !n
| plusPtr src 2 >= end = finalize (castPtr src) (castPtr dst) n
| otherwise = do
#ifdef WORDS_BIGENDIAN
w <- peek @Word32 src
#else
w <- byteSwap32 <$> peek @Word32 src
#endif
!x <- peekElemOff etable (fromIntegral (unsafeShiftR w 20))
!y <- peekElemOff etable (fromIntegral ((unsafeShiftR w 8) .&. 0xfff))

poke dst x
poke (plusPtr dst 2) y

go (plusPtr src 3) (plusPtr dst 4) (n + 4)

encodeBase64_ :: EncodingTable -> ByteString -> ByteString
encodeBase64_ (EncodingTable !aptr !efp) (PS !sfp !soff !slen) =
unsafeCreate dlen $ \dptr ->
withForeignPtr sfp $ \sptr ->
withForeignPtr efp $ \eptr ->
encodeBase64_'
aptr
withForeignPtr efp $ \eptr -> do
let !end = plusPtr sptr (soff + slen)
innerLoop
eptr
(plusPtr sptr soff)
(castPtr dptr)
(plusPtr sptr (soff + slen))
end
(loopTail aptr end)
where
!dlen = 4 * ((slen + 2) `div` 3)

encodeBase64_'
:: Ptr Word8
-> Ptr Word16
-> Ptr Word8
-> Ptr Word16
-> Ptr Word8
-> IO ()
encodeBase64_' (Ptr !alpha) !etable !sptr !dptr !end =
innerLoop etable sptr dptr end finalize
where
finalize !src !dst
| src == end = return ()
| otherwise = do
!k <- peekByteOff src 0

let !a = shiftR (k .&. 0xfc) 2
!b = shiftL (k .&. 0x03) 4

pokeByteOff dst 0 (aix a alpha)

if plusPtr src 2 /= end
then do
pokeByteOff dst 1 (aix b alpha)
pokeByteOff @Word8 dst 2 0x3d
pokeByteOff @Word8 dst 3 0x3d
else do
!k' <- peekByteOff src 1

let !b' = shiftR (k' .&. 0xf0) 4 .|. b
!c' = shiftL (k' .&. 0x0f) 2

pokeByteOff dst 1 (aix b' alpha)
pokeByteOff dst 2 (aix c' alpha)
pokeByteOff @Word8 dst 3 0x3d


encodeBase64Nopad_ :: EncodingTable -> ByteString -> ByteString
encodeBase64Nopad_ (EncodingTable !aptr !efp) (PS !sfp !soff !slen) =
unsafeDupablePerformIO $ do
dfp <- mallocPlainForeignPtrBytes dlen
withForeignPtr dfp $ \dptr ->
withForeignPtr efp $ \etable ->
withForeignPtr sfp $ \sptr ->
encodeBase64Nopad_'
aptr
withForeignPtr sfp $ \sptr -> do
let !end = plusPtr sptr (soff + slen)
innerLoopNopad
etable
(plusPtr sptr soff)
(castPtr dptr)
(plusPtr sptr (soff + slen))
dfp
end
(loopTailNoPad dfp aptr end)
where
!dlen = 4 * ((slen + 2) `div` 3)

encodeBase64Nopad_'
:: Ptr Word8
-> Ptr Word16
-> Ptr Word8
-> Ptr Word16
-> Ptr Word8
-> ForeignPtr Word8
-> IO ByteString
encodeBase64Nopad_' (Ptr !alpha) !etable !sptr !dptr !end !dfp =
innerLoopNopad etable sptr dptr end finalize
where
finalize !src !dst !n
| src == end = return (PS dfp 0 n)
| otherwise = do
!k <- peekByteOff src 0

let !a = shiftR (k .&. 0xfc) 2
!b = shiftL (k .&. 0x03) 4

pokeByteOff dst 0 (aix a alpha)

if plusPtr src 2 /= end
then do
pokeByteOff dst 1 (aix b alpha)
return (PS dfp 0 (n + 2))
else do
!k' <- peekByteOff src 1

let !b' = shiftR (k' .&. 0xf0) 4 .|. b
!c' = shiftL (k' .&. 0x0f) 2

-- ideally, we'd want to pack these is in a single write
--
pokeByteOff dst 1 (aix b' alpha)
pokeByteOff dst 2 (aix c' alpha)
return (PS dfp 0 (n + 3))


-- -------------------------------------------------------------------------- --
-- Decoding Base64
Expand Down
95 changes: 95 additions & 0 deletions src/Data/ByteString/Base64/Internal/Tail.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE TypeApplications #-}
-- |
-- Module : Data.ByteString.Base64.Internal.W32.Loop
-- Copyright : (c) 2019 Emily Pillmore
-- License : BSD-style
--
-- Maintainer : Emily Pillmore <emilypi@cohomolo.gy>
-- Stability : Experimental
-- Portability : portable
--
-- Finalizers for the encoding loop
--
module Data.ByteString.Base64.Internal.Tail
( loopTail
, loopTailNoPad
) where

import Data.Bits
import Data.ByteString.Internal
import Data.ByteString.Base64.Internal.Utils

import Foreign.ForeignPtr
import Foreign.Ptr
import Foreign.Storable

import GHC.Exts
import GHC.Word

-- | Finalize an encoded bytestring by filling in the remaining
-- bytes and any padding
--
loopTail :: Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO ()
loopTail (Ptr !alpha) !end !src !dst
| src == end = return ()
| otherwise = do
!k <- peekByteOff src 0

let !a = shiftR (k .&. 0xfc) 2
!b = shiftL (k .&. 0x03) 4

pokeByteOff dst 0 (aix a alpha)

if plusPtr src 2 /= end
then do
pokeByteOff dst 1 (aix b alpha)
pokeByteOff @Word8 dst 2 0x3d
pokeByteOff @Word8 dst 3 0x3d
else do
!k' <- peekByteOff src 1

let !b' = shiftR (k' .&. 0xf0) 4 .|. b
!c' = shiftL (k' .&. 0x0f) 2

pokeByteOff dst 1 (aix b' alpha)
pokeByteOff dst 2 (aix c' alpha)
pokeByteOff @Word8 dst 3 0x3d
{-# INLINE loopTail #-}


-- | Finalize a bytestring by filling out the remaining bits
-- without padding.
--
loopTailNoPad
:: ForeignPtr Word8
-> Ptr Word8
-> Ptr Word8
-> Ptr Word8
-> Ptr Word8
-> Int
-> IO ByteString
loopTailNoPad !dfp (Ptr !alpha) !end !src !dst !n
| src == end = return (PS dfp 0 n)
| otherwise = do
!k <- peekByteOff src 0

let !a = shiftR (k .&. 0xfc) 2
!b = shiftL (k .&. 0x03) 4

pokeByteOff dst 0 (aix a alpha)

if plusPtr src 2 /= end
then do
pokeByteOff dst 1 (aix b alpha)
return (PS dfp 0 (n + 2))
else do
!k' <- peekByteOff src 1

let !b' = shiftR (k' .&. 0xf0) 4 .|. b
!c' = shiftL (k' .&. 0x0f) 2

pokeByteOff dst 1 (aix b' alpha)
pokeByteOff dst 2 (aix c' alpha)
return (PS dfp 0 (n + 3))
{-# INLINE loopTailNoPad #-}
Loading

0 comments on commit ff8f481

Please sign in to comment.