Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Haddoc docs #36

Merged
merged 1 commit into from
Dec 8, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion src/Data/Bits/Pdep.hs
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
{-# LANGUAGE CPP #-}

{-|
Module : Data.Bits.Pdep
Description : Parallel deposit operations
Copyright : (c) John Ky, 2018-2019
License : BSD-3-Clause
Maintainer : newhoggy@gmail.com
Stability : stable
-}
module Data.Bits.Pdep
( Pdep(..)
, fastPdepEnabled
Expand All @@ -11,8 +19,17 @@ import qualified Data.Bits.Pdep.Prim as P

-- | Bitwise parallel deposit. Deposits bits from the source at the locations
-- described by the mask.
--
-- Copies the low-order bits of the first argument (the source) into the
-- positions of the 1-bits of the second argument (the mask) in the return
-- value; positions where the mask has a 0-bit are cleared in the return value.
--
-- >>> pdep 1 1 :: Word64
-- 1
class Pdep a where
pdep :: a -> a -> a
pdep
:: a -- ^ the bitmap whose low-order bits are deposited (the source)
-> a -- ^ the bitmap selecting the bit locations that are to be deposited to (the mask)
-> a -- ^ the bitmap containing the deposited bits with all other bits cleared

-- | 'Word' instance: 'pdep' is the primitive wrapper @P.primPdep@.
instance Pdep Word where
pdep = P.primPdep
Expand All @@ -36,6 +53,9 @@ instance Pdep Word64 where

-- | Runtime flag indicating whether the 'pdep' function is using the
-- high-performance BMI2 instruction set. A value of 'False' indicates that
-- 'pdep' is emulated.
--
-- Actual performance when using the BMI2 instruction set will vary according to CPU
-- model. For example Intel CPUs currently outperform AMD CPUs in this area.
--
-- This is the flag of the same name from "Data.Bits.Pdep.Prim".
fastPdepEnabled :: Bool
fastPdepEnabled = P.fastPdepEnabled
{-# INLINE fastPdepEnabled #-}
48 changes: 48 additions & 0 deletions src/Data/Bits/Pdep/Prim.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE MagicHash #-}

{-|
Module : Data.Bits.Pdep.Prim
Description : Primop wrappers for the Parallel Deposit operation
Copyright : (c) John Ky, 2018-2019
License : BSD-3-Clause
Maintainer : newhoggy@gmail.com
Stability : stable
-}
module Data.Bits.Pdep.Prim
( primPdep
, primPdep8
Expand All @@ -18,10 +26,26 @@ import GHC.Prim
import Data.Bits.Pdep.Slow
#endif

-- | Parallel bit deposit specialised to 'Word'.
--
-- The low-order bits of @src@ are copied to the positions of the 1-bits of
-- @mask@ in the result; result bits at the 0-bit positions of @mask@ are
-- cleared.
--
-- Both operands are converted to 'Word64', handed to 'primPdep64', and the
-- result is converted back to 'Word'.
--
-- >>> primPdep 1 1
-- 1
primPdep :: Word -> Word -> Word
primPdep src mask = fromIntegral deposited
  where
    deposited = primPdep64 (fromIntegral src) (fromIntegral mask)
{-# INLINE primPdep #-}

-- | Bitwise parallel deposit for 'Word64'. Deposits bits from the source at the locations
-- described by the mask.
--
-- Copies lower order bits from 'src' to 'mask' 1-bit locations in the return value;
-- 'mask' 0-bit locations in the return value will be cleared.
--
-- >>> primPdep64 1 1
-- 1
primPdep64 :: Word64 -> Word64 -> Word64
#if MIN_VERSION_base(4,11,0) && defined(BMI2_ENABLED)
primPdep64 (W64# src#) (W64# mask#) = W64# (pdep64# src# mask#)
Expand All @@ -30,6 +54,14 @@ primPdep64 = slowPdep
#endif
{-# INLINE primPdep64 #-}

-- | Bitwise parallel deposit for 'Word32'. Deposits bits from the source at the locations
-- described by the mask.
--
-- Copies lower order bits from 'src' to 'mask' 1-bit locations in the return value;
-- 'mask' 0-bit locations in the return value will be cleared.
--
-- >>> primPdep32 1 1
-- 1
primPdep32 :: Word32 -> Word32 -> Word32
#if MIN_VERSION_base(4,11,0) && defined(BMI2_ENABLED)
primPdep32 (W32# src#) (W32# mask#) = W32# (pdep32# src# mask#)
Expand All @@ -38,10 +70,26 @@ primPdep32 = slowPdep
#endif
{-# INLINE primPdep32 #-}

-- | Parallel bit deposit specialised to 'Word16'.
--
-- The low-order bits of @src@ are copied to the positions of the 1-bits of
-- @mask@ in the result; result bits at the 0-bit positions of @mask@ are
-- cleared.
--
-- Both operands are converted to 'Word32', handed to 'primPdep32', and the
-- result is converted back to 'Word16'.
--
-- >>> primPdep16 1 1
-- 1
primPdep16 :: Word16 -> Word16 -> Word16
primPdep16 src mask = fromIntegral (primPdep32 src32 mask32)
  where
    src32  = fromIntegral src
    mask32 = fromIntegral mask
{-# INLINE primPdep16 #-}

-- | Parallel bit deposit specialised to 'Word8'.
--
-- The low-order bits of @src@ are copied to the positions of the 1-bits of
-- @mask@ in the result; result bits at the 0-bit positions of @mask@ are
-- cleared.
--
-- Both operands are converted to 'Word32', handed to 'primPdep32', and the
-- result is converted back to 'Word8'.
--
-- >>> primPdep8 1 1
-- 1
primPdep8 :: Word8 -> Word8 -> Word8
primPdep8 src mask = narrow (primPdep32 (widen src) (widen mask))
  where
    widen :: Word8 -> Word32
    widen = fromIntegral

    narrow :: Word32 -> Word8
    narrow = fromIntegral
{-# INLINE primPdep8 #-}
Expand Down
16 changes: 16 additions & 0 deletions src/Data/Bits/Pdep/Slow.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
{-|
Module : Data.Bits.Pdep.Slow
Description : Parallel deposit operations (emulated)
Copyright : (c) John Ky, 2018-2019
License : BSD-3-Clause
Maintainer : newhoggy@gmail.com
Stability : stable
-}
module Data.Bits.Pdep.Slow
( SlowPdep(..)
) where
Expand All @@ -6,6 +14,14 @@ import Data.Bits
import GHC.Int
import GHC.Word

-- | Parallel bit deposit for 'Word64'.
--
-- The low-order bits of @src@ are copied to the positions of the 1-bits of
-- @mask@ in the result; result bits at the 0-bit positions of @mask@ are
-- cleared.
--
-- Delegates to the worker @slowPdep64'@, passing @0@ as its first argument.
--
-- >>> slowPdep64 1 1
-- 1
slowPdep64 :: Word64 -> Word64 -> Word64
slowPdep64 src mask = slowPdep64' 0 src mask

Expand Down
25 changes: 22 additions & 3 deletions src/Data/Bits/Pext.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE MagicHash #-}

{-|
Module : Data.Bits.Pext
Description : Parallel extract operations
Copyright : (c) John Ky, 2018-2019
License : BSD-3-Clause
Maintainer : newhoggy@gmail.com
Stability : stable
-}
module Data.Bits.Pext
( Pext(..)
, fastPextEnabled
Expand All @@ -10,10 +18,18 @@ import GHC.Word

import qualified Data.Bits.Pext.Prim as P

-- | Bitwise parallel extract. Extracts bits from the source at the locations
-- described by the mask.
--
-- Copies the bits of the first argument (the source) that are selected by the
-- 1-bits of the second argument (the mask) to contiguous low-order bits of the
-- return value; higher-order return value bits are cleared.
--
-- >>> pext 1 1 :: Word64
-- 1
class Pext a where
pext :: a -> a -> a
pext
:: a -- ^ the bitmap from which bits will be extracted
-> a -- ^ the bitmap selecting the bits that are to be extracted
-> a -- ^ the bitmap containing the extracted bits with higher-order bits cleared

-- | 'Word' instance: 'pext' is the primitive wrapper @P.primPext@.
instance Pext Word where
pext = P.primPext
Expand All @@ -37,6 +53,9 @@ instance Pext Word64 where

-- | Runtime flag indicating whether the 'pext' function is using the
-- high-performance BMI2 instruction set. A value of 'False' indicates that
-- 'pext' is emulated.
--
-- Actual performance when using the BMI2 instruction set will vary according to CPU
-- model. For example Intel CPUs currently outperform AMD CPUs in this area.
--
-- This is the flag of the same name from "Data.Bits.Pext.Prim".
--
-- NOTE(review): the definition in "Data.Bits.Pext.Prim" sits under a CPP
-- @#if@, so the value looks fixed at compile time rather than probed at
-- run time -- confirm before relying on the word "runtime".
fastPextEnabled :: Bool
fastPextEnabled = P.fastPextEnabled
{-# INLINE fastPextEnabled #-}
63 changes: 57 additions & 6 deletions src/Data/Bits/Pext/Prim.hs
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE MagicHash #-}

{-|
Module : Data.Bits.Pext.Prim
Description : Primop wrappers for the Parallel Extract operation
Copyright : (c) John Ky, 2018-2019
License : BSD-3-Clause
Maintainer : newhoggy@gmail.com
Stability : stable
-}
module Data.Bits.Pext.Prim
( primPext
, primPext8
Expand All @@ -18,35 +26,78 @@ import GHC.Prim
import Data.Bits.Pext.Slow
#endif

-- | Parallel extract bits for 'Word'
--
-- Copies selected bits from @src@ to contiguous low-order bits of the return value;
-- higher-order return value bits are cleared.
--
-- Both arguments are converted to 'Word64', passed to 'primPext64', and the
-- result is converted back to 'Word'.
--
-- >>> primPext 1 1
-- 1
primPext :: Word -> Word -> Word
primPext
:: Word -- ^ the bitmap from which bits will be extracted
-> Word -- ^ the bitmap selecting the bits that are to be extracted
-> Word -- ^ the bitmap containing the extracted bits with higher-order bits cleared
primPext src mask = fromIntegral (primPext64 (fromIntegral src) (fromIntegral mask))
{-# INLINE primPext #-}

primPext64 :: Word64 -> Word64 -> Word64
-- | Parallel extract bits for 'Word64'
--
-- Copies the bits of @src@ selected by @mask@ to contiguous low-order bits of
-- the return value; higher-order return value bits are cleared.
--
-- >>> primPext64 1 1
-- 1
primPext64
:: Word64 -- ^ the bitmap from which bits will be extracted
-> Word64 -- ^ the bitmap selecting the bits that are to be extracted
-> Word64 -- ^ the bitmap containing the extracted bits with higher-order bits cleared
#if MIN_VERSION_base(4,11,0) && defined(BMI2_ENABLED)
-- base >= 4.11 with BMI2_ENABLED defined: unwrap the 'W64#' constructors and
-- apply the pext64# primop directly.
primPext64 (W64# src#) (W64# mask#) = W64# (pext64# src# mask#)
#else
-- Otherwise fall back to the emulated 'slowPext'.
primPext64 = slowPext
#endif
{-# INLINE primPext64 #-}

primPext32 :: Word32 -> Word32 -> Word32
-- | Parallel extract bits for 'Word32'
--
-- Copies the bits of @src@ selected by @mask@ to contiguous low-order bits of
-- the return value; higher-order return value bits are cleared.
--
-- >>> primPext32 1 1
-- 1
primPext32
:: Word32 -- ^ the bitmap from which bits will be extracted
-> Word32 -- ^ the bitmap selecting the bits that are to be extracted
-> Word32 -- ^ the bitmap containing the extracted bits with higher-order bits cleared
#if MIN_VERSION_base(4,11,0) && defined(BMI2_ENABLED)
-- base >= 4.11 with BMI2_ENABLED defined: unwrap the 'W32#' constructors and
-- apply the pext32# primop directly.
primPext32 (W32# src#) (W32# mask#) = W32# (pext32# src# mask#)
#else
-- Otherwise fall back to the emulated 'slowPext'.
primPext32 = slowPext
#endif
{-# INLINE primPext32 #-}

primPext16 :: Word16 -> Word16 -> Word16
-- | Parallel extract bits for 'Word16'
--
-- Copies the bits of @src@ selected by @mask@ to contiguous low-order bits of
-- the return value; higher-order return value bits are cleared.
--
-- Both arguments are converted to 'Word32', passed to 'primPext32', and the
-- result is converted back to 'Word16'.
--
-- >>> primPext16 1 1
-- 1
primPext16
:: Word16 -- ^ the bitmap from which bits will be extracted
-> Word16 -- ^ the bitmap selecting the bits that are to be extracted
-> Word16 -- ^ the bitmap containing the extracted bits with higher-order bits cleared
primPext16 src mask = fromIntegral (primPext32 (fromIntegral src) (fromIntegral mask))
{-# INLINE primPext16 #-}

primPext8 :: Word8 -> Word8 -> Word8
-- | Parallel extract bits for 'Word8'
--
-- Copies the bits of @src@ selected by @mask@ to contiguous low-order bits of
-- the return value; higher-order return value bits are cleared.
--
-- Both arguments are converted to 'Word32', passed to 'primPext32', and the
-- result is converted back to 'Word8'.
--
-- >>> primPext8 1 1
-- 1
primPext8
:: Word8 -- ^ the bitmap from which bits will be extracted
-> Word8 -- ^ the bitmap selecting the bits that are to be extracted
-> Word8 -- ^ the bitmap containing the extracted bits with higher-order bits cleared
primPext8 src mask = fromIntegral (primPext32 (fromIntegral src) (fromIntegral mask))
{-# INLINE primPext8 #-}

-- | Runtime flag indicating whether the 'pext' function is using the high-performance.
-- | Runtime flag indicating whether the 'pext' function is using the high-performance
-- BMI2 instruction set. A value of `False` indicates that `pext` is emulated.
fastPextEnabled :: Bool
#if MIN_VERSION_base(4,11,0) && defined(BMI2_ENABLED)
Expand Down
18 changes: 16 additions & 2 deletions src/Data/Bits/Pext/Slow.hs
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
{-|
Module : Data.Bits.Pext.Slow
Description : Parallel extract operations (emulated)
Copyright : (c) John Ky, 2018-2019
License : BSD-3-Clause
Maintainer : newhoggy@gmail.com
Stability : stable
-}
module Data.Bits.Pext.Slow
( SlowPext(..)
) where

import Data.Bits
import GHC.Word

-- | Parallel extract bits for 'Word64' (emulated).
--
-- Entry point for the worker @slowPext64'@, which is started with its
-- @result@, @offset@ and @index@ arguments all set to @0@.
slowPext64 :: Word64 -> Word64 -> Word64
slowPext64
:: Word64 -- ^ the bitmap from which bits will be extracted
-> Word64 -- ^ the bitmap selecting the bits that are to be extracted
-> Word64 -- ^ the bitmap containing the extracted bits with higher-order bits cleared
slowPext64 = slowPext64' 0 0 0

slowPext64' :: Word64 -> Int -> Int -> Word64 -> Word64 -> Word64
Expand All @@ -21,7 +32,10 @@ slowPext64' result offset index src mask = if index /= 64
-- | Bitwise parallel extract (emulated). Extracts bits from the source at
-- the locations described by the mask.
class SlowPext a where
slowPext :: a -> a -> a
slowPext
:: a -- ^ the bitmap from which bits will be extracted
-> a -- ^ the bitmap selecting the bits that are to be extracted
-> a -- ^ the bitmap containing the extracted bits with higher-order bits cleared

-- | 'Word' instance: both arguments are converted to 'Word64', passed to
-- @slowPext64@, and the result is converted back to 'Word'.
instance SlowPext Word where
slowPext s m = fromIntegral (slowPext64 (fromIntegral s) (fromIntegral m))
Expand Down