diff --git a/plutus-core/changelog.d/20230809_093202_koz.ross_bitwise.md b/plutus-core/changelog.d/20230809_093202_koz.ross_bitwise.md new file mode 100644 index 00000000000..2452d730960 --- /dev/null +++ b/plutus-core/changelog.d/20230809_093202_koz.ross_bitwise.md @@ -0,0 +1,41 @@ + + + +### Added + +- Bitwise primitive operations, according to + [CIP-0058](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0058). + + + + + diff --git a/plutus-core/cost-model/create-cost-model/CreateBuiltinCostModel.hs b/plutus-core/cost-model/create-cost-model/CreateBuiltinCostModel.hs index f0fdd26dd43..43a08a00213 100644 --- a/plutus-core/cost-model/create-cost-model/CreateBuiltinCostModel.hs +++ b/plutus-core/cost-model/create-cost-model/CreateBuiltinCostModel.hs @@ -123,6 +123,18 @@ builtinCostModelNames = BuiltinCostModelBase , paramBls12_381_finalVerify = "bls12_381_finalVerifyModel" , paramBlake2b_224 = "blake2b_224Model" , paramKeccak_256 = "keccak_256Model" + , paramIntegerToByteString = "integerToByteStringModel" + , paramByteStringToInteger = "byteStringToIntegerModel" + , paramAndByteString = "andByteStringModel" + , paramIorByteString = "iorByteStringModel" + , paramXorByteString = "xorByteStringModel" + , paramComplementByteString = "complementByteStringModel" + , paramShiftByteString = "shiftByteStringModel" + , paramRotateByteString = "rotateByteStringModel" + , paramPopCountByteString = "popCountByteStringModel" + , paramTestBitByteString = "testBitByteStringModel" + , paramWriteBitByteString = "writeBitByteStringModel" + , paramFindFirstSetByteString = "findFirstSetByteStringModel" } @@ -233,6 +245,19 @@ createBuiltinCostModel bmfile rfile = do paramBls12_381_millerLoop <- getParams bls12_381_millerLoop paramBls12_381_millerLoop paramBls12_381_mulMlResult <- getParams bls12_381_mulMlResult paramBls12_381_mulMlResult paramBls12_381_finalVerify <- getParams bls12_381_finalVerify paramBls12_381_finalVerify + -- Bitwise operations + paramIntegerToByteString 
<- getParams integerToByteString paramIntegerToByteString + paramByteStringToInteger <- getParams byteStringToInteger paramByteStringToInteger + paramAndByteString <- getParams andByteString paramAndByteString + paramIorByteString <- getParams iorByteString paramIorByteString + paramXorByteString <- getParams xorByteString paramXorByteString + paramComplementByteString <- getParams complementByteString paramComplementByteString + paramShiftByteString <- getParams shiftByteString paramShiftByteString + paramRotateByteString <- getParams rotateByteString paramRotateByteString + paramPopCountByteString <- getParams popCountByteString paramPopCountByteString + paramTestBitByteString <- getParams testBitByteString paramTestBitByteString + paramWriteBitByteString <- getParams writeBitByteString paramWriteBitByteString + paramFindFirstSetByteString <- getParams findFirstSetByteString paramFindFirstSetByteString paramKeccak_256 <- getParams keccak_256 paramKeccak_256 paramBlake2b_224 <- getParams blake2b_224 paramBlake2b_224 @@ -922,9 +947,80 @@ bls12_381_mulMlResult cpuModelR = do pure $ CostingFun cpuModel memModel bls12_381_finalVerify :: MonadR m => (SomeSEXP (Region m)) -> m (CostingFun ModelTwoArguments) -bls12_381_finalVerify cpuModelR= do +bls12_381_finalVerify cpuModelR = do cpuModel <- ModelTwoArgumentsConstantCost <$> readModelConstantCost cpuModelR let memModel = boolMemModel pure $ CostingFun cpuModel memModel +integerToByteString :: MonadR m => (SomeSEXP (Region m)) -> m (CostingFun ModelOneArgument) +integerToByteString cpuModelR = do + cpuModel <- ModelOneArgumentLinearCost <$> readModelLinearInX cpuModelR + let memModel = ModelOneArgumentLinearCost $ ModelLinearSize 0 1 + pure $ CostingFun cpuModel memModel + +byteStringToInteger :: MonadR m => (SomeSEXP (Region m)) -> m (CostingFun ModelOneArgument) +byteStringToInteger cpuModelR = do + cpuModel <- ModelOneArgumentLinearCost <$> readModelLinearInX cpuModelR + let memModel = ModelOneArgumentLinearCost $ 
ModelLinearSize 0 1 + pure $ CostingFun cpuModel memModel + +andByteString :: MonadR m => (SomeSEXP (Region m)) -> m (CostingFun ModelTwoArguments) +andByteString cpuModelR = do + cpuModel <- ModelTwoArgumentsMaxSize <$> readModelMaxSize cpuModelR + let memModel = ModelTwoArgumentsMaxSize $ ModelMaxSize 0 1 + pure $ CostingFun cpuModel memModel + +iorByteString :: MonadR m => (SomeSEXP (Region m)) -> m (CostingFun ModelTwoArguments) +iorByteString cpuModelR = do + cpuModel <- ModelTwoArgumentsMaxSize <$> readModelMaxSize cpuModelR + let memModel = ModelTwoArgumentsMaxSize $ ModelMaxSize 0 1 + pure $ CostingFun cpuModel memModel + +xorByteString :: MonadR m => (SomeSEXP (Region m)) -> m (CostingFun ModelTwoArguments) +xorByteString cpuModelR = do + cpuModel <- ModelTwoArgumentsMaxSize <$> readModelMaxSize cpuModelR + let memModel = ModelTwoArgumentsMaxSize $ ModelMaxSize 0 1 + pure $ CostingFun cpuModel memModel + +complementByteString :: MonadR m => (SomeSEXP (Region m)) -> m (CostingFun ModelOneArgument) +complementByteString cpuModelR = do + cpuModel <- ModelOneArgumentLinearCost <$> readModelLinearInX cpuModelR + let memModel = ModelOneArgumentLinearCost $ ModelLinearSize 0 1 + pure $ CostingFun cpuModel memModel + +shiftByteString :: MonadR m => (SomeSEXP (Region m)) -> m (CostingFun ModelTwoArguments) +shiftByteString cpuModelR = do + cpuModel <- undefined + let memModel = undefined + pure $ CostingFun cpuModel memModel + +rotateByteString :: MonadR m => (SomeSEXP (Region m)) -> m (CostingFun ModelTwoArguments) +rotateByteString cpuModelR = do + cpuModel <- undefined -- FIXME + let memModel = undefined -- FIXME + pure $ CostingFun cpuModel memModel + +popCountByteString :: MonadR m => (SomeSEXP (Region m)) -> m (CostingFun ModelOneArgument) +popCountByteString cpuModelR = do + cpuModel <- ModelOneArgumentLinearCost <$> readModelLinearInX cpuModelR + let memModel = ModelOneArgumentLinearCost $ ModelLinearSize 0 1 -- FIXME + pure $ CostingFun cpuModel memModel + 
+testBitByteString :: MonadR m => (SomeSEXP (Region m)) -> m (CostingFun ModelTwoArguments) +testBitByteString cpuModelR = do + cpuModel <- undefined -- FIXME + let memModel = ModelTwoArgumentsConstantCost 1 + pure $ CostingFun cpuModel memModel + +writeBitByteString :: MonadR m => (SomeSEXP (Region m)) -> m (CostingFun ModelThreeArguments) +writeBitByteString cpuModelR = do + cpuModel <- undefined -- FIXME + let memModel = undefined -- FIXME + pure $ CostingFun cpuModel memModel + +findFirstSetByteString :: MonadR m => (SomeSEXP (Region m)) -> m (CostingFun ModelOneArgument) +findFirstSetByteString cpuModelR = do + cpuModel <- ModelOneArgumentLinearCost <$> readModelLinearInX cpuModelR + let memModel = undefined -- FIXME + pure $ CostingFun cpuModel memModel diff --git a/plutus-core/cost-model/data/builtinCostModel.json b/plutus-core/cost-model/data/builtinCostModel.json index e1fd262c885..8abf48f5249 100644 --- a/plutus-core/cost-model/data/builtinCostModel.json +++ b/plutus-core/cost-model/data/builtinCostModel.json @@ -899,5 +899,197 @@ "arguments": 10, "type": "constant_cost" } + }, + "integerToByteString": { + "cpu": { + "arguments": { + "intercept": 9999999999, + "slope": 9999999999 + }, + "type": "linear_cost" + }, + "memory": { + "arguments": { + "intercept": 0, + "slope": 1 + }, + "type": "linear_cost" + } + }, + "byteStringToInteger": { + "cpu": { + "arguments": { + "intercept": 9999999999, + "slope": 9999999999 + }, + "type": "linear_cost" + }, + "memory": { + "arguments": { + "intercept": 1, + "slope": 1 + }, + "type": "linear_cost" + } + }, + "andByteString": { + "cpu": { + "arguments": { + "intercept": 9999999999, + "slope": 9999999999 + }, + "type": "max_size" + }, + "memory": { + "arguments": { + "intercept": 1, + "slope": 1 + }, + "type": "max_size" + } + }, + "iorByteString": { + "cpu": { + "arguments": { + "intercept": 9999999999, + "slope": 9999999999 + }, + "type": "max_size" + }, + "memory": { + "arguments": { + "intercept": 1, + "slope": 1 + 
}, + "type": "max_size" + } + }, + "xorByteString": { + "cpu": { + "arguments": { + "intercept": 9999999999, + "slope": 9999999999 + }, + "type": "max_size" + }, + "memory": { + "arguments": { + "intercept": 1, + "slope": 1 + }, + "type": "max_size" + } + }, + "complementByteString": { + "cpu": { + "arguments": { + "intercept": 9999999999, + "slope": 9999999999 + }, + "type": "linear_cost" + }, + "memory": { + "arguments": { + "intercept": 0, + "slope": 1 + }, + "type": "linear_cost" + } + }, + "shiftByteString": { + "cpu": { + "arguments": { + "intercept": 9999999999, + "slope": 9999999999 + }, + "type": "max_size" + }, + "memory": { + "arguments": { + "intercept": 1, + "slope": 1 + }, + "type": "max_size" + } + }, + "rotateByteString": { + "cpu": { + "arguments": { + "intercept": 9999999999, + "slope": 9999999999 + }, + "type": "max_size" + }, + "memory": { + "arguments": { + "intercept": 1, + "slope": 1 + }, + "type": "max_size" + } + }, + "popCountByteString": { + "cpu": { + "arguments": { + "intercept": 9999999999, + "slope": 9999999999 + }, + "type": "linear_cost" + }, + "memory": { + "arguments": { + "intercept": 0, + "slope": 1 + }, + "type": "linear_cost" + } + }, + "testBitByteString": { + "cpu": { + "arguments": { + "intercept": 9999999999, + "slope": 9999999999 + }, + "type": "linear_in_x" + }, + "memory": { + "arguments": { + "intercept": 0, + "slope": 1 + }, + "type": "linear_in_x" + } + }, + "writeBitByteString": { + "cpu": { + "arguments": { + "intercept": 9999999999, + "slope": 9999999999 + }, + "type": "linear_in_x" + }, + "memory": { + "arguments": { + "intercept": 0, + "slope": 1 + }, + "type": "linear_in_x" + } + }, + "findFirstSetByteString": { + "cpu": { + "arguments": { + "intercept": 9999999999, + "slope": 9999999999 + }, + "type": "linear_cost" + }, + "memory": { + "arguments": { + "intercept": 0, + "slope": 1 + }, + "type": "linear_cost" + } } } diff --git a/plutus-core/cost-model/data/models.R b/plutus-core/cost-model/data/models.R index 
cd77bb0bc87..60db2d3ab22 100644 --- a/plutus-core/cost-model/data/models.R +++ b/plutus-core/cost-model/data/models.R @@ -651,6 +651,20 @@ modelFun <- function(path) { bls12_381_mulMlResultModel <- constantModel ("Bls12_381_mulMlResult") bls12_381_finalVerifyModel <- constantModel ("Bls12_381_finalVerify") + ##### Bitwise operations ##### + integerToByteStringModel <- NULL ### FIXME + byteStringToIntegerModel <- NULL + andByteStringModel <- NULL + iorByteStringModel <- NULL + xorByteStringModel <- NULL + complementByteStringModel <- NULL + shiftByteStringModel <- NULL + rotateByteStringModel <- NULL + popCountByteStringModel <- NULL + testBitByteStringModel <- NULL + writeBitByteStringModel <- NULL + findFirstSetByteStringModel <- NULL + list( addIntegerModel = addIntegerModel, subtractIntegerModel = subtractIntegerModel, @@ -724,6 +738,18 @@ modelFun <- function(path) { bls12_381_G2_uncompressModel = bls12_381_G2_uncompressModel, bls12_381_millerLoopModel = bls12_381_millerLoopModel, bls12_381_mulMlResultModel = bls12_381_mulMlResultModel, - bls12_381_finalVerifyModel = bls12_381_finalVerifyModel + bls12_381_finalVerifyModel = bls12_381_finalVerifyModel, + integerToByteStringModel = integerToByteStringModel, + byteStringToIntegerModel = byteStringToIntegerModel, + andByteStringModel = andByteStringModel, + iorByteStringModel = iorByteStringModel, + xorByteStringModel = xorByteStringModel, + complementByteStringModel = complementByteStringModel, + shiftByteStringModel = shiftByteStringModel, + rotateByteStringModel = rotateByteStringModel, + popCountByteStringModel = popCountByteStringModel, + testBitByteStringModel = testBitByteStringModel, + writeBitByteStringModel = writeBitByteStringModel, + findFirstSetByteStringModel = findFirstSetByteStringModel ) -} +} \ No newline at end of file diff --git a/plutus-core/cost-model/test/TestCostModels.hs b/plutus-core/cost-model/test/TestCostModels.hs index 2e7546b25a8..b0660327929 100644 --- 
a/plutus-core/cost-model/test/TestCostModels.hs +++ b/plutus-core/cost-model/test/TestCostModels.hs @@ -419,9 +419,22 @@ main = , $(genTest 2 "bls12_381_millerLoop") Everywhere , $(genTest 2 "bls12_381_mulMlResult") Everywhere , $(genTest 2 "bls12_381_finalVerify") Everywhere - -- Keccak_256, Blake2b_224 , $(genTest 1 "keccak_256") , $(genTest 1 "blake2b_224") - ] + -- Bitwise operations + , $(genTest 1 "integerToByteString") + , $(genTest 1 "byteStringToInteger") + , $(genTest 2 "andByteString") Everywhere + , $(genTest 2 "iorByteString") Everywhere + , $(genTest 2 "xorByteString") Everywhere + , $(genTest 1 "complementByteString") + , $(genTest 2 "shiftByteString") Everywhere + , $(genTest 2 "rotateByteString") Everywhere + , $(genTest 1 "popCountByteString") + , $(genTest 2 "testBitByteString") Everywhere + , $(genTest 3 "writeBitByteString") + , $(genTest 1 "findFirstSetByteString") + ] + diff --git a/plutus-core/plutus-core.cabal b/plutus-core/plutus-core.cabal index 94624f30a72..6d367bca1cc 100644 --- a/plutus-core/plutus-core.cabal +++ b/plutus-core/plutus-core.cabal @@ -71,6 +71,7 @@ common lang library import: lang exposed-modules: + Bitwise Data.Either.Extras Data.List.Extras Data.MultiSet.Lens @@ -281,6 +282,7 @@ library , filepath , flat ^>=0.6 , free + , ghc-bignum ^>=1.3 , ghc-prim , hashable >=1.4 , hedgehog >=1.0 @@ -383,6 +385,7 @@ test-suite untyped-plutus-core-test DeBruijn.Spec DeBruijn.UnDeBruijnify Evaluation.Builtins + Evaluation.Builtins.Bitwise Evaluation.Builtins.BLS12_381 Evaluation.Builtins.BLS12_381.TestClasses Evaluation.Builtins.BLS12_381.Utils @@ -992,3 +995,46 @@ test-suite index-envs-test , quickcheck-instances , tasty , tasty-quickcheck + +benchmark bitwise + import: lang + type: exitcode-stdio-1.0 + hs-source-dirs: plutus-core/bench/bitwise + default-language: Haskell2010 + main-is: Main.hs + other-modules: + Benches.Binary + Benches.BitRead + Benches.BitWrite + Benches.Complement + Benches.Convert + Benches.CountLeadingZeroes + 
Benches.Popcount + Benches.Rotate + Benches.Shift + DataGen + + include-dirs: plutus-core/bench/bitwise/cbits + c-sources: + plutus-core/bench/bitwise/cbits/binary.c + plutus-core/bench/bitwise/cbits/bit-access.c + plutus-core/bench/bitwise/cbits/clz.c + plutus-core/bench/bitwise/cbits/complement.c + plutus-core/bench/bitwise/cbits/popcount.c + plutus-core/bench/bitwise/cbits/rotate.c + plutus-core/bench/bitwise/cbits/shift.c + + cc-options: -O3 + + if arch(x86_64) + cc-options: -mpopcnt -mabm + + build-depends: + , base + , bytestring + , mtl + , random + , tasty + , tasty-bench + + ghc-options: -O2 -rtsopts "-with-rtsopts=-A32m --nonmoving-gc -T" diff --git a/plutus-core/plutus-core/bench/bitwise/BENCHES.md b/plutus-core/plutus-core/bench/bitwise/BENCHES.md new file mode 100644 index 00000000000..0a1dcc5ce1e --- /dev/null +++ b/plutus-core/plutus-core/bench/bitwise/BENCHES.md @@ -0,0 +1,900 @@ +# Benchmarks of bitwise operations for Plutus core + +## Introduction + +## Conventions + +We use _kilobyte_ (_KiB_ for short) to mean $2^{10}$ (1024) bytes. + +We assume versions of Haskell libraries as defined by Plutus and determined by +`cabal new-freeze`. In particular, the following library versions are assumed to +be in use: + +* `bytestring-0.10.12.0` +* `wide-word-0.1.1.2` + +An operation is _bit-parallel_ if it treats its inputs as vectors of bits and +assumes no other structure. For example [bitwise logical +AND](https://en.wikipedia.org/wiki/Bitwise_operation#AND) is bit-parallel, while +a [bitwise +rotation](https://en.wikipedia.org/wiki/Bitwise_operation#Circular_shift) isn't. +This extends the definition of right-to-left computability defined in [Hacker's +Delight](https://en.wikipedia.org/wiki/Hacker%27s_Delight), but is stricter. + +We use _population count_ to mean the number of 1 bits. We use this term with +individual bytes, words, or sequences of either. 
+ +## Background + +Plutus Core, which is designed to be executed on-chain, has unusual limitations +regarding performance. Specifically, the size of its possible arguments is +_significantly_ bounded: according to +[CIP-0009](https://github.com/cardano-foundation/CIPs/tree/master/CIP-0009), the +current on-chain transaction size limit is 16384 bytes (16KiB), which includes +both script sources and arguments. While this limit could rise in the future, +currently, it means that `ByteString` arguments larger than about 2KiB aren't +practical. This implies that: + +* Asymptotically-optimal solutions may not be worthwhile if their 'ramp-up' + requires inputs approaching, or larger than, 2KiB; and +* Small inputs must be considered the norm. + +### Spatial and temporal locality + +## Implementation + +### Bitwise binary operations + +The naive approach (and our first) uses a combination of `fromListN` and `zip` +together with the operator in question; in essence, this streams both arguments +into a list of `Word8`s, combines them with the given operator, then 'repacks' +into a `ByteString`. This approach, while easy to implement, is likely to be +fairly slow once the size of its arguments becomes even moderately large. We +enumerate the reasons for this hypothesis below; many of these are based on the +reasoning described in [the spatial and temporal locality](#spatial-and-temporal-locality) +section. + +* The `ByteString` library + [does not use the size hint](https://hackage.haskell.org/package/bytestring-0.10.12.0/docs/src/Data.ByteString.Internal.html#line-194) + given to `fromListN`. This requires a traversal of the input list, which + forces it, making any stream fusion from GHC ineffective. +* Haskell linked lists lack spatial locality, and due to the above, we have to + force spatially local data into a spatially non-local form, only to then + immediately convert it _back_ to a spatially local form again. 
This not only + requires copying the data, it also means that we lose the benefits of spatial + locality for no reason. +* Haskell linked lists full of `Word8` lack temporal locality: even though a + modern machine can fit eight `Word8`s into a single register, if the data is + in a list, this cannot be done. + +These concerns were seen as sufficient to warrant the introduction of +`packZipWith`, which [avoids creating an intermediate list](https://github.com/haskell/bytestring/pull/295). +This operation amounts to creating an empty `ByteString` of the right length, +then performing the 'zipping' in-place. This avoids the problems of spatial +locality, and in theory could also avoid problems of temporal locality if the +operation being performed is bit-parallel. However, in general, 'zipping' +operations on `Word8`s cannot be assumed to be bit-parallel, which requires a +more conservative, 'byte-at-a-time' approach. Furthermore, `packZipWith` only +became available in the `bytestring` library as of [release +0.11.1.0](https://hackage.haskell.org/package/bytestring-0.11.1.0/changelog). +Thus, we replicate it for our second approach. + +All of our bitwise binary operations are bit-parallel by their definition; this +allows an implementation where we exploit temporal locality to process eight +bytes at a time. This essentially mirrors what the second approach does, but +instead first takes a maximal number of 'big steps' (eight bytes at a time), +followed by any remaining inputs being processed one byte at a time. We take +this as our third approach. We also attempt a fourth approach, where we take +even larger steps, using the `Word256` type from `wide-word`: this amounts to a +four-way [loop unroll](https://en.wikipedia.org/wiki/Loop_unrolling), as GHC +cannot currently generate SIMD instructions, even for bit-parallel operations. 
+This can, in theory, still be beneficial, due to +[ILP](https://en.wikipedia.org/wiki/Instruction-level_parallelism) being +available on most modern CPUs. More specifically, the third approach works as +follows: + +1. Allocate an empty `ByteString` of the correct length. +2. While at least eight bytes of both inputs are remaining, perform the bitwise + operation on an eight-byte chunk of both inputs, then write the result to the + corresponding part of the empty `ByteString` from 1. +3. For the remaining bytes, perform the bitwise operation on each of the + corresponding bytes of the input, writing them to the empty `ByteString` from + 1. + +The fourth approach works as follows: + +1. Allocate an empty `ByteString` of the correct length. +2. While at least 32 bytes of both inputs are remaining, perform the bitwise + operation on a 32-byte chunk of both inputs, then write the result to the + corresponding part of the empty `ByteString` from 1. +3. While at least eight bytes of both inputs are remaining, perform the bitwise + operation on an eight-byte chunk of both inputs, then write the result to the + corresponding part of the empty `ByteString` from 1. +4. For the remaining bytes, perform the bitwise operation on each of the + corresponding bytes of the input, writing them to the empty `ByteString` from + 1. + +### Bitwise complement + +A naive (and our first) approach would use `map` from `bytestring`: this is +implemented as a [loop over a preconstructed empty +`bytestring`](https://hackage.haskell.org/package/bytestring-0.10.12.0/docs/src/Data.ByteString.html#map), +and thus has good spatial locality. In theory, if given a bit-parallel +operation, it could make use of temporal locality by operating on larger widths +(such as whole machine words), but as `Word8` operations cannot be assumed to be +bit-parallel in general, it must work 'byte-at-a-time'. 
In our case, bitwise +complement _is_ bit-parallel, so our second approach attempts to make use of +this fact, essentially doing the following: + +1. Allocate an empty `ByteString` the same length as the input. +2. While at least eight bytes of the input remains, determine the bitwise + complement of an eight-byte chunk of the input, then write it to the + corresponding part of the empty `ByteString` from 1. +3. For the remaining bytes, perform the bitwise complement on each of the + corresponding bytes of the input, writing them to the empty `ByteString` from + 1. + +We also define a third approach which takes even larger steps, using the +`Word256` type from `wide-word`. This amounts to a four-way loop unroll, as GHC +cannot currently generate SIMD instructions, even for bit-parallel operations. +This can, in theory, still be beneficial, due to ILP being available on most +modern CPUs. More specifically, the third approach works as follows: + +1. Allocate an empty `ByteString` the same length as the input. +2. While at least 32 bytes of the input remains, determine the bitwise + complement of a 32-byte chunk of the input, then write it to the + corresponding part of the empty `ByteString` from 1. +3. While at least eight bytes of the input remains, determine the bitwise + complement of an eight-byte chunk of the input, then write it to the + corresponding part of the empty `ByteString` from 1. +4. For the remaining bytes, perform the bitwise complement on each of the + corresponding bytes of the input, writing them to the empty `ByteString` from + 1. + +### Population count + +A naive approach would involve a fold over the `Word8`s in the argument, summing +the result of `popCount`. This forms our first approach, using the `foldl'` +function provided by `bytestring`. 
This approach makes good use of spatial +locality, but not particularly good use of temporal locality: each `Word8` we +load into a register to population count still requires a memory transfer, but +we only population count 8 bits, rather than the 64 bits that could fit into the +register. Moreover, x86_64 platforms have efficient instructions dedicated to +population counting, which can easily count a whole register's worth of bits. +Thus, our second approach makes use of this capability by doing two 'phases' of +counting: firstly, we count eight-byte chunks, then finish what remains one byte +at a time. Specifically, we do the following: + +1. Initialize a counter to 0. +2. While at least eight bytes of the input remains, population count an + eight-byte chunk, then add the result to the counter. +3. While any bytes of the input remain, population count one byte, then add the + result to the counter. +4. Return the counter. + +We also define a third approach which takes even larger chunks, using the +`Word256` type from `wide-word`. This amounts to a four-way loop unroll, as +there are no specialized instructions for population counting chunks larger than +eight bytes on any current architectures supported by GHC. This can, in theory, +still be beneficial, due to ILP being available on most modern CPUs. More +specifically, the third approach works as follows: + +1. Initialize a counter to 0. +2. While at least 32 bytes of input remains, population count a 32-byte chunk, + then add the result to the counter. +3. While at least eight bytes of the input remains, population count an + eight-byte chunk, then add the result to the counter. +4. While any bytes of the input remain, population count one byte, then add the + result to the counter. +5. Return the counter. 
+ +## Methodology + +### Bitwise binary operations + +We benchmark only bitwise AND, as the other operations do not differ +structurally, are also bit-parallel, and given a fixed width of inputs, are +constant time relative to that width. We implement all four approaches; for the +third and fourth approaches, we implement them both in Haskell and in C, which +is called via the FFI. This allows us to see if any overheads are being +introduced by GHC. + +We use pairs of inputs of the following lengths: + +* 1 +* 3 +* 7 +* 15 +* 31 +* 63 +* 127 +* 255 +* 511 +* 1023 +* 2047 + +We choose these values to disadvantage the third and fourth approaches as much +as possible, as values that are one less than a power of 2 in length would +require them to do the most work in their last step. + +We compare all approaches to the first (that is, naive) one. + +### Bitwise complement + +We implement all three approaches; for the second approach, we implement it both +in Haskell and in C, which is called via the FFI. This is to determine whether +any overheads are being introduced by GHC. + +We use inputs of the following lengths: + +* 1 +* 3 +* 7 +* 15 +* 31 +* 63 +* 127 +* 255 +* 511 +* 1023 +* 2047 + +We choose these values to disadvantage the second and third approaches as much +as possible, as values that are one less than a power of 2 in length would +require them to do the most work in their last step. + +We compare all approaches to the first (that is, naive) one. + +### Population count + +We implement all three approaches. We use inputs of the following lengths: + +* 1 +* 3 +* 7 +* 15 +* 31 +* 63 +* 127 +* 255 +* 511 +* 1023 +* 2047 + +We choose these values to disadvantage the second and third approaches as much +as possible, as values that are one less than a power of 2 in length would +require them to do the most work in their second-to-last step. 
+ +## Results + +Throughout, we run benchmarks (implemented with `tasty-bench`) with `--stdev=1 +--timeout=200` to ensure minimal interference and accurate readings, while +avoiding timeouts due to the increased time required to get accurate readings. + +### Bitwise binary operations + +The results of our benchmarks given the methodology we described are below. +Throughout, `zipWith` refers to the first approach, `packZipWith` to the second +approach, `chunkedZipWith (2 blocks)` to the third approach, and `chunkedZipWith +(3 blocks)` to the fourth approach. We also mark the C implementations of the +third and fourth approach. All multipliers are shows as multiples of the running +time of the first approach on the same length of data. + +``` +All + Bitwise AND + Bitwise AND, length 1 + zipWith: OK (0.86s) + 46.2 ns ± 864 ps, 254 B allocated, 0 B copied, 45 MB peak memory + packZipWith: OK (5.96s) + 177 ns ± 2.3 ns, 359 B allocated, 95 B copied, 52 MB peak memory, 3.83x + chunkedZipWith (2 blocks): OK (6.00s) + 178 ns ± 1.0 ns, 359 B allocated, 95 B copied, 52 MB peak memory, 3.85x + chunkedZipWith (2 blocks, C): OK (12.14s) + 180 ns ± 904 ps, 359 B allocated, 95 B copied, 52 MB peak memory, 3.89x + chunkedZipWith (3 blocks): OK (3.15s) + 184 ns ± 2.1 ns, 358 B allocated, 95 B copied, 52 MB peak memory, 3.99x + chunkedZipWith (3 blocks, C): OK (0.78s) + 180 ns ± 3.5 ns, 347 B allocated, 92 B copied, 52 MB peak memory, 3.90x + Bitwise AND, length 3 + zipWith: OK (1.85s) + 54.3 ns ± 530 ps, 350 B allocated, 0 B copied, 52 MB peak memory + packZipWith: OK (0.78s) + 181 ns ± 3.2 ns, 347 B allocated, 92 B copied, 52 MB peak memory, 3.33x + chunkedZipWith (2 blocks): OK (6.28s) + 186 ns ± 1.3 ns, 359 B allocated, 95 B copied, 52 MB peak memory, 3.43x + chunkedZipWith (2 blocks, C): OK (0.79s) + 184 ns ± 3.5 ns, 347 B allocated, 92 B copied, 52 MB peak memory, 3.39x + chunkedZipWith (3 blocks): OK (6.39s) + 189 ns ± 3.7 ns, 359 B allocated, 95 B copied, 52 MB peak memory, 
3.47x + chunkedZipWith (3 blocks, C): OK (100.97s) + 187 ns ± 3.1 ns, 359 B allocated, 95 B copied, 52 MB peak memory, 3.45x + Bitwise AND, length 7 + zipWith: OK (1.18s) + 68.2 ns ± 1.0 ns, 508 B allocated, 0 B copied, 52 MB peak memory + packZipWith: OK (0.78s) + 182 ns ± 3.2 ns, 347 B allocated, 92 B copied, 52 MB peak memory, 2.67x + chunkedZipWith (2 blocks): OK (3.25s) + 191 ns ± 942 ps, 358 B allocated, 95 B copied, 52 MB peak memory, 2.80x + chunkedZipWith (2 blocks, C): OK (3.17s) + 188 ns ± 1.2 ns, 358 B allocated, 95 B copied, 52 MB peak memory, 2.75x + chunkedZipWith (3 blocks): OK (3.27s) + 194 ns ± 966 ps, 358 B allocated, 95 B copied, 52 MB peak memory, 2.85x + chunkedZipWith (3 blocks, C): OK (3.28s) + 193 ns ± 2.4 ns, 358 B allocated, 95 B copied, 52 MB peak memory, 2.83x + Bitwise AND, length 15 + zipWith: OK (0.91s) + 106 ns ± 1.7 ns, 844 B allocated, 0 B copied, 52 MB peak memory + packZipWith: OK (3.23s) + 191 ns ± 2.7 ns, 358 B allocated, 95 B copied, 52 MB peak memory, 1.80x + chunkedZipWith (2 blocks): OK (1.68s) + 198 ns ± 3.4 ns, 353 B allocated, 94 B copied, 52 MB peak memory, 1.87x + chunkedZipWith (2 blocks, C): OK (1.69s) + 196 ns ± 2.6 ns, 353 B allocated, 94 B copied, 52 MB peak memory, 1.85x + chunkedZipWith (3 blocks): OK (13.48s) + 201 ns ± 1.2 ns, 359 B allocated, 95 B copied, 52 MB peak memory, 1.90x + chunkedZipWith (3 blocks, C): OK (3.40s) + 200 ns ± 1.2 ns, 358 B allocated, 95 B copied, 52 MB peak memory, 1.88x + Bitwise AND, length 31 + zipWith: OK (1.69s) + 199 ns ± 1.9 ns, 1.5 KB allocated, 0 B copied, 52 MB peak memory + packZipWith: OK (6.10s) + 182 ns ± 1.4 ns, 359 B allocated, 95 B copied, 52 MB peak memory, 0.91x + chunkedZipWith (2 blocks): OK (3.15s) + 186 ns ± 2.0 ns, 358 B allocated, 95 B copied, 52 MB peak memory, 0.93x + chunkedZipWith (2 blocks, C): OK (0.80s) + 184 ns ± 3.4 ns, 347 B allocated, 92 B copied, 52 MB peak memory, 0.92x + chunkedZipWith (3 blocks): OK (6.55s) + 194 ns ± 1.6 ns, 359 B allocated, 95 
B copied, 52 MB peak memory, 0.97x + chunkedZipWith (3 blocks, C): OK (3.28s) + 194 ns ± 3.4 ns, 358 B allocated, 95 B copied, 52 MB peak memory, 0.97x + Bitwise AND, length 63 + zipWith: OK (0.79s) + 371 ns ± 6.4 ns, 2.8 KB allocated, 0 B copied, 52 MB peak memory + packZipWith: OK (7.81s) + 233 ns ± 918 ps, 359 B allocated, 95 B copied, 52 MB peak memory, 0.63x + chunkedZipWith (2 blocks): OK (3.84s) + 228 ns ± 2.2 ns, 358 B allocated, 95 B copied, 52 MB peak memory, 0.61x + chunkedZipWith (2 blocks, C): OK (0.93s) + 215 ns ± 3.2 ns, 347 B allocated, 92 B copied, 52 MB peak memory, 0.58x + chunkedZipWith (3 blocks): OK (3.84s) + 226 ns ± 2.7 ns, 358 B allocated, 95 B copied, 52 MB peak memory, 0.61x + chunkedZipWith (3 blocks, C): OK (0.96s) + 223 ns ± 3.5 ns, 347 B allocated, 92 B copied, 52 MB peak memory, 0.60x + Bitwise AND, length 127 + zipWith: OK (1.43s) + 665 ns ± 11 ns, 5.4 KB allocated, 1 B copied, 52 MB peak memory + packZipWith: OK (0.91s) + 209 ns ± 3.8 ns, 347 B allocated, 92 B copied, 52 MB peak memory, 0.31x + chunkedZipWith (2 blocks): OK (0.82s) + 181 ns ± 2.8 ns, 347 B allocated, 92 B copied, 52 MB peak memory, 0.27x + chunkedZipWith (2 blocks, C): OK (52.40s) + 196 ns ± 2.6 ns, 359 B allocated, 95 B copied, 52 MB peak memory, 0.29x + chunkedZipWith (3 blocks): OK (0.87s) + 191 ns ± 3.0 ns, 347 B allocated, 92 B copied, 52 MB peak memory, 0.29x + chunkedZipWith (3 blocks, C): OK (3.26s) + 190 ns ± 2.6 ns, 358 B allocated, 95 B copied, 52 MB peak memory, 0.29x + Bitwise AND, length 255 + zipWith: OK (0.75s) + 1.38 μs ± 22 ns, 11 KB allocated, 3 B copied, 52 MB peak memory + packZipWith: OK (4.49s) + 263 ns ± 898 ps, 358 B allocated, 95 B copied, 52 MB peak memory, 0.19x + chunkedZipWith (2 blocks): OK (1.77s) + 204 ns ± 3.5 ns, 353 B allocated, 94 B copied, 52 MB peak memory, 0.15x + chunkedZipWith (2 blocks, C): OK (1.81s) + 208 ns ± 3.6 ns, 353 B allocated, 94 B copied, 52 MB peak memory, 0.15x + chunkedZipWith (3 blocks): OK (14.16s) + 210 ns 
± 940 ps, 359 B allocated, 95 B copied, 52 MB peak memory, 0.15x + chunkedZipWith (3 blocks, C): OK (3.41s) + 197 ns ± 2.1 ns, 358 B allocated, 95 B copied, 52 MB peak memory, 0.14x + Bitwise AND, length 511 + zipWith: OK (1.42s) + 2.67 μs ± 43 ns, 21 KB allocated, 2 B copied, 52 MB peak memory + packZipWith: OK (1.48s) + 332 ns ± 3.1 ns, 347 B allocated, 92 B copied, 52 MB peak memory, 0.12x + chunkedZipWith (2 blocks): OK (1.94s) + 219 ns ± 3.0 ns, 353 B allocated, 94 B copied, 52 MB peak memory, 0.08x + chunkedZipWith (2 blocks, C): OK (3.74s) + 217 ns ± 2.2 ns, 358 B allocated, 95 B copied, 52 MB peak memory, 0.08x + chunkedZipWith (3 blocks): OK (1.04s) + 230 ns ± 2.7 ns, 347 B allocated, 92 B copied, 52 MB peak memory, 0.09x + chunkedZipWith (3 blocks, C): OK (1.90s) + 214 ns ± 2.9 ns, 353 B allocated, 94 B copied, 52 MB peak memory, 0.08x + Bitwise AND, length 1023 + zipWith: OK (0.78s) + 5.68 μs ± 100 ns, 42 KB allocated, 55 B copied, 52 MB peak memory + packZipWith: OK (1.13s) + 499 ns ± 7.9 ns, 341 B allocated, 91 B copied, 52 MB peak memory, 0.09x + chunkedZipWith (2 blocks): OK (1.28s) + 282 ns ± 3.7 ns, 347 B allocated, 92 B copied, 52 MB peak memory, 0.05x + chunkedZipWith (2 blocks, C): OK (1.29s) + 280 ns ± 3.9 ns, 347 B allocated, 92 B copied, 52 MB peak memory, 0.05x + chunkedZipWith (3 blocks): OK (1.38s) + 297 ns ± 4.7 ns, 347 B allocated, 92 B copied, 52 MB peak memory, 0.05x + chunkedZipWith (3 blocks, C): OK (1.29s) + 283 ns ± 3.2 ns, 347 B allocated, 92 B copied, 52 MB peak memory, 0.05x + Bitwise AND, length 2047 + zipWith: OK (0.78s) + 11.2 μs ± 170 ns, 85 KB allocated, 88 B copied, 52 MB peak memory + packZipWith: OK (3.50s) + 798 ns ± 4.1 ns, 347 B allocated, 92 B copied, 52 MB peak memory, 0.07x + chunkedZipWith (2 blocks): OK (1.76s) + 385 ns ± 3.5 ns, 347 B allocated, 92 B copied, 52 MB peak memory, 0.03x + chunkedZipWith (2 blocks, C): OK (6.57s) + 379 ns ± 1.6 ns, 358 B allocated, 95 B copied, 52 MB peak memory, 0.03x + 
chunkedZipWith (3 blocks): OK (1.88s) + 414 ns ± 4.8 ns, 347 B allocated, 92 B copied, 52 MB peak memory, 0.04x + chunkedZipWith (3 blocks, C): OK (1.78s) + 395 ns ± 3.1 ns, 347 B allocated, 92 B copied, 52 MB peak memory, 0.04x +``` + +We observe that, up to length 15, the first approach comes out ahead, especially +on smaller inputs. However, at around length 30, a 'phase transition' occurs, +where the other approaches win out, with this becoming increasingly apparent as +we get to the limits of our sizes. In particular, in the middle of the size +range (between 63 and 511 bytes inclusive), other approaches beat out the naive +one by a factor of between 2 and 10, which is not insignificant. We also note +that the first approach allocates substantially more than the others, likely due +to lists it cannot fuse away; all other approaches have fixed allocations. + +It is not clear from the above whether the second, third or fourth approaches +are better in general; to this end, we ran only these in isolation, comparing +the third and fourth approaches to the second: + +``` +All + Packed bitwise AND + Packed bitwise AND, length 31 + packZipWith: OK (3.34s) + 390 ns ± 2.8 ns, 3.0 KB allocated, 0 B copied, 45 MB peak memory + chunkedZipWith (2 blocks): OK (6.21s) + 184 ns ± 2.7 ns, 359 B allocated, 95 B copied, 52 MB peak memory, 0.47x + chunkedZipWith (2 blocks, C): OK (1.57s) + 184 ns ± 1.5 ns, 427 B allocated, 95 B copied, 52 MB peak memory, 0.47x + chunkedZipWith (3 blocks): OK (3.20s) + 189 ns ± 1.7 ns, 358 B allocated, 95 B copied, 52 MB peak memory, 0.48x + chunkedZipWith (3 blocks, C): OK (1.62s) + 190 ns ± 2.8 ns, 427 B allocated, 95 B copied, 52 MB peak memory, 0.49x + Packed bitwise AND, length 63 + packZipWith: OK (0.74s) + 679 ns ± 12 ns, 5.8 KB allocated, 0 B copied, 52 MB peak memory + chunkedZipWith (2 blocks): OK (6.91s) + 208 ns ± 2.2 ns, 359 B allocated, 95 B copied, 52 MB peak memory, 0.31x + chunkedZipWith (2 blocks, C): OK (3.93s) + 236 ns ± 1.3 
ns, 430 B allocated, 95 B copied, 52 MB peak memory, 0.35x + chunkedZipWith (3 blocks): OK (8.05s) + 239 ns ± 796 ps, 359 B allocated, 95 B copied, 52 MB peak memory, 0.35x + chunkedZipWith (3 blocks, C): OK (8.10s) + 242 ns ± 2.2 ns, 431 B allocated, 95 B copied, 52 MB peak memory, 0.36x + Packed bitwise AND, length 127 + packZipWith: OK (0.73s) + 1.40 μs ± 22 ns, 11 KB allocated, 1 B copied, 52 MB peak memory + chunkedZipWith (2 blocks): OK (50.76s) + 188 ns ± 2.6 ns, 359 B allocated, 95 B copied, 52 MB peak memory, 0.13x + chunkedZipWith (2 blocks, C): OK (1.67s) + 191 ns ± 2.4 ns, 427 B allocated, 95 B copied, 52 MB peak memory, 0.14x + chunkedZipWith (3 blocks): OK (0.84s) + 193 ns ± 3.5 ns, 347 B allocated, 92 B copied, 52 MB peak memory, 0.14x + chunkedZipWith (3 blocks, C): OK (0.84s) + 191 ns ± 3.1 ns, 424 B allocated, 94 B copied, 52 MB peak memory, 0.14x + Packed bitwise AND, length 255 + packZipWith: OK (2.85s) + 2.69 μs ± 22 ns, 23 KB allocated, 7 B copied, 52 MB peak memory + chunkedZipWith (2 blocks): OK (6.91s) + 204 ns ± 1.7 ns, 359 B allocated, 95 B copied, 52 MB peak memory, 0.08x + chunkedZipWith (2 blocks, C): OK (0.87s) + 199 ns ± 2.9 ns, 424 B allocated, 94 B copied, 52 MB peak memory, 0.07x + chunkedZipWith (3 blocks): OK (3.61s) + 213 ns ± 1.4 ns, 358 B allocated, 95 B copied, 52 MB peak memory, 0.08x + chunkedZipWith (3 blocks, C): OK (1.73s) + 201 ns ± 2.5 ns, 427 B allocated, 95 B copied, 52 MB peak memory, 0.07x + Packed bitwise AND, length 511 + packZipWith: OK (23.36s) + 5.60 μs ± 89 ns, 45 KB allocated, 11 B copied, 52 MB peak memory + chunkedZipWith (2 blocks): OK (1.92s) + 221 ns ± 4.2 ns, 353 B allocated, 94 B copied, 52 MB peak memory, 0.04x + chunkedZipWith (2 blocks, C): OK (14.52s) + 215 ns ± 3.8 ns, 431 B allocated, 95 B copied, 52 MB peak memory, 0.04x + chunkedZipWith (3 blocks): OK (2.05s) + 233 ns ± 4.3 ns, 353 B allocated, 94 B copied, 52 MB peak memory, 0.04x + chunkedZipWith (3 blocks, C): OK (7.27s) + 214 ns ± 1.5 ns, 
431 B allocated, 95 B copied, 52 MB peak memory, 0.04x + Packed bitwise AND, length 1023 + packZipWith: OK (0.74s) + 11.2 μs ± 170 ns, 90 KB allocated, 30 B copied, 52 MB peak memory + chunkedZipWith (2 blocks): OK (4.85s) + 282 ns ± 1.8 ns, 358 B allocated, 95 B copied, 52 MB peak memory, 0.03x + chunkedZipWith (2 blocks, C): OK (4.84s) + 280 ns ± 752 ps, 430 B allocated, 95 B copied, 52 MB peak memory, 0.02x + chunkedZipWith (3 blocks): OK (1.36s) + 305 ns ± 3.0 ns, 347 B allocated, 92 B copied, 52 MB peak memory, 0.03x + chunkedZipWith (3 blocks, C): OK (2.49s) + 283 ns ± 3.3 ns, 427 B allocated, 95 B copied, 52 MB peak memory, 0.03x + Packed bitwise AND, length 2047 + packZipWith: OK (2.95s) + 22.3 μs ± 350 ns, 181 KB allocated, 114 B copied, 52 MB peak memory + chunkedZipWith (2 blocks): OK (1.79s) + 394 ns ± 2.7 ns, 347 B allocated, 92 B copied, 52 MB peak memory, 0.02x + chunkedZipWith (2 blocks, C): OK (3.43s) + 390 ns ± 6.4 ns, 427 B allocated, 95 B copied, 52 MB peak memory, 0.02x + chunkedZipWith (3 blocks): OK (1.87s) + 410 ns ± 5.8 ns, 347 B allocated, 92 B copied, 52 MB peak memory, 0.02x + chunkedZipWith (3 blocks, C): OK (0.94s) + 392 ns ± 5.5 ns, 405 B allocated, 90 B copied, 52 MB peak memory, 0.02x +``` + +We observe that the third and fourth approaches beat out the second by a factor +of at least 2, with said factor increasing to ~50 towards the largest inputs. +However, there doesn't appear to be much difference between the third and fourth +approaches. Additionally, the C-implemented versions do not out-perform their +Haskell equivalents by a worthwhile margin, while marginally increasing +allocations. + +### Bitwise complement + +The results of our benchmarks given the methodology we described are below. +Throughout, `map` refers to the first approach, `chunkedMap (2 blocks)` refers +to the second approach, and `chunkMap (3 blocks)` refers to the third approach. +We also mark the C implementation of the second approach. 
All multipliers are +shown as multiples of the running time of the first approach on the same length +of data. + +``` +All + Bitwise complement + Bitwise complement, length 1 + map: OK (26.39s) + 24.0 ns ± 464 ps, 111 B allocated, 0 B copied, 45 MB peak memory + chunkedMap (2 blocks): OK (26.34s) + 199 ns ± 3.7 ns, 343 B allocated, 95 B copied, 53 MB peak memory, 8.31x + chunkedMap (2 blocks, C): OK (3.47s) + 205 ns ± 1.4 ns, 341 B allocated, 95 B copied, 53 MB peak memory, 8.56x + chunkMap (3 blocks): OK (1.77s) + 206 ns ± 4.1 ns, 339 B allocated, 94 B copied, 53 MB peak memory, 8.60x + Bitwise complement, length 3 + map: OK (0.97s) + 28.0 ns ± 334 ps, 119 B allocated, 0 B copied, 53 MB peak memory + chunkedMap (2 blocks): OK (3.53s) + 209 ns ± 1.4 ns, 341 B allocated, 95 B copied, 53 MB peak memory, 7.46x + chunkedMap (2 blocks, C): OK (3.54s) + 210 ns ± 2.9 ns, 341 B allocated, 95 B copied, 53 MB peak memory, 7.47x + chunkMap (3 blocks): OK (3.62s) + 214 ns ± 2.5 ns, 341 B allocated, 95 B copied, 53 MB peak memory, 7.62x + Bitwise complement, length 7 + map: OK (1.03s) + 29.8 ns ± 410 ps, 119 B allocated, 0 B copied, 53 MB peak memory + chunkedMap (2 blocks): OK (1.76s) + 207 ns ± 1.5 ns, 339 B allocated, 94 B copied, 53 MB peak memory, 6.93x + chunkedMap (2 blocks, C): OK (7.16s) + 213 ns ± 1.2 ns, 343 B allocated, 95 B copied, 53 MB peak memory, 7.14x + chunkMap (3 blocks): OK (3.61s) + 214 ns ± 4.0 ns, 341 B allocated, 95 B copied, 53 MB peak memory, 7.18x + Bitwise complement, length 15 + map: OK (2.26s) + 33.4 ns ± 222 ps, 127 B allocated, 0 B copied, 53 MB peak memory + chunkedMap (2 blocks): OK (3.61s) + 214 ns ± 1.6 ns, 341 B allocated, 95 B copied, 53 MB peak memory, 6.40x + chunkedMap (2 blocks, C): OK (7.34s) + 219 ns ± 2.7 ns, 343 B allocated, 95 B copied, 53 MB peak memory, 6.54x + chunkMap (3 blocks): OK (123.63s) + 234 ns ± 12 ns, 343 B allocated, 96 B copied, 53 MB peak memory, 7.01x + Bitwise complement, length 31 + map: OK (1.36s) + 40.3 ns ± 
548 ps, 143 B allocated, 0 B copied, 53 MB peak memory + chunkedMap (2 blocks): OK (122.95s) + 232 ns ± 806 ps, 343 B allocated, 96 B copied, 53 MB peak memory, 5.75x + chunkedMap (2 blocks, C): OK (8.32s) + 247 ns ± 3.4 ns, 343 B allocated, 95 B copied, 53 MB peak memory, 6.13x + chunkMap (3 blocks): OK (2.08s) + 245 ns ± 4.3 ns, 339 B allocated, 94 B copied, 53 MB peak memory, 6.09x + Bitwise complement, length 63 + map: OK (0.94s) + 54.2 ns ± 748 ps, 173 B allocated, 0 B copied, 53 MB peak memory + chunkedMap (2 blocks): OK (144.17s) + 276 ns ± 6.8 ns, 343 B allocated, 96 B copied, 53 MB peak memory, 5.10x + chunkedMap (2 blocks, C): OK (1.17s) + 270 ns ± 4.2 ns, 330 B allocated, 92 B copied, 53 MB peak memory, 4.99x + chunkMap (3 blocks): OK (2.40s) + 281 ns ± 2.2 ns, 339 B allocated, 94 B copied, 53 MB peak memory, 5.19x + Bitwise complement, length 127 + map: OK (0.69s) + 78.7 ns ± 1.4 ns, 235 B allocated, 0 B copied, 53 MB peak memory + chunkedMap (2 blocks): OK (5.88s) + 174 ns ± 1.4 ns, 343 B allocated, 95 B copied, 53 MB peak memory, 2.21x + chunkedMap (2 blocks, C): OK (5.73s) + 169 ns ± 2.9 ns, 343 B allocated, 95 B copied, 53 MB peak memory, 2.15x + chunkMap (3 blocks): OK (2.89s) + 169 ns ± 2.0 ns, 341 B allocated, 95 B copied, 53 MB peak memory, 2.15x + Bitwise complement, length 255 + map: OK (0.96s) + 111 ns ± 1.4 ns, 362 B allocated, 0 B copied, 53 MB peak memory + chunkedMap (2 blocks): OK (1.55s) + 176 ns ± 2.1 ns, 339 B allocated, 94 B copied, 53 MB peak memory, 1.58x + chunkedMap (2 blocks, C): OK (2.90s) + 166 ns ± 2.5 ns, 341 B allocated, 95 B copied, 53 MB peak memory, 1.49x + chunkMap (3 blocks): OK (2.98s) + 173 ns ± 1.5 ns, 341 B allocated, 95 B copied, 53 MB peak memory, 1.56x + Bitwise complement, length 511 + map: OK (0.77s) + 175 ns ± 3.4 ns, 611 B allocated, 0 B copied, 53 MB peak memory + chunkedMap (2 blocks): OK (0.89s) + 195 ns ± 2.7 ns, 330 B allocated, 92 B copied, 53 MB peak memory, 1.11x + chunkedMap (2 blocks, C): OK 
(12.96s) + 191 ns ± 186 ps, 343 B allocated, 95 B copied, 53 MB peak memory, 1.09x + chunkMap (3 blocks): OK (1.70s) + 192 ns ± 3.7 ns, 339 B allocated, 94 B copied, 53 MB peak memory, 1.10x + Bitwise complement, length 1023 + map: OK (1.34s) + 311 ns ± 2.8 ns, 1.1 KB allocated, 0 B copied, 53 MB peak memory + chunkedMap (2 blocks): OK (2.19s) + 246 ns ± 2.0 ns, 339 B allocated, 94 B copied, 53 MB peak memory, 0.79x + chunkedMap (2 blocks, C): OK (8.14s) + 236 ns ± 2.8 ns, 343 B allocated, 95 B copied, 53 MB peak memory, 0.76x + chunkMap (3 blocks): OK (2.23s) + 248 ns ± 4.5 ns, 339 B allocated, 94 B copied, 53 MB peak memory, 0.80x + Bitwise complement, length 2047 + map: OK (0.66s) + 592 ns ± 11 ns, 2.1 KB allocated, 0 B copied, 53 MB peak memory + chunkedMap (2 blocks): OK (92.07s) + 342 ns ± 568 ps, 343 B allocated, 96 B copied, 53 MB peak memory, 0.58x + chunkedMap (2 blocks, C): OK (21.84s) + 318 ns ± 780 ps, 343 B allocated, 95 B copied, 53 MB peak memory, 0.54x + chunkMap (3 blocks): OK (6.18s) + 354 ns ± 6.3 ns, 341 B allocated, 95 B copied, 53 MB peak memory, 0.60x +``` + +These results show that until the input length becomes significantly long +(around 1KiB), the first approach is much better (as much as a factor of 8). We +do see some improvement at the upper end of our sizes, but this amounts to about +a factor of 2 at most. The C implementation of the second approach does not seem +to give significant speedups; the third approach appears slower than the second +for all tested sizes. 
+ +To establish where the 'phase transition' between the first and second +approaches happens, we ran further benchmarks, limiting our sizes to the space +between 511 and 1023 bytes: + +``` +All + Bitwise complement probe + Bitwise complement probe, length 511 + map: OK (4.99s) + 147 ns ± 2.2 ns, 621 B allocated, 0 B copied, 45 MB peak memory + chunkedMap (2 blocks): OK (1.64s) + 182 ns ± 2.5 ns, 339 B allocated, 94 B copied, 53 MB peak memory, 1.24x + chunkedMap (2 blocks, C): OK (1.61s) + 181 ns ± 1.3 ns, 339 B allocated, 94 B copied, 53 MB peak memory, 1.23x + chunkMap (3 blocks): OK (0.84s) + 183 ns ± 3.1 ns, 330 B allocated, 92 B copied, 53 MB peak memory, 1.25x + Bitwise complement probe, length 767 + map: OK (0.88s) + 205 ns ± 2.7 ns, 875 B allocated, 0 B copied, 53 MB peak memory + chunkedMap (2 blocks): OK (0.95s) + 206 ns ± 2.6 ns, 330 B allocated, 92 B copied, 53 MB peak memory, 1.00x + chunkedMap (2 blocks, C): OK (6.91s) + 202 ns ± 1.1 ns, 343 B allocated, 95 B copied, 53 MB peak memory, 0.99x + chunkMap (3 blocks): OK (1.85s) + 208 ns ± 3.1 ns, 339 B allocated, 94 B copied, 53 MB peak memory, 1.02x + Bitwise complement probe, length 1023 + map: OK (1.26s) + 295 ns ± 4.5 ns, 1.1 KB allocated, 0 B copied, 53 MB peak memory + chunkedMap (2 blocks): OK (63.98s) + 238 ns ± 292 ps, 343 B allocated, 95 B copied, 53 MB peak memory, 0.81x + chunkedMap (2 blocks, C): OK (7.89s) + 228 ns ± 3.2 ns, 343 B allocated, 95 B copied, 53 MB peak memory, 0.77x + chunkMap (3 blocks): OK (2.15s) + 242 ns ± 1.5 ns, 339 B allocated, 94 B copied, 53 MB peak memory, 0.82x +``` + +We note that at 767 bytes (exactly mid-way), the 'phase transition' has already +occurred. + +### Population count + +The results of our benchmark given the methodology we described are below. +Throughout, `foldl'` refers to the first approach, `chunkPopCount2` to the second +approach, and `chunkPopCount3` to the third approach. 
All multipliers are shown +as multiples of the running time of the first approach on the same length of +data. + +``` +All + Popcount + Popcount, length 1 + foldl': OK (49.30s) + 22.9 ns ± 348 ps, 39 B allocated, 0 B copied, 47 MB peak memory + chunkPopCount2: OK (3.17s) + 23.3 ns ± 456 ps, 39 B allocated, 0 B copied, 47 MB peak memory, 1.02x + chunkPopCount3: OK (0.81s) + 24.1 ns ± 330 ps, 39 B allocated, 0 B copied, 47 MB peak memory, 1.05x + Popcount, length 3 + foldl': OK (0.92s) + 27.1 ns ± 372 ps, 39 B allocated, 0 B copied, 47 MB peak memory + chunkPopCount2: OK (113.34s) + 26.7 ns ± 64 ps, 39 B allocated, 0 B copied, 47 MB peak memory, 0.98x + chunkPopCount3: OK (0.91s) + 26.8 ns ± 372 ps, 39 B allocated, 0 B copied, 47 MB peak memory, 0.99x + Popcount, length 7 + foldl': OK (4.05s) + 30.3 ns ± 226 ps, 39 B allocated, 0 B copied, 47 MB peak memory + chunkPopCount2: OK (8.52s) + 31.9 ns ± 40 ps, 39 B allocated, 0 B copied, 47 MB peak memory, 1.05x + chunkPopCount3: OK (1.05s) + 31.2 ns ± 322 ps, 39 B allocated, 0 B copied, 47 MB peak memory, 1.03x + Popcount, length 15 + foldl': OK (0.69s) + 40.4 ns ± 682 ps, 39 B allocated, 0 B copied, 47 MB peak memory + chunkPopCount2: OK (4.39s) + 32.8 ns ± 100 ps, 39 B allocated, 0 B copied, 47 MB peak memory, 0.81x + chunkPopCount3: OK (0.57s) + 33.6 ns ± 638 ps, 39 B allocated, 0 B copied, 47 MB peak memory, 0.83x + Popcount, length 31 + foldl': OK (2.08s) + 61.9 ns ± 420 ps, 39 B allocated, 0 B copied, 47 MB peak memory + chunkPopCount2: OK (81.00s) + 37.8 ns ± 222 ps, 39 B allocated, 0 B copied, 47 MB peak memory, 0.61x + chunkPopCount3: OK (0.64s) + 37.6 ns ± 688 ps, 39 B allocated, 0 B copied, 47 MB peak memory, 0.61x + Popcount, length 63 + foldl': OK (0.83s) + 99.5 ns ± 1.8 ns, 38 B allocated, 0 B copied, 47 MB peak memory + chunkPopCount2: OK (0.77s) + 45.2 ns ± 778 ps, 39 B allocated, 0 B copied, 47 MB peak memory, 0.45x + chunkPopCount3: OK (0.75s) + 44.7 ns ± 666 ps, 39 B allocated, 0 B copied, 47 MB peak 
memory, 0.45x + Popcount, length 127 + foldl': OK (0.68s) + 161 ns ± 2.7 ns, 31 B allocated, 0 B copied, 47 MB peak memory + chunkPopCount2: OK (1.09s) + 64.0 ns ± 650 ps, 39 B allocated, 0 B copied, 47 MB peak memory, 0.40x + chunkPopCount3: OK (1.12s) + 66.7 ns ± 696 ps, 39 B allocated, 0 B copied, 47 MB peak memory, 0.41x + Popcount, length 255 + foldl': OK (0.60s) + 287 ns ± 5.7 ns, 25 B allocated, 0 B copied, 47 MB peak memory + chunkPopCount2: OK (0.89s) + 104 ns ± 1.7 ns, 38 B allocated, 0 B copied, 47 MB peak memory, 0.36x + chunkPopCount3: OK (0.86s) + 101 ns ± 1.6 ns, 38 B allocated, 0 B copied, 47 MB peak memory, 0.35x + Popcount, length 511 + foldl': OK (0.58s) + 538 ns ± 11 ns, 0 B allocated, 0 B copied, 47 MB peak memory + chunkPopCount2: OK (0.77s) + 181 ns ± 2.9 ns, 31 B allocated, 0 B copied, 47 MB peak memory, 0.34x + chunkPopCount3: OK (0.72s) + 167 ns ± 2.8 ns, 31 B allocated, 0 B copied, 47 MB peak memory, 0.31x + Popcount, length 1023 + foldl': OK (1.11s) + 1.04 μs ± 11 ns, 0 B allocated, 0 B copied, 47 MB peak memory + chunkPopCount2: OK (0.73s) + 341 ns ± 5.3 ns, 25 B allocated, 0 B copied, 47 MB peak memory, 0.33x + chunkPopCount3: OK (1.28s) + 301 ns ± 2.7 ns, 31 B allocated, 0 B copied, 47 MB peak memory, 0.29x + Popcount, length 2047 + foldl': OK (1.09s) + 2.05 μs ± 28 ns, 0 B allocated, 0 B copied, 47 MB peak memory + chunkPopCount2: OK (0.69s) + 649 ns ± 12 ns, 0 B allocated, 0 B copied, 47 MB peak memory, 0.32x + chunkPopCount3: OK (1.20s) + 568 ns ± 6.7 ns, 25 B allocated, 0 B copied, 47 MB peak memory, 0.28x +``` + +We observe that, even for short inputs, the time required by the second and +third approach is not significantly worse than the first: the difference is at +most 5%, which at the scale being measured is barely distinct from noise. 
Once +the length reaches 15, there is about a 20% improvement in running time when +using the second and third approaches relative the first, and for lengths larger +than this, the increase only continues. Overall, the third approach does not +appear significantly better than the second until the input size reaches 511, +but isn't significantly worse at lengths above 15. To more clearly see the +difference, we also ran the same inputs, but compared the second approach to the +third: + +``` +All + Block popcount + Block popcount, length 1 + chunkPopCount2: OK (0.74s) + 20.5 ns ± 404 ps, 39 B allocated, 0 B copied, 47 MB peak memory + chunkPopCount3: OK (3.12s) + 23.3 ns ± 312 ps, 39 B allocated, 0 B copied, 47 MB peak memory, 1.14x + Block popcount, length 3 + chunkPopCount2: OK (3.31s) + 25.1 ns ± 134 ps, 39 B allocated, 0 B copied, 47 MB peak memory + chunkPopCount3: OK (1.70s) + 25.4 ns ± 352 ps, 39 B allocated, 0 B copied, 47 MB peak memory, 1.01x + Block popcount, length 7 + chunkPopCount2: OK (1.02s) + 30.2 ns ± 456 ps, 39 B allocated, 0 B copied, 47 MB peak memory + chunkPopCount3: OK (1.08s) + 32.2 ns ± 432 ps, 39 B allocated, 0 B copied, 47 MB peak memory, 1.07x + Block popcount, length 15 + chunkPopCount2: OK (1.09s) + 32.2 ns ± 366 ps, 39 B allocated, 0 B copied, 47 MB peak memory + chunkPopCount3: OK (1.06s) + 31.1 ns ± 400 ps, 39 B allocated, 0 B copied, 47 MB peak memory, 0.97x + Block popcount, length 31 + chunkPopCount2: OK (0.60s) + 35.2 ns ± 684 ps, 39 B allocated, 0 B copied, 47 MB peak memory + chunkPopCount3: OK (0.61s) + 36.3 ns ± 642 ps, 39 B allocated, 0 B copied, 47 MB peak memory, 1.03x + Block popcount, length 63 + chunkPopCount2: OK (0.76s) + 45.0 ns ± 772 ps, 39 B allocated, 0 B copied, 47 MB peak memory + chunkPopCount3: OK (2.96s) + 44.1 ns ± 174 ps, 39 B allocated, 0 B copied, 47 MB peak memory, 0.98x + Block popcount, length 127 + chunkPopCount2: OK (1.05s) + 62.0 ns ± 740 ps, 39 B allocated, 0 B copied, 47 MB peak memory + 
chunkPopCount3: OK (1.00s) + 59.3 ns ± 986 ps, 39 B allocated, 0 B copied, 47 MB peak memory, 0.96x + Block popcount, length 255 + chunkPopCount2: OK (0.85s) + 100 ns ± 1.3 ns, 38 B allocated, 0 B copied, 47 MB peak memory + chunkPopCount3: OK (0.80s) + 93.4 ns ± 1.3 ns, 38 B allocated, 0 B copied, 47 MB peak memory, 0.93x + Block popcount, length 511 + chunkPopCount2: OK (0.75s) + 175 ns ± 2.7 ns, 31 B allocated, 0 B copied, 47 MB peak memory + chunkPopCount3: OK (0.68s) + 160 ns ± 2.7 ns, 31 B allocated, 0 B copied, 47 MB peak memory, 0.91x + Block popcount, length 1023 + chunkPopCount2: OK (0.70s) + 330 ns ± 5.2 ns, 25 B allocated, 0 B copied, 47 MB peak memory + chunkPopCount3: OK (0.63s) + 294 ns ± 5.5 ns, 25 B allocated, 0 B copied, 47 MB peak memory, 0.89x + Block popcount, length 2047 + chunkPopCount2: OK (1.32s) + 624 ns ± 5.6 ns, 25 B allocated, 0 B copied, 47 MB peak memory + chunkPopCount3: OK (0.60s) + 562 ns ± 10 ns, 0 B allocated, 0 B copied, 47 MB peak memory, 0.90x +``` + +We can see that the benefits of the third approach versus the second amount to +10% better performance at most, and even that only occurs at length 511 and +higher. At the same time, for lengths below 15, there can be up to a 15% penalty +for using the third approach over the second. + +## Conclusion and recommendations + +We do not believe the use of the FFI and implementing any operations in +(portable C) to be worthwhile; there appear to be no significant gains in speed, +and GHC appears able to generate code competitive with the C compiler. + +For bitwise binary operations, we recommend a 'hybrid' approach: for smaller +input lengths (less than 30 items), we use the first (naive) approach, while for +anything larger, we use the third approach (with `Word64`-width +bit-parallelism). This would give us good performance on both small and large +inputs, and would not require a significant overhead, as we have to verify that +the lengths of our inputs match anyway. 
+ +For bitwise complement, we also recommend a 'hybrid' approach: for input lengths +less than 760 bytes, we use the first (naive) approach, while for anything +larger, we use the second approach (with `Word64`-width bit-parallelism). While +this would require some overhead for a length check, we believe that it's +worthwhile, as the length of an input `ByteString` is statically known. However, +if we consider inputs of this length unlikely relative the extra code path, +using the first approach in all cases is acceptable; however, we don't believe +that the extra code represents significant maintenance or runtime overheads, and +while inputs of this size would be unlikely, they're not impossible. + +Popcount should be implemented using the second (`Word64`-width bit-parallel) +approach only. The first (naive) approach is not significantly better at any +length, and the third (`Word256`-width bit parallel) approach only out-performs +the second by a small margin for large inputs. While a 'hybrid' approach for +this operation may be possible in theory, the benefits relative the extra code +and its maintenance don't appear worthwhile. + +## Future work + +Many, if not most, of the operations here can be significantly accelerated using +[SIMD +instructions](https://en.wikipedia.org/wiki/Single_instruction,_multiple_data). +This is because many of the operations are bit-parallel or monoidal (bitwise +binary operations and population counting) while others can benefit from +instruction-level parallelism and wider words, as well as specialized +instructions. Given that this code will be run on Cardano nodes, which are +likely to be x86-64 machines with recent instruction sets, the gains are +potentially significant; at the same time, this would require significant extra +build-time checks, as well as fallbacks when said instructions are not +available. 
diff --git a/plutus-core/plutus-core/bench/bitwise/Benches/Binary.hs b/plutus-core/plutus-core/bench/bitwise/Benches/Binary.hs new file mode 100644 index 00000000000..9150ee42436 --- /dev/null +++ b/plutus-core/plutus-core/bench/bitwise/Benches/Binary.hs @@ -0,0 +1,88 @@ +module Benches.Binary ( + benches + ) where + +import Control.Monad (guard) +import Data.Bits ((.&.)) +import Data.ByteString (ByteString) +import Data.ByteString qualified as BS +import Data.ByteString.Internal (fromForeignPtr, mallocByteString) +import Data.ByteString.Unsafe (unsafeUseAsCStringLen) +import Data.Foldable (for_) +import Data.Word (Word8) +import DataGen (mkBinaryArgs, noCleanup, sizes) +import Foreign.C.Types (CSize (CSize), CUChar) +import Foreign.ForeignPtr (castForeignPtr, withForeignPtr) +import Foreign.Ptr (Ptr, castPtr) +import Foreign.Storable (peekByteOff, pokeByteOff) +import GHC.Exts (fromList) +import System.IO.Unsafe (unsafeDupablePerformIO) +import Test.Tasty (withResource) +import Test.Tasty.Bench (Benchmark, bcompare, bench, bgroup, nfIO) + +benches :: Benchmark +benches = bgroup "Basic AND" $ benchBasic "Basic AND" <$> sizes + +-- Helpers + +-- Benchmark a naive Haskell implementation against a clone of packZipWith and a +-- naive C one. +benchBasic :: + String -> + Int -> + Benchmark +benchBasic mainLabel len = + withResource (mkBinaryArgs len) noCleanup $ \xs -> + let naiveLabel = "zipWith" + packedLabel = "packedZipWith" + cnaiveLabel = "naive C" + testLabel = mainLabel <> ", length " <> show len + matchLabel = "$NF == \"" <> naiveLabel <> "\" && $(NF - 1) == \"" <> testLabel <> "\"" in + bgroup testLabel [ + bench naiveLabel . nfIO $ wrap usingZW <$> xs, + bcompare matchLabel . bench packedLabel . nfIO $ wrap usingPZW <$> xs, + bcompare matchLabel . bench cnaiveLabel . 
nfIO $ wrap usingCNaive <$> xs + ] + +-- Saves repeatedly doing the same thing +wrap :: + (ByteString -> ByteString -> ByteString) -> + (ByteString, ByteString) -> + Maybe ByteString +wrap f (bs1, bs2) = do + guard (BS.length bs2 == len) + pure . f bs1 $ bs2 + where + len :: Int + len = BS.length bs1 + +usingZW :: ByteString -> ByteString -> ByteString +usingZW bs = fromList . BS.zipWith (.&.) bs + +usingPZW :: ByteString -> ByteString -> ByteString +usingPZW bs1 bs2 = unsafeDupablePerformIO . + unsafeUseAsCStringLen bs1 $ \(ptr1, len) -> + unsafeUseAsCStringLen bs2 $ \(ptr2, _) -> do + fp <- mallocByteString len + withForeignPtr fp $ \dst -> for_ [0 .. len - 1] $ \i -> do + b1 :: Word8 <- peekByteOff ptr1 i + b2 <- peekByteOff ptr2 i + pokeByteOff dst i $ b1 .&. b2 + pure . fromForeignPtr (castForeignPtr fp) 0 $ len + +usingCNaive :: ByteString -> ByteString -> ByteString +usingCNaive bs1 bs2 = unsafeDupablePerformIO . + unsafeUseAsCStringLen bs1 $ \(ptr1, len) -> + unsafeUseAsCStringLen bs2 $ \(ptr2, _) -> do + fp <- mallocByteString len + withForeignPtr fp $ \dst -> + cAndNaive dst (castPtr ptr1) (castPtr ptr2) (fromIntegral len) + pure . 
fromForeignPtr (castForeignPtr fp) 0 $ len + +foreign import ccall unsafe "cbits.h c_and_naive" + cAndNaive :: + Ptr CUChar -> + Ptr CUChar -> + Ptr CUChar -> + CSize -> + IO () diff --git a/plutus-core/plutus-core/bench/bitwise/Benches/BitRead.hs b/plutus-core/plutus-core/bench/bitwise/Benches/BitRead.hs new file mode 100644 index 00000000000..19a576d83f3 --- /dev/null +++ b/plutus-core/plutus-core/bench/bitwise/Benches/BitRead.hs @@ -0,0 +1,63 @@ +module Benches.BitRead ( + benches + ) where + +import Control.Monad (guard) +import Data.Bits (shiftR, testBit) +import Data.ByteString (ByteString) +import Data.ByteString qualified as BS +import Data.ByteString.Unsafe (unsafeUseAsCStringLen) +import DataGen (mkUnaryArg, noCleanup, sizes) +import Foreign.C.Types (CBool (CBool), CSize (CSize), CUChar) +import Foreign.Ptr (Ptr, castPtr) +import System.IO.Unsafe (unsafeDupablePerformIO) +import Test.Tasty (withResource) +import Test.Tasty.Bench (Benchmark, bcompare, bench, bgroup, nfIO) + +benches :: Benchmark +benches = bgroup "Basic bit read" $ benchBasic "Basic bit read" <$> sizes + +-- Helpers + +benchBasic :: + String -> + Int -> + Benchmark +benchBasic mainLabel len = + withResource (mkUnaryArg len) noCleanup $ \xs -> + let naiveLabel = "ByteString ops" + cLabel = "C" + testLabel = mainLabel <> ", length " <> show len + matchLabel = "$NF == \"" <> naiveLabel <> "\" && $(NF - 1) == \"" <> testLabel <> "\"" in + bgroup testLabel [ + bench naiveLabel . nfIO $ bitAt (len - 1) <$> xs, + bcompare matchLabel . bench cLabel . nfIO $ wrapper (len - 1) <$> xs + ] + +bitAt :: Int -> ByteString -> Maybe Bool +bitAt ix bs = do + guard (ix >= 0) + guard (ix < bitLength) + let (bigIx, smallIx) = ix `quotRem` 8 + let byte = BS.index bs bigIx + pure . 
testBit byte $ 7 - smallIx + where + bitLength :: Int + bitLength = BS.length bs * 8 + +wrapper :: Int -> ByteString -> Maybe Bool +wrapper ix bs = do + guard (ix >= 0) + guard (ix < bitLength) + let CBool res = unsafeDupablePerformIO . unsafeUseAsCStringLen bs $ \(ptr, _) -> + pure . cBitAt (fromIntegral ix) . castPtr $ ptr + pure $ res /= 0 + where + bitLength :: Int + bitLength = BS.length bs * 8 + +foreign import ccall unsafe "cbits.h c_bit_at" + cBitAt :: + CSize -> + Ptr CUChar -> + CBool diff --git a/plutus-core/plutus-core/bench/bitwise/Benches/BitWrite.hs b/plutus-core/plutus-core/bench/bitwise/Benches/BitWrite.hs new file mode 100644 index 00000000000..5d2180ac5d5 --- /dev/null +++ b/plutus-core/plutus-core/bench/bitwise/Benches/BitWrite.hs @@ -0,0 +1,99 @@ +module Benches.BitWrite ( + benches + ) where + +import Control.Monad (guard) +import Data.Bits (clearBit, setBit, shiftR) +import Data.ByteString (ByteString) +import Data.ByteString qualified as BS +import Data.ByteString.Internal (fromForeignPtr, mallocByteString) +import Data.ByteString.Unsafe (unsafeUseAsCStringLen) +import Data.Word (Word8) +import DataGen (mkUnaryArg, noCleanup, sizes) +import Foreign.C.Types (CBool (CBool), CSize (CSize), CUChar) +import Foreign.ForeignPtr (castForeignPtr, withForeignPtr) +import Foreign.Ptr (Ptr, castPtr) +import GHC.Exts (fromList, toList) +import System.IO.Unsafe (unsafeDupablePerformIO) +import Test.Tasty (withResource) +import Test.Tasty.Bench (Benchmark, bcompare, bench, bgroup, nfIO) + +benches :: Benchmark +benches = bgroup "Worst-case bit write" $ benchBasic "Worst-case bit write" <$> sizes + +-- Helpers + +benchBasic :: + String -> + Int -> + Benchmark +benchBasic mainLabel len = + withResource (mkUnaryArg len) noCleanup $ \xs -> + let naiveLabel = "ByteString ops" + cnaiveLabel = "Naive C" + cmemcpyLabel = "Memcpy C" + testLabel = mainLabel <> ", length " <> show len + matchLabel = "$NF == \"" <> naiveLabel <> "\" && $(NF - 1) == \"" <> 
testLabel <> "\"" in + bgroup testLabel [ + bench naiveLabel . nfIO $ bitSet False (len - 1) <$> xs, + bcompare matchLabel . bench cnaiveLabel . nfIO + $ wrapper cBitSetNaive False (len - 1) <$> xs, + bcompare matchLabel . bench cmemcpyLabel . nfIO + $ wrapper cBitSetMemcpy False (len - 1) <$> xs + ] + +bitSet :: Bool -> Int -> ByteString -> Maybe ByteString +bitSet b ix bs = do + guard (ix >= 0) + guard (ix < bitLength) + pure . fromList . fmap (uncurry go) . zip [0 ..] . toList $ bs + where + go :: Int -> Word8 -> Word8 + go candidateIx w8 + | candidateIx /= bigIx = w8 + | b = setBit w8 $ 7 - smallIx + | otherwise = clearBit w8 $ 7 - smallIx + bitLength :: Int + bitLength = BS.length bs * 8 + bigIx :: Int + bigIx = ix `quot` 8 + smallIx :: Int + smallIx = ix `rem` 8 + +wrapper :: + (CBool -> CSize -> Ptr CUChar -> Ptr CUChar -> CSize -> IO ()) -> + Bool -> + Int -> + ByteString -> + Maybe ByteString +wrapper f b ix bs = do + guard (ix >= 0) + guard (ix < bitLength) + pure . unsafeDupablePerformIO . unsafeUseAsCStringLen bs $ \(src, len) -> do + fp <- mallocByteString len + withForeignPtr fp $ \dst -> + if b + then f (CBool 1) (fromIntegral ix) dst (castPtr src) . fromIntegral $ len + else f (CBool 0) (fromIntegral ix) dst (castPtr src) . fromIntegral $ len + pure . 
fromForeignPtr (castForeignPtr fp) 0 $ len + where + bitLength :: Int + bitLength = BS.length bs * 8 + +foreign import ccall unsafe "cbits.h c_bit_set_naive" + cBitSetNaive :: + CBool -> + CSize -> + Ptr CUChar -> + Ptr CUChar -> + CSize -> + IO () + +foreign import ccall unsafe "cbits.h c_bit_set_memcpy" + cBitSetMemcpy :: + CBool -> + CSize -> + Ptr CUChar -> + Ptr CUChar -> + CSize -> + IO () diff --git a/plutus-core/plutus-core/bench/bitwise/Benches/Complement.hs b/plutus-core/plutus-core/bench/bitwise/Benches/Complement.hs new file mode 100644 index 00000000000..edc26420ef4 --- /dev/null +++ b/plutus-core/plutus-core/bench/bitwise/Benches/Complement.hs @@ -0,0 +1,51 @@ +-- editorconfig-checker-disable-file +module Benches.Complement ( + benches + ) where + +import Data.Bits (complement) +import Data.ByteString (ByteString) +import Data.ByteString qualified as BS +import Data.ByteString.Internal (fromForeignPtr, mallocByteString) +import Data.ByteString.Unsafe (unsafeUseAsCStringLen) +import DataGen (mkUnaryArg, noCleanup, sizes) +import Foreign.C.Types (CSize (CSize), CUChar) +import Foreign.ForeignPtr (castForeignPtr, withForeignPtr) +import Foreign.Ptr (Ptr, castPtr) +import Test.Tasty (withResource) +import Test.Tasty.Bench (Benchmark, bcompare, bench, bgroup, nfIO) + +benches :: Benchmark +benches = bgroup "Basic complement" $ benchBasic "Basic complement" <$> sizes + +-- Helpers + +-- Benchmark a naive Haskell implementation against the C one +benchBasic :: + String -> + Int -> + Benchmark +benchBasic mainLabel len = + withResource (mkUnaryArg len) noCleanup $ \xs -> + let naiveLabel = "map" + cnaiveLabel = "naive C" + testLabel = mainLabel <> ", length " <> show len + matchLabel = "$NF == \"" <> naiveLabel <> "\" && $(NF - 1) == \"" <> testLabel <> "\"" in + bgroup testLabel [ + bench naiveLabel . nfIO $ BS.map complement <$> xs, + bcompare matchLabel . bench cnaiveLabel . 
nfIO $ xs >>= wrapping cComplementNaive + ] + +-- Avoids having to rewrap C complement ops tediously each time +wrapping :: (Ptr CUChar -> Ptr CUChar -> CSize -> IO ()) -> ByteString -> IO ByteString +wrapping f bs = unsafeUseAsCStringLen bs $ \(ptr, len) -> do + fp <- mallocByteString len + withForeignPtr fp $ \dst -> f dst (castPtr ptr) (fromIntegral len) + pure . fromForeignPtr (castForeignPtr fp) 0 $ len + +foreign import ccall unsafe "cbits.h c_complement_naive" + cComplementNaive :: + Ptr CUChar -> + Ptr CUChar -> + CSize -> + IO () diff --git a/plutus-core/plutus-core/bench/bitwise/Benches/Convert.hs b/plutus-core/plutus-core/bench/bitwise/Benches/Convert.hs new file mode 100644 index 00000000000..711fa92e18e --- /dev/null +++ b/plutus-core/plutus-core/bench/bitwise/Benches/Convert.hs @@ -0,0 +1,164 @@ +{-# LANGUAGE BangPatterns #-} + +module Benches.Convert ( + benchesBSToI, + benchesIToBS, + ) where + +import Control.Monad (guard) +import Data.Bits (unsafeShiftL, zeroBits) +import Data.ByteString (ByteString) +import Data.ByteString qualified as BS +import Data.ByteString.Unsafe (unsafeUseAsCStringLen) +import Data.Word (Word16, Word32, Word64, Word8) +import DataGen (mkInteger, mkUnaryArg, noCleanup, sizes) +import Foreign.C.Types (CChar) +import Foreign.Ptr (Ptr) +import Foreign.Storable (peekByteOff) +import GHC.Exts (fromList) +import System.IO.Unsafe (unsafeDupablePerformIO) +import Test.Tasty (withResource) +import Test.Tasty.Bench (Benchmark, bcompare, bench, bgroup, nfIO) + +benchesBSToI :: Benchmark +benchesBSToI = bgroup "Basic ByteString to Integer conversion" $ + benchBSToI "Basic ByteString to Integer conversion" <$> sizes + +benchesIToBS :: Benchmark +benchesIToBS = bgroup "Basic Integer to ByteString conversion" $ + benchIToBS "Basic Integer to ByteString conversion" <$> sizes + +-- Helpers + +benchBSToI :: + String -> + Int -> + Benchmark +benchBSToI mainLabel len = + withResource (mkUnaryArg len) noCleanup $ \xs -> + let naiveLabel = "scan 
backwards" + forwardsLabel = "scan forwards" + shiftLabel = "scan backwards with shifts" + blockLabel = "scan backwards in blocks with shifts" + forwardsShiftLabel = "scan forwards with shifts" + testLabel = mainLabel <> ", length " <> show len + matchLabel = "$NF == \"" <> naiveLabel <> "\" && $(NF - 1) == \"" <> testLabel <> "\"" in + bgroup testLabel [ + bench naiveLabel . nfIO $ bsToI <$> xs, + bcompare matchLabel . bench forwardsLabel . nfIO $ bsToIForward <$> xs, + bcompare matchLabel . bench shiftLabel . nfIO $ bsToIShift <$> xs, + bcompare matchLabel . bench forwardsShiftLabel . nfIO $ bsToIShiftForward <$> xs, + bcompare matchLabel . bench blockLabel . nfIO $ bsToIShiftBlock <$> xs + ] + +benchIToBS :: + String -> + Int -> + Benchmark +benchIToBS mainLabel len = + withResource (mkInteger len) noCleanup $ \i -> + let naiveLabel = "naive" + testLabel = mainLabel <> ", length " <> show len in + bgroup testLabel [ + bench naiveLabel . nfIO $ iToBS <$> i + ] + +-- Implementations + +iToBS :: Integer -> Maybe ByteString +iToBS i = case signum i of + (-1) -> Nothing + 0 -> pure . BS.singleton $ zeroBits + _ -> pure $ if i < 256 + then BS.singleton . fromIntegral $ i + else fromList . go [] $ i + where + go :: [Word8] -> Integer -> [Word8] + go acc !j = case j `quotRem` 256 of + (0, r) -> fromIntegral r : acc -- we're done + (d, r) -> go (fromIntegral r : acc) d + +bsToI :: ByteString -> Maybe Integer +bsToI bs = do + guard (len > 0) + pure . go 0 1 $ len - 1 + where + len :: Int + len = BS.length bs + go :: Integer -> Integer -> Int -> Integer + go !acc !mult !ix = let limb :: Integer = fromIntegral . BS.index bs $ ix + limbValue = limb * mult + acc' = acc + limbValue in + if ix == 0 + then acc' + else go acc' (mult * 256) $ ix - 1 + +bsToIForward :: ByteString -> Maybe Integer +bsToIForward bs = do + guard (len > 0) + pure . snd . 
BS.foldl' go (256 ^ (len - 1), 0) $ bs + where + len :: Int + len = BS.length bs + go :: (Integer, Integer) -> Word8 -> (Integer, Integer) + go (mult, acc) w8 = let limbValue = fromIntegral w8 * mult in + (mult `quot` 256, acc + limbValue) + +bsToIShift :: ByteString -> Maybe Integer +bsToIShift bs = do + guard (len > 0) + pure . go 0 0 $ len - 1 + where + len :: Int + len = BS.length bs + go :: Integer -> Int -> Int -> Integer + go !acc !shift !ix = let limb :: Integer = fromIntegral . BS.index bs $ ix + limbValue = limb `unsafeShiftL` shift + acc' = acc + limbValue in + if ix == 0 + then acc' + else go acc' (shift + 8) $ ix - 1 + +bsToIShiftBlock :: ByteString -> Maybe Integer +bsToIShiftBlock bs = do + guard (len > 0) + pure . unsafeDupablePerformIO . unsafeUseAsCStringLen bs $ \(p, _) -> do + go p 0 0 $ len - 1 + where + len :: Int + len = BS.length bs + go :: Ptr CChar -> Integer -> Int -> Int -> IO Integer + go p !acc !shift !ix + | ix >= 7 = do + w64 :: Word64 <- peekByteOff p (ix - 7) + let limb :: Integer = fromIntegral w64 + let limbValue = limb `unsafeShiftL` shift + go p (acc + limbValue) (shift + 64) $ ix - 8 + | ix >= 3 = do + w32 :: Word32 <- peekByteOff p (ix - 3) + let limb :: Integer = fromIntegral w32 + let limbValue = limb `unsafeShiftL` shift + go p (acc + limbValue) (shift + 32) $ ix - 4 + | ix >= 1 = do + w16 :: Word16 <- peekByteOff p (ix - 1) + let limb :: Integer = fromIntegral w16 + let limbValue = limb `unsafeShiftL` shift + go p (acc + limbValue) (shift + 16) $ ix - 2 + | ix == 0 = do + w8 :: Word8 <- peekByteOff p ix + let limb :: Integer = fromIntegral w8 + let limbValue = limb `unsafeShiftL` shift + pure $ acc + limbValue + | otherwise = pure acc + +bsToIShiftForward :: ByteString -> Maybe Integer +bsToIShiftForward bs = do + guard (len > 0) + pure . snd . 
BS.foldl' go ((len - 1) * 8, 0) $ bs + where + len :: Int + len = BS.length bs + go :: (Int, Integer) -> Word8 -> (Int, Integer) + go (shift, acc) w8 = let limbValue = fromIntegral w8 `unsafeShiftL` shift in + (shift - 8, acc + limbValue) + diff --git a/plutus-core/plutus-core/bench/bitwise/Benches/CountLeadingZeroes.hs b/plutus-core/plutus-core/bench/bitwise/Benches/CountLeadingZeroes.hs new file mode 100644 index 00000000000..bbbaec25a13 --- /dev/null +++ b/plutus-core/plutus-core/bench/bitwise/Benches/CountLeadingZeroes.hs @@ -0,0 +1,87 @@ +module Benches.CountLeadingZeroes ( + benches, + cBenches, + ) where + +import Data.Bits (countLeadingZeros, zeroBits) +import Data.ByteString (ByteString) +import Data.ByteString qualified as BS +import Data.ByteString.Unsafe (unsafeUseAsCStringLen) +import Data.Maybe (fromMaybe) +import DataGen (mkZeroesOne, noCleanup, sizes) +import Foreign.C.Types (CSize (CSize), CUChar) +import Foreign.Ptr (Ptr, castPtr) +import Test.Tasty (withResource) +import Test.Tasty.Bench (Benchmark, bcompare, bench, bgroup, nfIO) + +benches :: Benchmark +benches = bgroup "Basic CLZ" $ benchBasic "CLZ" <$> sizes + +cBenches :: Benchmark +cBenches = bgroup "C CLZ" $ benchC "C CLZ" <$> sizes + +-- Helpers + +benchBasic :: + String -> + Int -> + Benchmark +benchBasic mainLabel len = + withResource (mkZeroesOne len) noCleanup $ \xs -> + let naiveLabel = "ByteString ops" + cnaiveLabel = "naive C" + cblockLabel = "block C" + cunrolledLabel = "unrolled C" + testLabel = mainLabel <> ", length " <> show len + matchLabel = "$NF == \"" <> naiveLabel <> "\" && $(NF - 1) == \"" <> testLabel <> "\"" in + bgroup testLabel [ + bench naiveLabel . nfIO $ naiveClz <$> xs, + bcompare matchLabel . bench cnaiveLabel . nfIO $ xs >>= wrapping cClzNaive, + bcompare matchLabel . bench cblockLabel . nfIO $ xs >>= wrapping cClzBlock, + bcompare matchLabel . bench cunrolledLabel . 
nfIO $ xs >>= wrapping cClzBlockUnrolled + ] + +benchC :: + String -> + Int -> + Benchmark +benchC mainLabel len = + withResource (mkZeroesOne len) noCleanup $ \xs -> + let cnaiveLabel = "naive C" + cblockLabel = "block C" + cunrolledLabel = "unrolled C" + testLabel = mainLabel <> ", length " <> show len + matchLabel = "$NF == \"" <> cnaiveLabel <> "\" && $(NF - 1) == \"" <> testLabel <> "\"" in + bgroup testLabel [ + bench cnaiveLabel . nfIO $ xs >>= wrapping cClzNaive, + bcompare matchLabel . bench cblockLabel . nfIO $ xs >>= wrapping cClzBlock, + bcompare matchLabel . bench cunrolledLabel . nfIO $ xs >>= wrapping cClzBlockUnrolled + ] + +naiveClz :: ByteString -> Int +naiveClz bs = fromMaybe (BS.length bs * 8) $ do + ix <- BS.findIndex (/= zeroBits) bs + pure $ ix * 8 + countLeadingZeros (BS.index bs ix) + +-- Avoids having to rewrap C ops tediously each time +wrapping :: (Ptr CUChar -> CSize -> CSize) -> ByteString -> IO Int +wrapping f bs = unsafeUseAsCStringLen bs $ \(ptr, len) -> + pure . fromIntegral . f (castPtr ptr) . 
fromIntegral $ len + +foreign import ccall unsafe "cbits.h c_clz_naive" + cClzNaive :: + Ptr CUChar -> + CSize -> + CSize + +foreign import ccall unsafe "cbits.h c_clz_block" + cClzBlock :: + Ptr CUChar -> + CSize -> + CSize + +foreign import ccall unsafe "cbits.h c_clz_block_unrolled" + cClzBlockUnrolled :: + Ptr CUChar -> + CSize -> + CSize diff --git a/plutus-core/plutus-core/bench/bitwise/Benches/Popcount.hs b/plutus-core/plutus-core/bench/bitwise/Benches/Popcount.hs new file mode 100644 index 00000000000..71572160ac5 --- /dev/null +++ b/plutus-core/plutus-core/bench/bitwise/Benches/Popcount.hs @@ -0,0 +1,85 @@ +-- editorconfig-checker-disable-file +module Benches.Popcount ( + benches, + cBenches + ) where + +import Data.Bits (popCount) +import Data.ByteString (ByteString) +import Data.ByteString qualified as BS +import Data.ByteString.Unsafe (unsafeUseAsCStringLen) +import DataGen (mkUnaryArg, noCleanup, sizes) +import Foreign.C.Types (CSize (CSize), CUChar) +import Foreign.Ptr (Ptr, castPtr) +import Test.Tasty (withResource) +import Test.Tasty.Bench (Benchmark, bcompare, bench, bgroup, nfIO) + +benches :: Benchmark +benches = bgroup "Basic popcount" $ benchBasic "Basic popcount" <$> sizes + +cBenches :: Benchmark +cBenches = bgroup "C popcount" $ benchC "C popcount" <$> sizes + +-- Helpers + +-- Benchmark a naive Haskell implementation against all the C ones +benchBasic :: + String -> + Int -> + Benchmark +benchBasic mainLabel len = + withResource (mkUnaryArg len) noCleanup $ \xs -> + let naiveLabel = "foldl'" + cnaiveLabel = "naive C" + cblockLabel = "block C" + cblockUnrollLabel = "block unrolled C" + testLabel = mainLabel <> ", length " <> show len + matchLabel = "$NF == \"" <> naiveLabel <> "\" && $(NF - 1) == \"" <> testLabel <> "\"" in + bgroup testLabel [ + bench naiveLabel . nfIO $ BS.foldl' (\acc w8 -> acc + popCount w8) 0 <$> xs, + bcompare matchLabel . bench cnaiveLabel . nfIO $ xs >>= wrapping cPopcountNaive, + bcompare matchLabel . 
bench cblockLabel . nfIO $ xs >>= wrapping cPopcountBlock, + bcompare matchLabel . bench cblockUnrollLabel . nfIO $ xs >>= wrapping cPopcountBlockUnroll + ] + +-- Benchmark naive C against the other C ones +benchC :: + String -> + Int -> + Benchmark +benchC mainLabel len = + withResource (mkUnaryArg len) noCleanup $ \xs -> + let cnaiveLabel = "naive C" + cblockLabel = "block C" + cblockUnrollLabel = "block unrolled C" + testLabel = mainLabel <> ", length " <> show len + matchLabel = "$NF == \"" <> cnaiveLabel <> "\" && $(NF - 1) == \"" <> testLabel <> "\"" in + bgroup testLabel [ + bench cnaiveLabel . nfIO $ xs >>= wrapping cPopcountNaive, + bcompare matchLabel . bench cblockLabel . nfIO $ xs >>= wrapping cPopcountBlock, + bcompare matchLabel . bench cblockUnrollLabel . nfIO $ xs >>= wrapping cPopcountBlockUnroll + ] + + +-- Avoids having to rewrap C popcount ops tediously each time +wrapping :: (Ptr CUChar -> CSize -> CSize) -> ByteString -> IO CSize +wrapping f bs = unsafeUseAsCStringLen bs $ \(ptr, len) -> + pure $ f (castPtr ptr) (fromIntegral len) + +foreign import ccall unsafe "cbits.h c_popcount_naive" + cPopcountNaive :: + Ptr CUChar -> + CSize -> + CSize + +foreign import ccall unsafe "cbits.h c_popcount_block" + cPopcountBlock :: + Ptr CUChar -> + CSize -> + CSize + +foreign import ccall unsafe "cbits.h c_popcount_block_unroll" + cPopcountBlockUnroll :: + Ptr CUChar -> + CSize -> + CSize diff --git a/plutus-core/plutus-core/bench/bitwise/Benches/Rotate.hs b/plutus-core/plutus-core/bench/bitwise/Benches/Rotate.hs new file mode 100644 index 00000000000..5123fb1a01d --- /dev/null +++ b/plutus-core/plutus-core/bench/bitwise/Benches/Rotate.hs @@ -0,0 +1,87 @@ +module Benches.Rotate ( + benches, + ) where + +import Data.Bits (bit, shiftR, testBit, zeroBits, (.|.)) +import Data.Bool (bool) +import Data.ByteString (ByteString) +import Data.ByteString qualified as BS +import Data.ByteString.Internal (fromForeignPtr, mallocByteString) +import Data.ByteString.Unsafe 
(unsafeUseAsCStringLen) +import Data.Foldable (foldl') +import Data.Word (Word8) +import DataGen (mkUnaryArg, noCleanup, sizes) +import Foreign.C.Types (CInt (CInt), CSize (CSize), CUChar) +import Foreign.ForeignPtr (castForeignPtr, withForeignPtr) +import Foreign.Ptr (Ptr, castPtr) +import GHC.Exts (fromList) +import System.IO.Unsafe (unsafeDupablePerformIO) +import Test.Tasty (withResource) +import Test.Tasty.Bench (Benchmark, bcompare, bench, bgroup, nfIO) + +benches :: Benchmark +benches = bgroup "Basic bitwise rotate" $ benchBasic "Basic bitwise rotate" <$> sizes + +-- Helpers + +benchBasic :: + String -> + Int -> + Benchmark +benchBasic mainLabel len = + withResource (mkUnaryArg len) noCleanup $ \xs -> + let naiveLabel = "ByteString ops" + cLabel = "C" + testLabel = mainLabel <> ", length " <> show len + matchLabel = "$NF == \"" <> naiveLabel <> "\" && $(NF - 1) == \"" <> testLabel <> "\"" in + bgroup testLabel [ + bench naiveLabel . nfIO $ bitRotate (len * 4) <$> xs, + bcompare matchLabel . bench cLabel . nfIO $ bitRotateC (len * 4) <$> xs + ] + +bitRotate :: Int -> ByteString -> ByteString +bitRotate i bs + | bitLen == 0 = bs + | otherwise = case i `rem` bitLen of + 0 -> bs -- nothing to do + j -> fromList $ go j <$> [0 .. BS.length bs - 1] + where + bitLen :: Int + bitLen = BS.length bs * 8 + go :: Int -> Int -> Word8 + go j byteIx = let bitIxes = (\ix -> 8 * byteIx - j + ix) <$> [0 .. 7] + bits = bitAtWraparound bs <$> bitIxes + zipped = zip [7, 6 .. 0] bits in + foldl' (\acc (pos, b) -> acc .|. bool zeroBits (bit pos) b) zeroBits zipped + +bitRotateC :: Int -> ByteString -> ByteString +bitRotateC i bs + | bitLen == 0 = bs + | otherwise = case i `rem` bitLen of + 0 -> bs -- nothing to do + j -> unsafeDupablePerformIO . unsafeUseAsCStringLen bs $ \(src, len) -> do + fp <- mallocByteString len + withForeignPtr fp $ \dst -> + cRotateBits (fromIntegral j) dst (castPtr src) . fromIntegral $ len + pure . 
fromForeignPtr (castForeignPtr fp) 0 $ len + where + bitLen :: Int + bitLen = BS.length bs * 8 + +bitAtWraparound :: ByteString -> Int -> Bool +bitAtWraparound bs i + | i < 0 = bitAtWraparound bs (i + bitLength) + | otherwise = let (bigIx, smallIx) = i `quotRem` 8 + byte = BS.index bs bigIx in + testBit byte $ shiftR 0x80 smallIx + where + bitLength :: Int + bitLength = BS.length bs * 8 + +foreign import ccall unsafe "cbits.h c_rotate_bits" + cRotateBits :: + CInt -> + Ptr CUChar -> + Ptr CUChar -> + CSize -> + IO () diff --git a/plutus-core/plutus-core/bench/bitwise/Benches/Shift.hs b/plutus-core/plutus-core/bench/bitwise/Benches/Shift.hs new file mode 100644 index 00000000000..98b204fe697 --- /dev/null +++ b/plutus-core/plutus-core/bench/bitwise/Benches/Shift.hs @@ -0,0 +1,79 @@ +module Benches.Shift ( + benches, + ) where + +import Data.Bits (bit, shiftR, testBit, zeroBits, (.|.)) +import Data.Bool (bool) +import Data.ByteString (ByteString) +import Data.ByteString qualified as BS +import Data.ByteString.Internal (fromForeignPtr, mallocByteString) +import Data.ByteString.Unsafe (unsafeUseAsCStringLen) +import Data.Foldable (foldl') +import Data.Word (Word8) +import DataGen (mkUnaryArg, noCleanup, sizes) +import Foreign.C.Types (CInt (CInt), CSize (CSize), CUChar) +import Foreign.ForeignPtr (castForeignPtr, withForeignPtr) +import Foreign.Ptr (Ptr, castPtr) +import GHC.Exts (fromList) +import System.IO.Unsafe (unsafeDupablePerformIO) +import Test.Tasty (withResource) +import Test.Tasty.Bench (Benchmark, bcompare, bench, bgroup, nfIO) + +benches :: Benchmark +benches = bgroup "Basic bitwise shift" $ benchBasic "Basic bitwise shift" <$> sizes + +-- Helpers + +benchBasic :: + String -> + Int -> + Benchmark +benchBasic mainLabel len = + withResource (mkUnaryArg len) noCleanup $ \xs -> + let naiveLabel = "ByteString ops" + cLabel = "C" + testLabel = mainLabel <> ", length " <> show len + matchLabel = "$NF == \"" <> naiveLabel <> "\" && $(NF - 1) == \"" <> testLabel <> 
"\"" in + bgroup testLabel [ + bench naiveLabel . nfIO $ bitShift (len * 4) <$> xs, + bcompare matchLabel . bench cLabel . nfIO $ bitShiftC (len * 4) <$> xs + ] + +bitShift :: Int -> ByteString -> ByteString +bitShift i bs = case signum i of + 0 -> bs + _ -> fromList $ go <$> [0 .. BS.length bs - 1] + where + go :: Int -> Word8 + go byteIx = let bitIxes = (\ix -> 8 * byteIx - i + ix) <$> [0 .. 7] + bits = bitAtClipping bs <$> bitIxes + zipped = zip [7, 6 .. 0] bits in + foldl' (\acc (pos, b) -> acc .|. bool zeroBits (bit pos) b) zeroBits zipped + +bitShiftC :: Int -> ByteString -> ByteString +bitShiftC i bs = case signum i of + 0 -> bs + _ -> unsafeDupablePerformIO . unsafeUseAsCStringLen bs $ \(src, len) -> do + fp <- mallocByteString len + withForeignPtr fp $ \dst -> + cShiftBits (fromIntegral i) dst (castPtr src) . fromIntegral $ len + pure . fromForeignPtr (castForeignPtr fp) 0 $ len + +bitAtClipping :: ByteString -> Int -> Bool +bitAtClipping bs i + | i < 0 = False + | i >= bitLength = False + | otherwise = let (bigIx, smallIx) = i `quotRem` 8 + byte = BS.index bs bigIx in + testBit byte $ shiftR 0x80 smallIx + where + bitLength :: Int + bitLength = BS.length bs * 8 + +foreign import ccall unsafe "cbits.h c_shift_bits" + cShiftBits :: + CInt -> + Ptr CUChar -> + Ptr CUChar -> + CSize -> + IO () diff --git a/plutus-core/plutus-core/bench/bitwise/DataGen.hs b/plutus-core/plutus-core/bench/bitwise/DataGen.hs new file mode 100644 index 00000000000..21ce2cac7e9 --- /dev/null +++ b/plutus-core/plutus-core/bench/bitwise/DataGen.hs @@ -0,0 +1,65 @@ +{-# LANGUAGE BangPatterns #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE ScopedTypeVariables #-} + +module DataGen ( + mkUnaryArg, + mkHomogenousArg, + mkBinaryArgs, + mkInteger, + mkZeroesOne, + sizes, + noCleanup, + ) where + +import Control.Monad (replicateM) +import Control.Monad.State.Strict (State) +import Data.Bits (unsafeShiftL) +import Data.ByteString (ByteString) +import Data.ByteString qualified as BS 
+import Data.Kind (Type) +import Data.Word (Word8) +import GHC.Exts (fromListN) +import System.Random.Stateful (StateGenM, StdGen, mkStdGen, randomM, runStateGen_, uniformWord8) + +-- Generate an Integer that will require a representation of this many bytes +mkInteger :: Int -> IO Integer +mkInteger len = pure . runStateGen_ (mkStdGen 42) $ \gen -> + go gen 0 0 + where + go :: StateGenM StdGen -> Int -> Integer -> State StdGen Integer + go !gen !place !acc + | place == len = pure acc + -- we generate non-zero bytes to ensure we don't get truncations + | otherwise = do + block <- uniformWord8 gen + let result = fromIntegral $ if block == 0 then block + 1 else block + go gen (place + 1) $ acc + result `unsafeShiftL` (place * 8) + +-- Generate a ByteString of a given length +mkUnaryArg :: Int -> IO ByteString +mkUnaryArg len = pure . runStateGen_ (mkStdGen 42) $ \gen -> + fromListN len <$> replicateM len (randomM gen) + +-- Generate a ByteString of a given length full of the given byte +mkHomogenousArg :: Int -> Word8 -> IO ByteString +mkHomogenousArg len = pure . BS.replicate len + +-- Generates n - 1 zeroes, followed by a one byte +mkZeroesOne :: Int -> IO ByteString +mkZeroesOne len = pure $ BS.snoc (BS.replicate (len - 1) 0x00) 0x01 + +-- Generate two ByteStrings, both of a given length +mkBinaryArgs :: Int -> IO (ByteString, ByteString) +mkBinaryArgs len = pure . runStateGen_ (mkStdGen 42) $ \gen -> + (,) <$> (fromListN len <$> replicateM len (randomM gen)) <*> + (fromListN len <$> replicateM len (randomM gen)) + +-- We work in IO only to avoid interference, so thus, a cleanup isn't needed for +-- withResource. This function is designed to indicate that fact. +noCleanup :: forall (a :: Type) . a -> IO () +noCleanup = const (pure ()) + +-- Basic set of sizes (in bytes) +sizes :: [Int] +sizes = [(2 :: Int) ^ (i :: Int) - 1 | i <- [1 .. 
15]] diff --git a/plutus-core/plutus-core/bench/bitwise/Main.hs b/plutus-core/plutus-core/bench/bitwise/Main.hs new file mode 100644 index 00000000000..3dc911b270c --- /dev/null +++ b/plutus-core/plutus-core/bench/bitwise/Main.hs @@ -0,0 +1,55 @@ +-- editorconfig-checker-disable-file +{-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE OverloadedStrings #-} + +module Main (main) where + +import Benches.Binary qualified as Binary +import Benches.BitRead qualified as BitRead +import Benches.BitWrite qualified as BitWrite +import Benches.Complement qualified as Complement +import Benches.Convert qualified as Convert +import Benches.CountLeadingZeroes qualified as CountLeadingZeroes +import Benches.Popcount qualified as Popcount +import Benches.Rotate qualified as Rotate +import Benches.Shift qualified as Shift +import GHC.IO.Encoding (setLocaleEncoding, utf8) +import Test.Tasty (testGroup) +import Test.Tasty.Bench (defaultMain) + +main :: IO () +main = do + setLocaleEncoding utf8 + defaultMain [ + testGroup "Popcount" [ + Popcount.benches, + Popcount.cBenches + ], + testGroup "Complement" [ + Complement.benches + ], + testGroup "Binary" [ + Binary.benches + ], + testGroup "Count leading zeroes" [ + CountLeadingZeroes.benches, + CountLeadingZeroes.cBenches + ], + testGroup "Bit read" [ + BitRead.benches + ], + testGroup "Bit write" [ + BitWrite.benches + ], + testGroup "Bit shift" [ + Shift.benches + ], + testGroup "Bit rotate" [ + Rotate.benches + ], + testGroup "Conversions" [ + Convert.benchesBSToI, + Convert.benchesIToBS + ] + ] diff --git a/plutus-core/plutus-core/bench/bitwise/cbits/binary.c b/plutus-core/plutus-core/bench/bitwise/cbits/binary.c new file mode 100644 index 00000000000..aee1a1b5f83 --- /dev/null +++ b/plutus-core/plutus-core/bench/bitwise/cbits/binary.c @@ -0,0 +1,8 @@ +#include "cbits.h" + +void c_and_naive(unsigned char *dst, unsigned char const *src1, + unsigned char const *src2, size_t const len) { + for (size_t i 
= 0; i < len; i++) { + dst[i] = src1[i] & src2[i]; + } +} diff --git a/plutus-core/plutus-core/bench/bitwise/cbits/bit-access.c b/plutus-core/plutus-core/bench/bitwise/cbits/bit-access.c new file mode 100644 index 00000000000..41b6383cb12 --- /dev/null +++ b/plutus-core/plutus-core/bench/bitwise/cbits/bit-access.c @@ -0,0 +1,37 @@ +#include "cbits.h" +#include + +bool c_bit_at(size_t const ix, unsigned char *const restrict src) { + size_t big_ix = ix / 8; + size_t small_ix = ix % 8; + return src[big_ix] & (0x80 >> small_ix); +} + +void c_bit_set_naive(bool const b, size_t const ix, unsigned char *restrict dst, + unsigned char const *restrict src, size_t const len) { + size_t big_ix = ix / 8; + size_t small_ix = ix % 8; + for (size_t i = 0; i < len; i++) { + dst[i] = src[i]; + } + if (b == true) { + dst[big_ix] = src[big_ix] | (0x80 >> small_ix); + } else { + dst[big_ix] = src[big_ix] & (~(0x80 >> small_ix)); + } +} + +void c_bit_set_memcpy(bool const b, size_t const ix, + unsigned char *restrict dst, + unsigned char const *restrict src, size_t const len) { + size_t big_ix = ix / 8; + size_t small_ix = ix % 8; + // Copy entirety of src + memcpy(dst, src, len); + // Set our desired bit + if (b == true) { + dst[big_ix] = src[big_ix] | (0x80 >> small_ix); + } else { + dst[big_ix] = src[big_ix] & (~(0x80 >> small_ix)); + } +} diff --git a/plutus-core/plutus-core/bench/bitwise/cbits/cbits.h b/plutus-core/plutus-core/bench/bitwise/cbits/cbits.h new file mode 100644 index 00000000000..8c6f025f484 --- /dev/null +++ b/plutus-core/plutus-core/bench/bitwise/cbits/cbits.h @@ -0,0 +1,54 @@ +#ifndef CBITS_H +#define CBITS_H + +#include +#include + +// Popcount + +size_t c_popcount_naive(unsigned char const *restrict src, size_t const len); + +size_t c_popcount_block(unsigned char const *restrict src, size_t len); + +size_t c_popcount_block_unroll(unsigned char const *restrict src, size_t len); + +// Complement + +void c_complement_naive(unsigned char *restrict dst, + unsigned 
char const *restrict src, size_t const len); + +// Binary ops + +void c_and_naive(unsigned char *restrict dst, unsigned char const *src1, + unsigned char const *src2, size_t const len); + +// Bit reading and writing + +bool c_bit_at(size_t const ix, unsigned char *const restrict src); + +void c_bit_set_naive(bool const b, size_t const ix, unsigned char *restrict dst, + unsigned char const *restrict src, size_t const len); + +void c_bit_set_memcpy(bool const b, size_t const ix, + unsigned char *restrict dst, + unsigned char const *restrict src, size_t const len); + +// CLZ + +size_t c_clz_naive(unsigned char const *restrict src, size_t const len); + +size_t c_clz_block(unsigned char const *restrict src, size_t len); + +size_t c_clz_block_unrolled(unsigned char const *restrict src, size_t len); + +// Shift + +void c_shift_bits(int bit_shift, unsigned char *restrict dst, + unsigned char const *restrict src, size_t len); + +// Rotate + +void c_rotate_bits(int bit_rotation, unsigned char *restrict dst, + unsigned char const *restrict src, size_t len); + +#endif /* CBITS_H */ diff --git a/plutus-core/plutus-core/bench/bitwise/cbits/clz.c b/plutus-core/plutus-core/bench/bitwise/cbits/clz.c new file mode 100644 index 00000000000..9941c8da3a0 --- /dev/null +++ b/plutus-core/plutus-core/bench/bitwise/cbits/clz.c @@ -0,0 +1,70 @@ +#include "cbits.h" +#include + +size_t c_clz_naive(unsigned char const *src, size_t const len) { + size_t leading_zeroes = 0; + for (size_t i = 0; i < len; i++) { + // Necessary because __builtin_clz has an undefined outcome if its argument + // is zero. + if (src[i] != 0) { + // This is necessary because GCC will sign-extend the ith byte whether we + // like it or not. Thus, we have to compensate. 
+ size_t offset = (sizeof(unsigned int) - 1) * 8; + return leading_zeroes + (__builtin_clz(src[i]) - offset); + } + leading_zeroes += 8; + } + return leading_zeroes; +} + +size_t c_clz_block(unsigned char const *restrict src, size_t len) { + size_t leading_zeroes = 0; + while (len >= sizeof(unsigned long long)) { + unsigned long long x = *((unsigned long long const *restrict)src); + if (x != 0) { + return leading_zeroes + __builtin_clzll(x); + } + leading_zeroes += (sizeof(unsigned long long) * 8); + src += sizeof(unsigned long long); + len -= sizeof(unsigned long long); + } + while (len > 0) { + if ((*src) != 0) { + // Same necessity as before + size_t offset = (sizeof(unsigned int) - 1) * 8; + return leading_zeroes + (__builtin_clz(*src) - offset); + } + leading_zeroes += 8; + src++; + len--; + } + return leading_zeroes; +} + +size_t c_clz_block_unrolled(unsigned char const *restrict src, size_t len) { + size_t leading_zeroes = 0; + while (len >= 2 * sizeof(unsigned long long)) { + unsigned long long x = ((unsigned long long const *restrict)src)[0]; + unsigned long long y = ((unsigned long long const *restrict)src)[1]; + if (x != 0) { + return leading_zeroes + __builtin_clzll(x); + } + if (y != 0) { + return leading_zeroes + sizeof(unsigned long long) + __builtin_clzll(y); + } + leading_zeroes += (sizeof(unsigned long long) * 16); + src += 2 * sizeof(unsigned long long); + len -= 2 * sizeof(unsigned long long); + } + while (len > 0) { + if ((*src) != 0) { + // Same necessity as before + size_t offset = (sizeof(unsigned int) - 1) * 8; + return leading_zeroes + (__builtin_clz(*src) - offset); + } + leading_zeroes += 8; + src++; + len--; + } + return leading_zeroes; +} diff --git a/plutus-core/plutus-core/bench/bitwise/cbits/complement.c b/plutus-core/plutus-core/bench/bitwise/cbits/complement.c new file mode 100644 index 00000000000..3395957de6d --- /dev/null +++ b/plutus-core/plutus-core/bench/bitwise/cbits/complement.c @@ -0,0 +1,9 @@ +#include "cbits.h" 
+#include + +void c_complement_naive(unsigned char *restrict dst, + unsigned char const *restrict src, size_t const len) { + for (size_t i = 0; i < len; i++) { + dst[i] = ~(src[i]); + } +} diff --git a/plutus-core/plutus-core/bench/bitwise/cbits/popcount.c b/plutus-core/plutus-core/bench/bitwise/cbits/popcount.c new file mode 100644 index 00000000000..0bcf714c8af --- /dev/null +++ b/plutus-core/plutus-core/bench/bitwise/cbits/popcount.c @@ -0,0 +1,64 @@ +#include "cbits.h" +#include + +size_t c_popcount_naive(unsigned char const *restrict src, size_t const len) { + size_t total = 0; + for (size_t i = 0; i < len; i++) { + total += __builtin_popcount(src[i]); + } + return total; +} + +/* + * We take advantage of the fact that a single POPCNT instruction can count an + * entire register's worth of bits, rather than a single byte. To aid GCC in + * doing this, we do a classic strip mine: first count at unsigned long long + * width, then finish off with byte-at-a-time. + * + * Strip mining: + * http://physics.ujep.cz/~zmoravec/prga/main_for/mergedProjects/optaps_for/common/optaps_vec_mine.htm + */ +size_t c_popcount_block(unsigned char const *restrict src, size_t len) { + size_t total = 0; + while (len >= sizeof(unsigned long long)) { + total += __builtin_popcountll(*(unsigned long long const *restrict)src); + src += sizeof(unsigned long long); + len -= sizeof(unsigned long long); + } + while (len > 0) { + total += __builtin_popcount(*src); + src++; + len--; + } + return total; +} + +/* + * We further extend the popcount_block method by manually two-way unrolling + * the loop. This can take advantage of high throughput for the POPCNT + * instruction on modern x86 CPUs, as they can issue four POPCNT instructions + * simultaneously if data is available. 
+ * + * Loop unrolling: + * https://en.wikipedia.org/wiki/Loop_unrolling#Static/manual_loop_unrolling + * Instruction tables for x86: + * https://www.agner.org/optimize/instruction_tables.pdf ILP (including + * multiple-issue): https://en.wikipedia.org/wiki/Instruction-level_parallelism + * Data dependency: https://en.wikipedia.org/wiki/Data_dependency + */ +size_t c_popcount_block_unroll(unsigned char const *restrict src, size_t len) { + size_t total = 0; + while (len >= 2 * sizeof(unsigned long long)) { + total += __builtin_popcountll(*(unsigned long long const *restrict)src); + total += __builtin_popcountll(*( + unsigned long long const *restrict)(src + sizeof(unsigned long long))); + src += 2 * sizeof(unsigned long long); + len -= 2 * sizeof(unsigned long long); + } + while (len > 0) { + total += __builtin_popcount(*src); + src++; + len--; + } + return total; +} diff --git a/plutus-core/plutus-core/bench/bitwise/cbits/rotate.c b/plutus-core/plutus-core/bench/bitwise/cbits/rotate.c new file mode 100644 index 00000000000..df82cf660ab --- /dev/null +++ b/plutus-core/plutus-core/bench/bitwise/cbits/rotate.c @@ -0,0 +1,25 @@ +#include "cbits.h" + +void c_rotate_bits(int bit_rotation, unsigned char *restrict dst, + unsigned char const *restrict src, size_t len) { + if (bit_rotation > 0) { + size_t read_pos = bit_rotation / 8; + size_t bit_tail_len = bit_rotation % 8; + size_t write_pos = 0; + unsigned char const tail_mask = (0x01 << bit_tail_len) - 1; + unsigned char const head_mask = ~tail_mask; + while (write_pos < len) { + if (read_pos == len - 1) { + dst[write_pos] = (head_mask & src[read_pos]) | (tail_mask & src[0]); + read_pos = 0; + } else { + dst[write_pos] = + (head_mask & src[read_pos]) | (tail_mask & src[read_pos + 1]); + read_pos++; + } + write_pos++; + } + } else { + c_rotate_bits((len * 8) + bit_rotation, dst, src, len); + } +} diff --git a/plutus-core/plutus-core/bench/bitwise/cbits/shift.c b/plutus-core/plutus-core/bench/bitwise/cbits/shift.c new file 
mode 100644 index 00000000000..9b50d18d689 --- /dev/null +++ b/plutus-core/plutus-core/bench/bitwise/cbits/shift.c @@ -0,0 +1,59 @@ +#include "cbits.h" +#include +#include + +void c_shift_bits(int bit_shift, unsigned char *restrict dst, + unsigned char const *restrict src, size_t len) { + if (bit_shift > 0) { + size_t infill_bytes = bit_shift / 8; + size_t const bit_head_len = bit_shift % 8; + if (infill_bytes > len) { + infill_bytes = len; + } + memset(dst, 0x00, infill_bytes); + if (bit_head_len == 0) { + memcpy(dst + infill_bytes, src, len - infill_bytes); + } else { + size_t read_pos = 0; + size_t write_pos = infill_bytes; + unsigned char const hi_mask = (0x01 << bit_head_len) - 1; + unsigned char const lo_mask = ~hi_mask; + while (write_pos < len) { + if (read_pos == 0) { + dst[write_pos] = hi_mask & src[read_pos]; + } else { + dst[write_pos] = + (lo_mask & src[read_pos - 1]) | (hi_mask & src[read_pos]); + } + write_pos++; + read_pos++; + } + } + } else { + size_t const abs_bit_shift = abs(bit_shift); + size_t infill_bytes = abs_bit_shift / 8; + size_t const bit_tail_len = abs_bit_shift % 8; + if (infill_bytes > len) { + infill_bytes = len; + } + if (bit_tail_len == 0) { + memcpy(dst, src + infill_bytes, len - infill_bytes); + } else { + size_t read_pos = infill_bytes; + size_t write_pos = 0; + unsigned char const hi_mask = (0x01 << bit_tail_len) - 1; + unsigned char const lo_mask = ~hi_mask; + while (read_pos < len) { + if (read_pos == (len - 1)) { + dst[write_pos] = lo_mask & src[read_pos]; + } else { + dst[write_pos] = + (lo_mask & src[read_pos]) | (hi_mask & src[read_pos + 1]); + } + write_pos++; + read_pos++; + } + } + memset(dst + (len - infill_bytes), 0x00, infill_bytes); + } +} diff --git a/plutus-core/plutus-core/src/Bitwise.hs b/plutus-core/plutus-core/src/Bitwise.hs new file mode 100644 index 00000000000..c442ca14ae7 --- /dev/null +++ b/plutus-core/plutus-core/src/Bitwise.hs @@ -0,0 +1,500 @@ +-- editorconfig-checker-disable-file +{-# LANGUAGE 
ImpredicativeTypes #-} +{-# LANGUAGE KindSignatures #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE MultiWayIf #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE UnboxedSums #-} +{-# OPTIONS_GHC -fexpose-all-unfoldings #-} + +-- FIXME: Should be its own library +module Bitwise ( + integerToByteString, + byteStringToInteger, + andByteString, + iorByteString, + xorByteString, + complementByteString, + popCountByteString, + testBitByteString, + writeBitByteString, + findFirstSetByteString, + shiftByteString, + rotateByteString, + ) where + +import Data.Bits (FiniteBits, bit, complement, popCount, rotate, shift, shiftL, xor, zeroBits, + (.&.), (.|.)) +import Data.ByteString (ByteString) +import Data.ByteString qualified as BS +import Data.ByteString.Internal (toForeignPtr0) +import Data.ByteString.Short qualified as SBS +import Data.ByteString.Unsafe (unsafePackMallocCStringLen, unsafeUseAsCString, + unsafeUseAsCStringLen) +import Data.Foldable (foldl', for_) +import Data.Functor (void) +import Data.Kind (Type) +import Data.Text (Text, pack) +import Data.Word (Word64, Word8) +import Foreign.C.Types (CChar, CSize) +import Foreign.Marshal.Alloc (mallocBytes) +import Foreign.Ptr (Ptr, castPtr, plusPtr) +import Foreign.Storable (Storable (peek, poke, sizeOf)) +import GHC.ForeignPtr (ForeignPtr (ForeignPtr)) +import GHC.IO.Handle.Text (memcpy) +import GHC.Num.Integer (Integer (IN), integerFromAddr, integerToBigNatClamp#) +import GHC.Prim (int2Word#) +import GHC.Types (Int (I#)) +import PlutusCore.Builtin.Emitter (Emitter, emit) +import PlutusCore.Evaluation.Result (EvaluationResult (EvaluationFailure)) +import System.IO.Unsafe (unsafeDupablePerformIO) + +-- | See 'PlutusTx.Builtins.rotateByteString'. +rotateByteString :: ByteString -> Integer -> ByteString +rotateByteString bs i + -- If a ByteString is completely homogenous, rotating won't change it. 
This + -- also covers emptiness, since empty ByteStrings are homogenous vacuously. + | isAllZero bs || isAllOne bs = bs + -- Rotating by more than the number of bits in a ByteString 'wraps around', + -- so we're only interested in the rotation modulo the number of bits. + | otherwise = case i `mod` bitLen of + -- Means we have a multiple of the bit count, so nothing to do. + 0 -> bs + displacement -> overPtrLen bs $ \ptr len -> go ptr len displacement + where + -- not recursive! + go :: Ptr Word8 -> Int -> Integer -> IO (Ptr Word8) + go src len displacement = do + dst <- mallocBytes len + case len of + -- If we only have one byte, we can borrow from the Bits instance for + -- Word8, since it rotates in the same direction that we want. + 1 -> do + srcByte <- peek src + let srcByte' = srcByte `rotate` fromIntegral displacement + poke dst srcByte' + -- If we rotate by a multiple of 8, we only need to move around whole + -- bytes, rather than individual bits. Because we only move contiguous + -- blocks (regardless of rotation direction), we can do this using + -- memcpy, which is much faster, especially on larger ByteStrings. + _ -> case displacement `quotRem` 8 of + (bigMove, 0) -> do + let mainLen :: CSize = fromIntegral $ bigMove + let restLen :: CSize = fromIntegral len - mainLen + -- Copy the portion [..mainLen] to [restLen..], + -- and the portion [mainLen..] to [..restLen]. + _ <- memcpy (plusPtr dst (fromIntegral restLen)) src mainLen + _ <- memcpy dst (plusPtr src (fromIntegral mainLen)) restLen + pure () + -- If we don't rotate by a multiple of 8, we have to construct new + -- bytes, rather than just copying over old ones. We do this in two + -- steps: + -- + -- 1. Compute the 'read offset' into the source ByteString based on + -- the rotation magnitude and direction. + -- 2. Use that read offset to perform an (unchecked) bit lookup for an + -- entire 8-bit block, then construct the byte that results. 
+ -- + -- We can do the bytes in the result in any order using this method: + -- we choose to do it in traversal order. + _ -> for_ [0 .. len - 1] $ \j -> do + let start = (len - 1 - j) * 8 + let dstByte = foldl' (addBit start displacement) zeroBits [0 .. 7] + poke (plusPtr dst j) dstByte + pure dst + bitLen :: Integer + bitLen = fromIntegral $ BS.length bs * 8 + addBit :: Int -> Integer -> Word8 -> Integer -> Word8 + addBit start displacement acc offset = + let oldIx = (offset + fromIntegral start + bitLen - displacement) `rem` bitLen in + if dangerousRead bs oldIx + then acc .|. (bit . fromIntegral $ offset) + else acc + +-- | See 'PlutusTx.Builtins.shiftByteString. +shiftByteString :: ByteString -> Integer -> ByteString +shiftByteString bs i + -- Shifting by the number of bits, or more, would zero everything anyway, + -- regardless of direction. This also covers the empty ByteString case, as its + -- bit length is zero. + | abs i >= bitLen = BS.replicate (BS.length bs) zeroBits + -- Shifting an all-zero ByteString will not change it, regardless of + -- direction. + | isAllZero bs = bs + | otherwise = overPtrLen bs go + where + bitLen :: Integer + bitLen = fromIntegral $ BS.length bs * 8 + go :: Ptr Word8 -> Int -> IO (Ptr Word8) + go src len = do + dst <- mallocBytes len + case len of + -- If we only have one byte, we can borrow from the Bits instance for + -- Word8, since it shifts in the same direction that we want. + 1 -> do + srcByte <- peek src + let srcByte' = srcByte `shift` fromIntegral i + poke dst srcByte' + -- If we shift by a multiple of 8, we only need to move a contiguous + -- block of bytes, then clear what remains. This is much more efficient: + -- it would be nice if we had memset available, but at least the copy + -- can be done with memcpy. + _ -> case i `quotRem` 8 of + (bigMove, 0) -> do + let mainLen :: CSize = fromIntegral . abs $ bigMove + let restLen :: CSize = fromIntegral len - mainLen + case signum bigMove of + 1 -> do + void . 
memcpy dst (plusPtr src . fromIntegral $ mainLen) $ restLen + for_ [fromIntegral restLen, fromIntegral $ restLen + 1 .. len - 1] $ \j -> + poke @Word8 (plusPtr dst j) zeroBits + _ -> do + for_ [0 .. fromIntegral mainLen - 1] $ \j -> poke @Word8 (plusPtr dst j) zeroBits + void . memcpy (plusPtr dst . fromIntegral $ mainLen) src $ restLen + -- If we shift by something other than a multiple of 8, we have to + -- construct new bytes, similarly to rotations. We use the same + -- two-step process to construct new bytes, but due to not having the + -- 'wraparound' behaviour (unlike rotations), we clear any bits that + -- would be sourced 'out of bounds'. + _ -> for_ [0 .. len - 1] $ \j -> do + let start = (len - 1 - j) * 8 + let dstByte = foldl' (addBit start) zeroBits [0 .. 7] + poke (plusPtr dst j) dstByte + pure dst + addBit :: Int -> Word8 -> Integer -> Word8 + addBit start acc offset = + let possibleIx = offset + fromIntegral start - i in + if | possibleIx < 0 -> acc + | possibleIx >= bitLen -> acc + | dangerousRead bs possibleIx -> acc .|. (bit . fromIntegral $ offset) + | otherwise -> acc + +-- | See 'PlutusTx.Builtins.findFirstSetByteString'. +findFirstSetByteString :: ByteString -> Integer +findFirstSetByteString bs = foldl' go (-1) [0 .. len - 1] + where + go :: Integer -> Int -> Integer + go acc ix + | acc /= (-1) = acc -- we found one already + | otherwise = case BS.index bs (len - ix - 1) of + 0 -> (-1) -- keep looking, nothing to find here + w8 -> fromIntegral $ (ix * 8) + findPosition w8 + len :: Int + len = BS.length bs + +-- | See 'PlutusTx.Builtins.testBitByteString. +testBitByteString :: ByteString -> Integer -> Emitter (EvaluationResult Bool) +testBitByteString bs i + | i < 0 || i >= bitLen = indexOutOfBoundsError "testBitByteString" bitLen i + | otherwise = pure . pure . dangerousRead bs $ i + where + bitLen :: Integer + bitLen = fromIntegral $ BS.length bs * 8 + +-- | See 'PlutusTx.Builtins.writeBitByteString. 
+writeBitByteString :: ByteString -> Integer -> Bool -> Emitter (EvaluationResult ByteString) +writeBitByteString bs i b + | i < 0 || i >= bitLen = indexOutOfBoundsError "writeBitByteString" bitLen i + -- When we write a bit at a location, we have to return a new copy of the + -- original with the bit modified. We do this as follows: + -- + -- 1. Compute the byte that has to change. Because _byte_ indexes and _bit_ + -- indexes go in opposite directions, we have to compute the byte by a + -- combination of modulus and offset from the end. + -- 2. Use the remainder to construct a mask which 'selects' the bit within the + -- byte we want to change. + -- 3. Memcpy everything over. + -- 4. Use the mask at the computed byte index to modify the result in-place: + -- we do a different operation depending on whether we're setting or clearing. + -- + -- We use memcpy plus a single write as this is _much_ faster than going + -- byte-by-byte and checking if we've reached the index we want each time: + -- memcpy is highly-optimized using SIMD instructions on every platform, and a + -- branchy per-byte loop is absolutely horrid everywhere for speed due to the + -- branch count. + | otherwise = do + let (bigOffset, smallOffset) = i `quotRem` 8 + let bigIx = fromIntegral $ byteLen - bigOffset - 1 + let mask = bit 0 `shiftL` fromIntegral smallOffset + pure . pure . unsafeDupablePerformIO . unsafeUseAsCStringLen bs $ go bigIx mask + where + byteLen :: Integer + byteLen = fromIntegral . BS.length $ bs + bitLen :: Integer + bitLen = byteLen * 8 + go :: Int -> Word8 -> (Ptr CChar, Int) -> IO ByteString + go bigIx mask (src, len) = do + dst <- mallocBytes len + void . memcpy dst src . fromIntegral $ len + byte :: Word8 <- peek . plusPtr src $ bigIx + let byte' = if b then mask .|. byte else complement mask .&. byte + poke (castPtr . plusPtr dst $ bigIx) byte' + unsafePackMallocCStringLen (dst, len) + +-- | See 'PlutusTx.Builtins.integerToByteString. 
+{-# INLINE integerToByteString #-} +integerToByteString :: Integer -> Maybe ByteString +integerToByteString (IN _) = Nothing +integerToByteString n = Just $ fst $ BS.spanEnd (== 0) $ SBS.fromShort $ SBS.SBS (integerToBigNatClamp# n) + +-- | See 'PlutusTx.Builtins.byteStringToInteger. +{-# INLINE byteStringToInteger #-} +byteStringToInteger :: ByteString -> Integer +byteStringToInteger bs = + case toForeignPtr0 bs of + (ForeignPtr addr _, I# len) -> unsafeDupablePerformIO $ integerFromAddr (int2Word# len) addr (case 0 of I# n -> n) + +-- | See 'PlutusTx.Builtins.popCountByteString. +popCountByteString :: ByteString -> Integer +popCountByteString bs = unsafeDupablePerformIO . unsafeUseAsCStringLen bs $ go + where + -- We use a standard 'big step, small step' approach. The reason for this is + -- that bit counting (via a FiniteBits instance) is defined for much larger + -- types than Word8. We can thus read 8-blocks of bytes as 64-bit words + -- instead (as we don't care about sign and GHC ensures word alignment), + -- which gives us potentially up to an 8x speedup. + -- + -- Thus, our 'big step, small step' approach first walks as much of its + -- input as it can using steps whose size is Word64, then finishes the job + -- with steps whose size is Word8. We use a rank-2 polymorphic method to + -- avoid code duplication, since the only operation we need comes from a + -- type class, and is thus agnostic to what we're working over. Step size + -- can also be determined via Storable in a similar way. + go :: (Ptr CChar, Int) -> IO Integer + go (ptr, len) = do + let (bigSteps, smallSteps) = len `quotRem` 8 + let bigPtr :: Ptr Word64 = castPtr ptr + let smallPtr :: Ptr Word8 = castPtr . plusPtr ptr $ bigSteps * 8 + bigCount <- countBits bigPtr bigSteps + smallCount <- countBits smallPtr smallSteps + pure . fromIntegral $ bigCount + smallCount + +-- We use a standard 'big step, small step' construction for all the operators +-- below. 
The reason for this is that each of these operations are bit-parallel: +-- it doesn't matter what width of bit block you operate on, you'll have the +-- same outcome. As a result, these operations are defined for much larger +-- blocks than Word8. We can thus read 8-blocks of bytes as 64-bit words instead +-- (as we don't care about sign and GHC ensures word alignment), which gives us +-- potentially up to an 8x speedup. +-- +-- Thus, our 'big step, small step' approach processes the inputs in two stages: +-- +-- 1. Walk lockstep in blocks of Word64 size over both inputs, and set the +-- corresponding place in the output to the result of the bitwise operation on +-- those blocks. +-- 2. For whatever remains, walk lockstep in blocks of Word8 size over both +-- inputs, and set the corresponding place in the output to the result of the +-- bitwise operation on those blocks. +-- +-- We use a rank-2 polymorphic method to avoid code duplication, since all of +-- the operations over blocks we are interested in (of either size) come from a +-- type class (Bits) without caring about what specific type we're dealing with. +-- Step size can also be determined via Storable in a similar way. +-- +-- We use a mutable construction inside IO instead of something immutable to +-- avoid excessive 'sloshing': on our current version of the 'bytestring' +-- library, there is no way to 'zip together' two ByteStrings directly: your +-- only option was to 'zip out' into a list, then rebuild. This is not only +-- inefficient (as you can't do a 'big step, little step' approach to this in +-- general), it also copies too much. +-- | See 'PlutusTx.Builtins.andByteString. +andByteString :: ByteString -> ByteString -> Emitter (EvaluationResult ByteString) +andByteString bs bs' + | BS.length bs /= BS.length bs' = mismatchedLengthError "andByteString" bs bs' + | otherwise = pure . pure . unsafeDupablePerformIO . 
unsafeUseAsCStringLen bs $ \(ptr, len) -> + unsafeUseAsCString bs' $ \ptr' -> + zipBuild (.&.) ptr ptr' len >>= (unsafePackMallocCStringLen . (,len)) + +-- | See 'PlutusTx.Builtins.iorByteString. +iorByteString :: ByteString -> ByteString -> Emitter (EvaluationResult ByteString) +iorByteString bs bs' + | BS.length bs /= BS.length bs' = mismatchedLengthError "iorByteString" bs bs' + | otherwise = pure . pure . unsafeDupablePerformIO . unsafeUseAsCStringLen bs $ \(ptr, len) -> + unsafeUseAsCString bs' $ \ptr' -> + zipBuild (.|.) ptr ptr' len >>= (unsafePackMallocCStringLen . (,len)) + +-- | See 'PlutusTx.Builtins.xorByteString. +xorByteString :: ByteString -> ByteString -> Emitter (EvaluationResult ByteString) +xorByteString bs bs' + | BS.length bs /= BS.length bs' = mismatchedLengthError "xorByteString" bs bs' + | otherwise = pure . pure . unsafeDupablePerformIO . unsafeUseAsCStringLen bs $ \(ptr, len) -> + unsafeUseAsCString bs' $ \ptr' -> + zipBuild xor ptr ptr' len >>= (unsafePackMallocCStringLen . (,len)) + +-- Similarly to the above, we use a 'big step, little step' here as well. The +-- only difference is that there is only one input to read from, rather than +-- two. Similar reasoning applies to why we made this choice as to the +-- previous operations. + +-- | See 'PlutusTx.Builtins.complementByteString. +complementByteString :: ByteString -> ByteString +complementByteString bs = unsafeDupablePerformIO . unsafeUseAsCStringLen bs $ \(ptr, len) -> do + resPtr <- mallocBytes len + let (bigSteps, smallSteps) = len `quotRem` 8 + let bigDst :: Ptr Word64 = castPtr resPtr + let smallDst :: Ptr Word8 = castPtr . plusPtr resPtr $ bigSteps * 8 + let bigSrc :: Ptr Word64 = castPtr ptr + let smallSrc :: Ptr Word8 = castPtr . plusPtr ptr $ bigSteps * 8 + go bigDst bigSrc 0 bigSteps + go smallDst smallSrc 0 smallSteps + unsafePackMallocCStringLen (resPtr, len) + where + go :: forall (a :: Type) . 
+ (Storable a, FiniteBits a) => + Ptr a -> Ptr a -> Int -> Int -> IO () + go dst src offset lim + | offset == lim = pure () + | otherwise = do + let offset' = offset * sizeOf (undefined :: a) + block :: a <- peek . plusPtr src $ offset' + poke (plusPtr dst offset') . complement $ block + go dst src (offset + 1) lim + +-- Helpers + +-- We compute the read similarly to how we determine the change when we write. +-- The only difference is that the mask is used on the input to read it, rather +-- than to modify anything. +dangerousRead :: ByteString -> Integer -> Bool +dangerousRead bs i = + let (bigOffset, smallOffset) = i `quotRem` 8 + bigIx = BS.length bs - fromIntegral bigOffset - 1 + mask = bit (fromIntegral smallOffset) in + case mask .&. BS.index bs bigIx of + 0 -> False + _ -> True + +-- Important note: this function is only safe under the following conditions: +-- +-- * The IO used in the function argument only performs memory allocations using +-- malloc, as well as reads and writes via the Storable interface; +-- * The pointer argument is only read from, not written to; +-- * The result of the function argument points to freshly-allocated, malloced +-- memory; and +-- * The result of the function argument points to memory whose length matches +-- that of the input ByteString (in bytes) +-- +-- Even though a ByteString is represented as Ptr CChar, we can ignore sign (we +-- only treat them as binary data anyway), and on POSIX platforms (which GHC +-- silently assumes, even on Windows), CChar _must_ be exactly a byte. Thus, we +-- allow working over a pointer to Word8 instead, to avoid issues with signs. +overPtrLen :: ByteString -> (Ptr Word8 -> Int -> IO (Ptr Word8)) -> ByteString +overPtrLen bs f = unsafeDupablePerformIO . unsafeUseAsCStringLen bs $ + \(ptr, len) -> f (castPtr ptr) len >>= \p -> + unsafePackMallocCStringLen (castPtr p, len) + +-- Error used when lengths of inputs aren't equal. +mismatchedLengthError :: forall (a :: Type) . 
+ Text -> + ByteString -> + ByteString -> + Emitter (EvaluationResult a) +mismatchedLengthError loc bs bs' = do + emit $ loc <> " failed" + emit "Reason: mismatched argument lengths" + emit $ "Length of first argument: " <> (pack . show . BS.length $ bs) + emit $ "Length of second argument: " <> (pack . show . BS.length $ bs') + pure EvaluationFailure + +-- Error used when an out of bounds index is used to index a bytestring. +indexOutOfBoundsError :: forall (a :: Type) . + Text -> + Integer -> + Integer -> + Emitter (EvaluationResult a) +indexOutOfBoundsError loc lim i = do + emit $ loc <> " failed" + emit "Reason: out of bounds" + emit $ "Attempted access at index " <> (pack . show $ i) + emit $ "Valid indexes: from 0 to " <> (pack . show $ lim - 1) + pure EvaluationFailure + +-- A general method for 'zipping together' two ByteString inputs to produce a +-- new ByteString output, assuming the 'zipping function' is bit-parallel. This +-- uses a standard 'big step, little step' construction. We can do this because +-- bit-parallel operations don't change semantics based on the size of the block +-- read; furthermore, as GHC guarantees word alignment and we don't care about +-- sign, we can potentially get up to an 8x speedup. +-- +-- We use a mutable construction inside IO instead of something immutable to +-- avoid excessive 'sloshing': on our current version of the 'bytestring' +-- library, there is no way to 'zip together' two ByteStrings directly: your +-- only option was to 'zip out' into a list, then rebuild. This is not only +-- inefficient (as you can't do a 'big step, little step' approach to this in +-- general), it also copies too much. +-- +-- Note: the function argument must be bit-parallel. The type guarantees it to +-- some degree, but in general, we can't enforce this in the type system. +zipBuild :: + (forall (a :: Type) . 
(FiniteBits a, Storable a) => a -> a -> a) -> + Ptr CChar -> + Ptr CChar -> + Int -> + IO (Ptr CChar) +zipBuild f ptr ptr' len = do + resPtr <- mallocBytes len + let (bigSteps, smallSteps) = len `quotRem` 8 + let bigPtr :: Ptr Word64 = castPtr resPtr + let smallPtr :: Ptr Word8 = castPtr . plusPtr resPtr $ bigSteps * 8 + go bigPtr (castPtr ptr) (castPtr ptr') 0 bigSteps + let ptrRest :: Ptr Word8 = castPtr . plusPtr ptr $ bigSteps * 8 + let ptrRest' :: Ptr Word8 = castPtr . plusPtr ptr' $ bigSteps * 8 + go smallPtr ptrRest ptrRest' 0 smallSteps + pure resPtr + where + go :: forall (b :: Type) . + (FiniteBits b, Storable b) => + Ptr b -> + Ptr b -> + Ptr b -> + Int -> + Int -> + IO () + go dst src src' offset lim + | offset == lim = pure () + | otherwise = do + let offset' = sizeOf (undefined :: b) * offset + block :: b <- peek . plusPtr src $ offset' + block' :: b <- peek . plusPtr src' $ offset' + poke (plusPtr dst offset') (f block block') + go dst src src' (offset + 1) lim + +-- Check every bit position in a byte for a set bit, returning its index if we +-- find one. We default return 7, even though this index is valid, as no +-- consumer function ever looks at this value, since that can only happen on +-- zero bytes, which we ignore anyway. +findPosition :: Word8 -> Int +findPosition w8 = foldl' go 7 . fmap (\i -> (i, bit 0 `shiftL` i)) $ [0 .. 7] + where + go :: Int -> (Int, Word8) -> Int + go acc (i, mask) = case mask .&. w8 of + 0 -> acc -- nothing to see here, move along + _ -> min acc i + +-- A polymorphic bit counter in a block, which we can segment by chunks of a +-- type of arbitrary size, provided it is both Storable (so we can read at +-- offsets) and FiniteBits (so we can count it). +countBits :: forall (a :: Type) . 
+ (FiniteBits a, Storable a) => + Ptr a -> Int -> IO Int +countBits ptr len = go 0 0 + where + go :: Int -> Int -> IO Int + go total offset + | offset == len = pure total + | otherwise = do + let offset' = offset * sizeOf (undefined :: a) + block :: a <- peek . plusPtr ptr $ offset' + let total' = total + popCount block + go total' (offset + 1) + +-- Check if every byte of a ByteString is zero +isAllZero :: ByteString -> Bool +isAllZero = BS.all (== zeroBits) + +-- Check if every byte of a ByteString is one +isAllOne :: ByteString -> Bool +isAllOne = BS.all (== complement zeroBits) diff --git a/plutus-core/plutus-core/src/PlutusCore/Default/Builtins.hs b/plutus-core/plutus-core/src/PlutusCore/Default/Builtins.hs index 4bb0d601bd9..109f66661de 100644 --- a/plutus-core/plutus-core/src/PlutusCore/Default/Builtins.hs +++ b/plutus-core/plutus-core/src/PlutusCore/Default/Builtins.hs @@ -20,18 +20,15 @@ import PlutusCore.Builtin import PlutusCore.Data import PlutusCore.Default.Universe import PlutusCore.Evaluation.Machine.BuiltinCostModel +import PlutusCore.Evaluation.Machine.ExBudget (ExBudget (ExBudget)) import PlutusCore.Evaluation.Machine.ExBudgetStream import PlutusCore.Evaluation.Machine.ExMemoryUsage import PlutusCore.Evaluation.Result import PlutusCore.Pretty -import PlutusCore.Crypto.BLS12_381.G1 qualified as BLS12_381.G1 -import PlutusCore.Crypto.BLS12_381.G2 qualified as BLS12_381.G2 -import PlutusCore.Crypto.BLS12_381.Pairing qualified as BLS12_381.Pairing -import PlutusCore.Crypto.Ed25519 (verifyEd25519Signature_V1, verifyEd25519Signature_V2) -import PlutusCore.Crypto.Hash qualified as Hash -import PlutusCore.Crypto.Secp256k1 (verifyEcdsaSecp256k1Signature, verifySchnorrSecp256k1Signature) - +import Bitwise (andByteString, byteStringToInteger, complementByteString, findFirstSetByteString, + integerToByteString, iorByteString, popCountByteString, rotateByteString, + shiftByteString, testBitByteString, writeBitByteString, xorByteString) import Codec.Serialise 
(serialise) import Data.ByteString qualified as BS import Data.ByteString.Lazy qualified as BSL @@ -42,6 +39,12 @@ import Data.Text.Encoding (decodeUtf8', encodeUtf8) import Flat hiding (from, to) import Flat.Decoder import Flat.Encoder as Flat +import PlutusCore.Crypto.BLS12_381.G1 qualified as BLS12_381.G1 +import PlutusCore.Crypto.BLS12_381.G2 qualified as BLS12_381.G2 +import PlutusCore.Crypto.BLS12_381.Pairing qualified as BLS12_381.Pairing +import PlutusCore.Crypto.Ed25519 (verifyEd25519Signature_V1, verifyEd25519Signature_V2) +import PlutusCore.Crypto.Hash qualified as Hash +import PlutusCore.Crypto.Secp256k1 (verifyEcdsaSecp256k1Signature, verifySchnorrSecp256k1Signature) import Prettyprinter (viaShow) -- See Note [Pattern matching on built-in types]. @@ -147,6 +150,19 @@ data DefaultFun -- Keccak_256, Blake2b_224 | Keccak_256 | Blake2b_224 + -- Bitwise + | IntegerToByteString + | ByteStringToInteger + | AndByteString + | IorByteString + | XorByteString + | ComplementByteString + | ShiftByteString + | RotateByteString + | PopCountByteString + | TestBitByteString + | WriteBitByteString + | FindFirstSetByteString deriving stock (Show, Eq, Ord, Enum, Bounded, Generic, Ix) deriving anyclass (NFData, Hashable, PrettyBy PrettyConfigPlc) @@ -1470,6 +1486,7 @@ instance uni ~ DefaultUni => ToBuiltinMeaning uni DefaultFun where makeBuiltinMeaning BLS12_381.Pairing.finalVerify (runCostingFunTwoArguments . paramBls12_381_finalVerify) + -- Keccak_256, Blake2b_224 toBuiltinMeaning _ver Keccak_256 = makeBuiltinMeaning Hash.keccak_256 @@ -1478,6 +1495,61 @@ instance uni ~ DefaultUni => ToBuiltinMeaning uni DefaultFun where makeBuiltinMeaning Hash.blake2b_224 (runCostingFunOneArgument . 
paramBlake2b_224) + -- Bitwise + toBuiltinMeaning _ver IntegerToByteString = + makeBuiltinMeaning integerToByteStringPlc (\_ _ -> ExBudgetLast $ ExBudget 0 0) + where + integerToByteStringPlc :: SomeConstant uni Integer -> EvaluationResult BS.ByteString + integerToByteStringPlc (SomeConstant (Some (ValueOf uni n))) = do + DefaultUniInteger <- pure uni + case integerToByteString n of + Just bs -> pure $ bs + Nothing -> fail "negative integer passed to integerByteString" + {-# INLINE integerToByteStringPlc #-} + toBuiltinMeaning _ver ByteStringToInteger = + makeBuiltinMeaning + byteStringToInteger + (runCostingFunOneArgument . paramByteStringToInteger) + toBuiltinMeaning _ver AndByteString = + makeBuiltinMeaning + andByteString + (runCostingFunTwoArguments . paramAndByteString) + toBuiltinMeaning _ver IorByteString = + makeBuiltinMeaning + iorByteString + (runCostingFunTwoArguments . paramIorByteString) + toBuiltinMeaning _ver XorByteString = + makeBuiltinMeaning + xorByteString + (runCostingFunTwoArguments . paramXorByteString) + toBuiltinMeaning _ver ComplementByteString = + makeBuiltinMeaning + complementByteString + (runCostingFunOneArgument . paramComplementByteString) + toBuiltinMeaning _ver ShiftByteString = + makeBuiltinMeaning + shiftByteString + (runCostingFunTwoArguments . paramShiftByteString) + toBuiltinMeaning _ver RotateByteString = + makeBuiltinMeaning + rotateByteString + (runCostingFunTwoArguments . paramRotateByteString) + toBuiltinMeaning _ver PopCountByteString = + makeBuiltinMeaning + popCountByteString + (runCostingFunOneArgument . paramPopCountByteString) + toBuiltinMeaning _ver TestBitByteString = + makeBuiltinMeaning + testBitByteString + (runCostingFunTwoArguments . paramTestBitByteString) + toBuiltinMeaning _ver WriteBitByteString = + makeBuiltinMeaning + writeBitByteString + (runCostingFunThreeArguments . 
paramWriteBitByteString) + toBuiltinMeaning _ver FindFirstSetByteString = + makeBuiltinMeaning + findFirstSetByteString + (runCostingFunOneArgument . paramFindFirstSetByteString) -- See Note [Inlining meanings of builtins]. {-# INLINE toBuiltinMeaning #-} @@ -1585,6 +1657,18 @@ instance Flat DefaultFun where Bls12_381_finalVerify -> 70 Keccak_256 -> 71 Blake2b_224 -> 72 + IntegerToByteString -> 73 + ByteStringToInteger -> 74 + AndByteString -> 75 + IorByteString -> 76 + XorByteString -> 77 + ComplementByteString -> 78 + ShiftByteString -> 79 + RotateByteString -> 80 + PopCountByteString -> 81 + TestBitByteString -> 82 + WriteBitByteString -> 83 + FindFirstSetByteString -> 84 decode = go =<< decodeBuiltin where go 0 = pure AddInteger @@ -1660,6 +1744,19 @@ instance Flat DefaultFun where go 70 = pure Bls12_381_finalVerify go 71 = pure Keccak_256 go 72 = pure Blake2b_224 + go 73 = pure IntegerToByteString + go 74 = pure ByteStringToInteger + go 75 = pure AndByteString + go 76 = pure IorByteString + go 77 = pure XorByteString + go 78 = pure ComplementByteString + go 79 = pure ShiftByteString + go 80 = pure RotateByteString + go 81 = pure PopCountByteString + go 82 = pure TestBitByteString + go 83 = pure WriteBitByteString + go 84 = pure FindFirstSetByteString + go t = fail $ "Failed to decode builtin tag, got: " ++ show t size _ n = n + builtinTagWidth diff --git a/plutus-core/plutus-core/src/PlutusCore/Evaluation/Machine/BuiltinCostModel.hs b/plutus-core/plutus-core/src/PlutusCore/Evaluation/Machine/BuiltinCostModel.hs index dca1307c969..938d1eb3522 100644 --- a/plutus-core/plutus-core/src/PlutusCore/Evaluation/Machine/BuiltinCostModel.hs +++ b/plutus-core/plutus-core/src/PlutusCore/Evaluation/Machine/BuiltinCostModel.hs @@ -157,6 +157,19 @@ data BuiltinCostModelBase f = -- Keccak_256, Blake2b_224 , paramKeccak_256 :: f ModelOneArgument , paramBlake2b_224 :: f ModelOneArgument + -- Bitwise operations + , paramIntegerToByteString :: f ModelOneArgument + , 
paramByteStringToInteger :: f ModelOneArgument + , paramAndByteString :: f ModelTwoArguments + , paramIorByteString :: f ModelTwoArguments + , paramXorByteString :: f ModelTwoArguments + , paramComplementByteString :: f ModelOneArgument + , paramShiftByteString :: f ModelTwoArguments + , paramRotateByteString :: f ModelTwoArguments + , paramPopCountByteString :: f ModelOneArgument + , paramTestBitByteString :: f ModelTwoArguments + , paramWriteBitByteString :: f ModelThreeArguments + , paramFindFirstSetByteString :: f ModelOneArgument } deriving stock (Generic) deriving anyclass (FunctorB, TraversableB, ConstraintsB) diff --git a/plutus-core/plutus-core/src/PlutusCore/Evaluation/Machine/ExBudgetingDefaults.hs b/plutus-core/plutus-core/src/PlutusCore/Evaluation/Machine/ExBudgetingDefaults.hs index 67784d66d08..7283c46bfa5 100644 --- a/plutus-core/plutus-core/src/PlutusCore/Evaluation/Machine/ExBudgetingDefaults.hs +++ b/plutus-core/plutus-core/src/PlutusCore/Evaluation/Machine/ExBudgetingDefaults.hs @@ -198,5 +198,18 @@ unitCostBuiltinCostModel = BuiltinCostModelBase -- Keccak_256, Blake2b_224 , paramKeccak_256 = unitCostOneArgument , paramBlake2b_224 = unitCostOneArgument + -- Bitwise operations + , paramIntegerToByteString = unitCostOneArgument + , paramByteStringToInteger = unitCostOneArgument + , paramAndByteString = unitCostTwoArguments + , paramIorByteString = unitCostTwoArguments + , paramXorByteString = unitCostTwoArguments + , paramComplementByteString = unitCostOneArgument + , paramShiftByteString = unitCostTwoArguments + , paramRotateByteString = unitCostTwoArguments + , paramPopCountByteString = unitCostOneArgument + , paramTestBitByteString = unitCostTwoArguments + , paramWriteBitByteString = unitCostThreeArguments + , paramFindFirstSetByteString = unitCostOneArgument } diff --git a/plutus-core/plutus-core/test/TypeSynthesis/Golden/AndByteString.plc.golden b/plutus-core/plutus-core/test/TypeSynthesis/Golden/AndByteString.plc.golden new file mode 100644 
index 00000000000..01714abd65b --- /dev/null +++ b/plutus-core/plutus-core/test/TypeSynthesis/Golden/AndByteString.plc.golden @@ -0,0 +1 @@ +(fun (con bytestring) (fun (con bytestring) (con bytestring))) \ No newline at end of file diff --git a/plutus-core/plutus-core/test/TypeSynthesis/Golden/ByteStringToInteger.plc.golden b/plutus-core/plutus-core/test/TypeSynthesis/Golden/ByteStringToInteger.plc.golden new file mode 100644 index 00000000000..6495d849f0e --- /dev/null +++ b/plutus-core/plutus-core/test/TypeSynthesis/Golden/ByteStringToInteger.plc.golden @@ -0,0 +1 @@ +(fun (con bytestring) (con integer)) \ No newline at end of file diff --git a/plutus-core/plutus-core/test/TypeSynthesis/Golden/ComplementByteString.plc.golden b/plutus-core/plutus-core/test/TypeSynthesis/Golden/ComplementByteString.plc.golden new file mode 100644 index 00000000000..e8a4293b5b2 --- /dev/null +++ b/plutus-core/plutus-core/test/TypeSynthesis/Golden/ComplementByteString.plc.golden @@ -0,0 +1 @@ +(fun (con bytestring) (con bytestring)) \ No newline at end of file diff --git a/plutus-core/plutus-core/test/TypeSynthesis/Golden/FindFirstSetByteString.plc.golden b/plutus-core/plutus-core/test/TypeSynthesis/Golden/FindFirstSetByteString.plc.golden new file mode 100644 index 00000000000..6495d849f0e --- /dev/null +++ b/plutus-core/plutus-core/test/TypeSynthesis/Golden/FindFirstSetByteString.plc.golden @@ -0,0 +1 @@ +(fun (con bytestring) (con integer)) \ No newline at end of file diff --git a/plutus-core/plutus-core/test/TypeSynthesis/Golden/IntegerToByteString.plc.golden b/plutus-core/plutus-core/test/TypeSynthesis/Golden/IntegerToByteString.plc.golden new file mode 100644 index 00000000000..e0f1cb81a8e --- /dev/null +++ b/plutus-core/plutus-core/test/TypeSynthesis/Golden/IntegerToByteString.plc.golden @@ -0,0 +1 @@ +(fun (con integer) (con bytestring)) \ No newline at end of file diff --git a/plutus-core/plutus-core/test/TypeSynthesis/Golden/IorByteString.plc.golden 
b/plutus-core/plutus-core/test/TypeSynthesis/Golden/IorByteString.plc.golden new file mode 100644 index 00000000000..01714abd65b --- /dev/null +++ b/plutus-core/plutus-core/test/TypeSynthesis/Golden/IorByteString.plc.golden @@ -0,0 +1 @@ +(fun (con bytestring) (fun (con bytestring) (con bytestring))) \ No newline at end of file diff --git a/plutus-core/plutus-core/test/TypeSynthesis/Golden/PopCountByteString.plc.golden b/plutus-core/plutus-core/test/TypeSynthesis/Golden/PopCountByteString.plc.golden new file mode 100644 index 00000000000..6495d849f0e --- /dev/null +++ b/plutus-core/plutus-core/test/TypeSynthesis/Golden/PopCountByteString.plc.golden @@ -0,0 +1 @@ +(fun (con bytestring) (con integer)) \ No newline at end of file diff --git a/plutus-core/plutus-core/test/TypeSynthesis/Golden/RotateByteString.plc.golden b/plutus-core/plutus-core/test/TypeSynthesis/Golden/RotateByteString.plc.golden new file mode 100644 index 00000000000..d98998e13d6 --- /dev/null +++ b/plutus-core/plutus-core/test/TypeSynthesis/Golden/RotateByteString.plc.golden @@ -0,0 +1 @@ +(fun (con bytestring) (fun (con integer) (con bytestring))) \ No newline at end of file diff --git a/plutus-core/plutus-core/test/TypeSynthesis/Golden/ShiftByteString.plc.golden b/plutus-core/plutus-core/test/TypeSynthesis/Golden/ShiftByteString.plc.golden new file mode 100644 index 00000000000..d98998e13d6 --- /dev/null +++ b/plutus-core/plutus-core/test/TypeSynthesis/Golden/ShiftByteString.plc.golden @@ -0,0 +1 @@ +(fun (con bytestring) (fun (con integer) (con bytestring))) \ No newline at end of file diff --git a/plutus-core/plutus-core/test/TypeSynthesis/Golden/TestBitByteString.plc.golden b/plutus-core/plutus-core/test/TypeSynthesis/Golden/TestBitByteString.plc.golden new file mode 100644 index 00000000000..a647f8f46a6 --- /dev/null +++ b/plutus-core/plutus-core/test/TypeSynthesis/Golden/TestBitByteString.plc.golden @@ -0,0 +1 @@ +(fun (con bytestring) (fun (con integer) (con bool))) \ No newline at end of 
file diff --git a/plutus-core/plutus-core/test/TypeSynthesis/Golden/WriteBitByteString.plc.golden b/plutus-core/plutus-core/test/TypeSynthesis/Golden/WriteBitByteString.plc.golden new file mode 100644 index 00000000000..a4a93f8f64e --- /dev/null +++ b/plutus-core/plutus-core/test/TypeSynthesis/Golden/WriteBitByteString.plc.golden @@ -0,0 +1 @@ +(fun (con bytestring) (fun (con integer) (fun (con bool) (con bytestring)))) \ No newline at end of file diff --git a/plutus-core/plutus-core/test/TypeSynthesis/Golden/XorByteString.plc.golden b/plutus-core/plutus-core/test/TypeSynthesis/Golden/XorByteString.plc.golden new file mode 100644 index 00000000000..01714abd65b --- /dev/null +++ b/plutus-core/plutus-core/test/TypeSynthesis/Golden/XorByteString.plc.golden @@ -0,0 +1 @@ +(fun (con bytestring) (fun (con bytestring) (con bytestring))) \ No newline at end of file diff --git a/plutus-core/plutus-ir/src/PlutusIR/Transform/CommuteFnWithConst.hs b/plutus-core/plutus-ir/src/PlutusIR/Transform/CommuteFnWithConst.hs index 8b2e2cfc24c..1d20f473204 100644 --- a/plutus-core/plutus-ir/src/PlutusIR/Transform/CommuteFnWithConst.hs +++ b/plutus-core/plutus-ir/src/PlutusIR/Transform/CommuteFnWithConst.hs @@ -147,3 +147,16 @@ isCommutative = \case MkPairData -> False MkNilData -> False MkNilPairData -> False + -- Bitwise operations + IntegerToByteString -> False + ByteStringToInteger -> False + AndByteString -> True + IorByteString -> True + XorByteString -> True + ComplementByteString -> False + ShiftByteString -> False + RotateByteString -> False + PopCountByteString -> False + TestBitByteString -> False + WriteBitByteString -> False + FindFirstSetByteString -> False diff --git a/plutus-core/untyped-plutus-core/test/Evaluation/Builtins/Bitwise.hs b/plutus-core/untyped-plutus-core/test/Evaluation/Builtins/Bitwise.hs new file mode 100644 index 00000000000..8f1b9cce42d --- /dev/null +++ b/plutus-core/untyped-plutus-core/test/Evaluation/Builtins/Bitwise.hs @@ -0,0 +1,1118 @@ +-- 
editorconfig-checker-disable-file +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE MultiWayIf #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TypeApplications #-} + +module Evaluation.Builtins.Bitwise ( + bitwiseAndCommutes, + bitwiseIorCommutes, + bitwiseXorCommutes, + bitwiseAndIdentity, + bitwiseIorIdentity, + bitwiseXorIdentity, + bitwiseAndAbsorbing, + bitwiseIorAbsorbing, + bitwiseXorComplement, + bitwiseAndSelf, + bitwiseIorSelf, + bitwiseXorSelf, + bitwiseAndAssociates, + bitwiseIorAssociates, + bitwiseXorAssociates, + bitwiseComplementSelfInverts, + bitwiseAndDeMorgan, + bitwiseIorDeMorgan, + popCountSingleByte, + popCountAppend, + testBitEmpty, + testBitSingleByte, + testBitAppend, + writeBitRead, + writeBitDouble, + writeBitAgreement, + ffsSingleByte, + ffsAppend, + rotateIdentity, + rotateIndexMotion, + rotateHomogenous, + rotateSum, + shiftIdentity, + shiftIndexMotion, + shiftHomogenous, + shiftSum, + iToBsRoundtrip, + bsToITrailing, + ) where + + +import Control.Lens.Fold (Fold, folding, has, hasn't, preview) +import Control.Monad (guard) +import Data.Bitraversable (bitraverse) +import Data.Bits (bit, complement, countTrailingZeros, popCount, shiftL, xor, zeroBits, (.&.), + (.|.)) +import Data.ByteString (ByteString) +import Data.ByteString qualified as BS +import Data.Text (Text) +import Data.Word (Word8) +import Evaluation.Builtins.Common (typecheckEvaluateCek) +import GHC.Exts (fromListN, toList) +import Hedgehog (Gen, PropertyT, Range, annotate, annotateShow, cover, evalEither, failure, + forAllWith, success, (===)) +import Hedgehog.Gen qualified as Gen +import Hedgehog.Range qualified as Range +import PlutusCore (DefaultFun (AddInteger, AndByteString, AppendByteString, ByteStringToInteger, ComplementByteString, FindFirstSetByteString, IntegerToByteString, IorByteString, PopCountByteString, RotateByteString, ShiftByteString, TestBitByteString, WriteBitByteString, XorByteString), + DefaultUni, Error, EvaluationResult (EvaluationFailure, 
EvaluationSuccess), Name, + Term) +import PlutusCore.Evaluation.Machine.ExBudgetingDefaults (defaultBuiltinCostModel) +import PlutusCore.MkPlc (builtin, mkConstant, mkIterAppNoAnn) +import PlutusPrelude (def) +import Text.Show.Pretty (ppShow) +import UntypedPlutusCore qualified as Untyped + +bitwiseIorCommutes :: PropertyT IO () +bitwiseIorCommutes = commutative (.|.) IorByteString + +bitwiseAndCommutes :: PropertyT IO () +bitwiseAndCommutes = commutative (.&.) AndByteString + +bitwiseXorCommutes :: PropertyT IO () +bitwiseXorCommutes = commutative xor XorByteString + +bitwiseAndIdentity :: PropertyT IO () +bitwiseAndIdentity = identity (complement zeroBits) AndByteString + +bitwiseIorIdentity :: PropertyT IO () +bitwiseIorIdentity = identity zeroBits IorByteString + +bitwiseXorIdentity :: PropertyT IO () +bitwiseXorIdentity = identity zeroBits XorByteString + +bitwiseAndAbsorbing :: PropertyT IO () +bitwiseAndAbsorbing = absorbing zeroBits AndByteString + +bitwiseIorAbsorbing :: PropertyT IO () +bitwiseIorAbsorbing = absorbing (complement zeroBits) IorByteString + +bitwiseXorComplement :: PropertyT IO () +bitwiseXorComplement = do + bs <- forAllWith ppShow . Gen.bytes $ byteBoundRange + let len = BS.length bs + let allOnes = BS.replicate len . 
complement $ zeroBits + outcome1 <- goXor bs allOnes + outcome2 <- goComplement bs + case (outcome1, outcome2) of + (EvaluationSuccess res1, EvaluationSuccess res2) -> res1 === res2 + _ -> failure + where + goXor :: + ByteString -> + ByteString -> + PropertyT IO (EvaluationResult (Untyped.Term Name DefaultUni DefaultFun ())) + goXor leftArg rightArg = do + let leftArg' = mkConstant @ByteString () leftArg + let rightArg' = mkConstant @ByteString () rightArg + let comp = mkIterAppNoAnn (builtin () XorByteString) [leftArg', rightArg'] + cekEval comp + goComplement :: + ByteString -> + PropertyT IO (EvaluationResult (Untyped.Term Name DefaultUni DefaultFun ())) + goComplement bs = do + let bs' = mkConstant @ByteString () bs + let comp = mkIterAppNoAnn (builtin () ComplementByteString) [bs'] + cekEval comp + +bitwiseAndSelf :: PropertyT IO () +bitwiseAndSelf = self AndByteString + +bitwiseIorSelf :: PropertyT IO () +bitwiseIorSelf = self IorByteString + +bitwiseXorSelf :: PropertyT IO () +bitwiseXorSelf = do + bs <- forAllWith ppShow . Gen.bytes $ byteBoundRange + let len = BS.length bs + let bs' = mkConstant @ByteString () bs + let expected = mkConstant @ByteString () . BS.replicate len $ zeroBits + let comp = mkIterAppNoAnn (builtin () XorByteString) [bs', bs'] + outcome <- cekEval comp + case outcome of + EvaluationSuccess res -> res === expected + _ -> failure + +bitwiseAndAssociates :: PropertyT IO () +bitwiseAndAssociates = associative (.&.) AndByteString + +bitwiseIorAssociates :: PropertyT IO () +bitwiseIorAssociates = associative (.|.) IorByteString + +bitwiseXorAssociates :: PropertyT IO () +bitwiseXorAssociates = associative xor XorByteString + +bitwiseComplementSelfInverts :: PropertyT IO () +bitwiseComplementSelfInverts = do + bs <- forAllWith ppShow . 
Gen.bytes $ byteBoundRange + let bs' = mkConstant @ByteString () bs + let comp = mkIterAppNoAnn (builtin () ComplementByteString) [ + mkIterAppNoAnn (builtin () ComplementByteString) [bs'] + ] + outcome <- cekEval comp + case outcome of + EvaluationSuccess res -> res === mkConstant () bs + _ -> failure + +bitwiseAndDeMorgan :: PropertyT IO () +bitwiseAndDeMorgan = demorgan AndByteString IorByteString + +bitwiseIorDeMorgan :: PropertyT IO () +bitwiseIorDeMorgan = demorgan IorByteString AndByteString + +popCountSingleByte :: PropertyT IO () +popCountSingleByte = do + w8 <- forAllWith ppShow Gen.enumBounded + let bs = BS.singleton w8 + let expected :: Integer = fromIntegral . popCount $ w8 + let comp = mkIterAppNoAnn (builtin () PopCountByteString) [ + mkConstant @ByteString () bs + ] + outcome <- cekEval comp + case outcome of + EvaluationSuccess res -> res === mkConstant () expected + _ -> failure + +popCountAppend :: PropertyT IO () +popCountAppend = do + bs <- forAllWith ppShow . Gen.bytes $ byteBoundRange + bs' <- forAllWith ppShow . Gen.bytes $ byteBoundRange + let arg1 = mkConstant @ByteString () bs + let arg2 = mkConstant @ByteString () bs' + let comp1 = mkIterAppNoAnn (builtin () PopCountByteString) [ + mkIterAppNoAnn (builtin () AppendByteString) [arg1, arg2] + ] + let comp2 = mkIterAppNoAnn (builtin () AddInteger) [ + mkIterAppNoAnn (builtin () PopCountByteString) [arg1], + mkIterAppNoAnn (builtin () PopCountByteString) [arg2] + ] + outcome <- bitraverse cekEval cekEval (comp1, comp2) + case outcome of + (EvaluationSuccess res, EvaluationSuccess res') -> res === res' + _ -> failure + +testBitEmpty :: PropertyT IO () +testBitEmpty = do + ix <- forAllWith ppShow . 
Gen.integral $ indexRange + let arg = mkConstant @ByteString () "" + let comp = mkIterAppNoAnn (builtin () TestBitByteString) [ + arg, + mkConstant @Integer () ix + ] + outcome <- cekEval comp + case outcome of + EvaluationFailure -> success + _ -> failure + +testBitSingleByte :: PropertyT IO () +testBitSingleByte = do + w8 <- forAllWith ppShow Gen.enumBounded + let bs = BS.singleton w8 + ix <- forAllWith ppShow . Gen.integral . indexRangeOf $ 8 + cover 45 "out of bounds" $ ix < 0 || ix >= 8 + cover 45 "in-bounds" $ 0 <= ix && ix < 8 + let expected = bitAt w8 ix + let comp = mkIterAppNoAnn (builtin () TestBitByteString) [ + mkConstant @ByteString () bs, + mkConstant @Integer () ix + ] + outcome <- cekEval comp + case (expected, outcome) of + (Nothing, EvaluationFailure) -> success + (Just b, EvaluationSuccess res) -> res === mkConstant @Bool () b + _ -> failure + +testBitAppend :: PropertyT IO () +testBitAppend = do + testCase <- forAllWith ppShow genBitAppendCase + cover 30 "out of bounds" . appendOutOfBounds $ testCase + cover 30 "in-bounds, first argument" . appendInBoundsFirst $ testCase + cover 30 "in-bounds, second argument" . 
appendInBoundsSecond $ testCase + let (x, y, ix) = getBitAppendArgs testCase + let arg1 = mkConstant @ByteString () x + let arg2 = mkConstant @ByteString () y + let argIx = mkConstant @Integer () ix + let comp = mkIterAppNoAnn (builtin () TestBitByteString) [ + mkIterAppNoAnn (builtin () AppendByteString) [arg1, arg2], + argIx + ] + let comp' = go x y ix + outcome <- bitraverse cekEval cekEval (comp, comp') + case outcome of + (EvaluationFailure, EvaluationFailure) -> success + (EvaluationSuccess res, EvaluationSuccess res') -> do + annotateShow res + annotateShow res' + res === res' + _ -> failure + where + go :: + ByteString -> + ByteString -> + Integer -> + Term Untyped.TyName Name DefaultUni DefaultFun () + go bs bs' ix = let len' = fromIntegral $ 8 * BS.length bs' in + case compare ix len' of + LT -> mkIterAppNoAnn (builtin () TestBitByteString) [ + mkConstant @ByteString () bs', + mkConstant @Integer () ix + ] + _ -> mkIterAppNoAnn (builtin () TestBitByteString) [ + mkConstant @ByteString () bs, + mkConstant @Integer () (ix - len') + ] + +writeBitRead :: PropertyT IO () +writeBitRead = do + testCase <- forAllWith ppShow genWriteBitCase + cover 45 "out of bounds" . hasn't _WriteBitResult $ testCase + cover 45 "in-bounds" . has _WriteBitResult $ testCase + let (bs, ix, b) = getWriteBitArgs testCase + let expected = preview _WriteBitResult testCase + let bs' = mkConstant @ByteString () bs + let ix' = mkConstant @Integer () ix + let b' = mkConstant @Bool () b + let comp = mkIterAppNoAnn (builtin () TestBitByteString) [ + mkIterAppNoAnn (builtin () WriteBitByteString) [bs', ix', b'], + ix' + ] + outcome <- cekEval comp + case (expected, outcome) of + (Nothing, EvaluationFailure) -> success + (Just res, EvaluationSuccess res') -> mkConstant @Bool () res === res' + _ -> failure + +writeBitDouble :: PropertyT IO () +writeBitDouble = do + testCase <- forAllWith ppShow genWriteBitCase + cover 45 "out of bounds" . 
hasn't _WriteBitResult $ testCase + cover 45 "in-bounds" . has _WriteBitResult $ testCase + let (bs, ix, b) = getWriteBitArgs testCase + b' <- forAllWith ppShow Gen.enumBounded + let bs' = mkConstant @ByteString () bs + let ix' = mkConstant @Integer () ix + let writeTwice = mkIterAppNoAnn (builtin () WriteBitByteString) [ + mkIterAppNoAnn (builtin () WriteBitByteString) [bs', ix', mkConstant @Bool () b], + ix', + mkConstant @Bool () b' + ] + let writeOnce = mkIterAppNoAnn (builtin () WriteBitByteString) [ + bs', + ix', + mkConstant @Bool () b' + ] + outcome <- bitraverse cekEval cekEval (writeTwice, writeOnce) + case outcome of + (EvaluationFailure, EvaluationFailure) -> success + (EvaluationSuccess res, EvaluationSuccess res') -> res === res' + _ -> failure + +writeBitAgreement :: PropertyT IO () +writeBitAgreement = do + testCase <- forAllWith ppShow genWriteBitAgreementCase + let (bs, writeIx, readIx) = getWriteBitAgreementArgs testCase + cover 45 "read known zero" $ writeIx /= readIx + cover 45 "read known one" $ writeIx == readIx + let comp = mkIterAppNoAnn (builtin () TestBitByteString) [ + mkIterAppNoAnn (builtin () WriteBitByteString) [ + mkConstant @ByteString () bs, + mkConstant @Integer () writeIx, + mkConstant @Bool () True + ], + mkConstant @Integer () readIx + ] + outcome <- cekEval comp + case outcome of + EvaluationSuccess res -> + if writeIx == readIx + then res === mkConstant @Bool () True + else res === mkConstant @Bool () False + _ -> failure + +ffsSingleByte :: PropertyT IO () +ffsSingleByte = do + w8 <- forAllWith ppShow Gen.enumBounded + let bs = BS.singleton w8 + let expected = case w8 of + 0 -> (-1) + _ -> fromIntegral . 
countTrailingZeros $ w8 + let comp = mkIterAppNoAnn (builtin () FindFirstSetByteString) [ + mkConstant @ByteString () bs + ] + outcome <- cekEval comp + case outcome of + EvaluationSuccess res -> res === mkConstant @Integer () expected + _ -> failure + +ffsAppend :: PropertyT IO () +ffsAppend = do + testCase <- forAllWith ppShow genFFSAppendCase + let which = ffsAppendType testCase + cover 30 "both arguments zero" $ which == ZeroBoth + cover 30 "second argument zero" $ which == ZeroSecond + cover 30 "second argument nonzero" $ which == NotZeroSecond + let (bs, bs') = getFFSAppendArgs testCase + let comp = mkIterAppNoAnn (builtin () FindFirstSetByteString) [ + mkIterAppNoAnn (builtin () AppendByteString) [ + mkConstant @ByteString () bs, + mkConstant @ByteString () bs' + ] + ] + let comp' = case which of + ZeroBoth -> mkConstant @Integer () (-1) + ZeroSecond -> let bitLen' = fromIntegral $ 8 * BS.length bs' in + mkIterAppNoAnn (builtin () AddInteger) [ + mkIterAppNoAnn (builtin () FindFirstSetByteString) [ + mkConstant @ByteString () bs + ], + mkConstant @Integer () bitLen' + ] + NotZeroSecond -> mkIterAppNoAnn (builtin () FindFirstSetByteString) [ + mkConstant @ByteString () bs' + ] + outcome <- bitraverse cekEval cekEval (comp, comp') + case outcome of + (EvaluationSuccess res, EvaluationSuccess res') -> res === res' + _ -> failure + +rotateIdentity :: PropertyT IO () +rotateIdentity = do + bs <- forAllWith ppShow . Gen.bytes $ byteBoundRange + let comp = mkIterAppNoAnn (builtin () RotateByteString) [ + mkConstant @ByteString () bs, + mkConstant @Integer () 0 + ] + outcome <- cekEval comp + case outcome of + EvaluationSuccess res -> res === mkConstant () bs + _ -> failure + +shiftIdentity :: PropertyT IO () +shiftIdentity = do + bs <- forAllWith ppShow . 
Gen.bytes $ byteBoundRange + let comp = mkIterAppNoAnn (builtin () ShiftByteString) [ + mkConstant @ByteString () bs, + mkConstant @Integer () 0 + ] + outcome <- cekEval comp + case outcome of + EvaluationSuccess res -> res === mkConstant () bs + _ -> failure + + +rotateIndexMotion :: PropertyT IO () +rotateIndexMotion = do + bs <- forAllWith ppShow . Gen.bytes $ byteBoundRange + w8 <- forAllWith ppShow Gen.enumBounded + let bs' = BS.cons w8 bs + let bitLen = fromIntegral $ BS.length bs' * 8 + i <- forAllWith ppShow . Gen.integral . indexRangeOf $ bitLen + readIx <- forAllWith ppShow . Gen.integral . indexRangeFor $ bitLen + let expectedReadIx = case signum i of + 1 -> let raw = readIx - i in + case signum raw of + (-1) -> bitLen + raw + _ -> raw + 0 -> readIx + _ -> (readIx - i) `rem` bitLen + let comp = mkIterAppNoAnn (builtin () TestBitByteString) [ + mkIterAppNoAnn (builtin () RotateByteString) [ + mkConstant @ByteString () bs', + mkConstant @Integer () i + ], + mkConstant @Integer () readIx + ] + let expected = mkIterAppNoAnn (builtin () TestBitByteString) [ + mkConstant @ByteString () bs', + mkConstant @Integer () expectedReadIx + ] + outcome <- bitraverse cekEval cekEval (expected, comp) + case outcome of + (EvaluationSuccess res, EvaluationSuccess actual) -> res === actual + _ -> failure + +shiftIndexMotion :: PropertyT IO () +shiftIndexMotion = do + bs <- forAllWith ppShow . Gen.bytes $ byteBoundRange + w8 <- forAllWith ppShow Gen.enumBounded + let bs' = BS.cons w8 bs + let bitLen = fromIntegral $ BS.length bs' * 8 + i <- forAllWith ppShow . Gen.integral . indexRangeOf $ bitLen + readIx <- forAllWith ppShow . Gen.integral . 
indexRangeFor $ bitLen + let comp = mkIterAppNoAnn (builtin () TestBitByteString) [ + mkIterAppNoAnn (builtin () ShiftByteString) [ + mkConstant @ByteString () bs', + mkConstant @Integer () i + ], + mkConstant @Integer () readIx + ] + let comp' = let expectedIx = readIx - i in + if | expectedIx < 0 -> mkConstant @Bool () False + | expectedIx >= bitLen -> mkConstant @Bool () False + | otherwise -> mkIterAppNoAnn (builtin () TestBitByteString) [ + mkConstant @ByteString () bs', + mkConstant @Integer () expectedIx + ] + outcome <- bitraverse cekEval cekEval (comp, comp') + case outcome of + (EvaluationSuccess res, EvaluationSuccess res') -> res === res' + _ -> failure + +rotateHomogenous :: PropertyT IO () +rotateHomogenous = do + w8 <- forAllWith ppShow . Gen.element $ [zeroBits, complement zeroBits] + cover 45 "all ones" $ w8 == complement zeroBits + cover 45 "all zeroes" $ w8 == zeroBits + len <- forAllWith ppShow . Gen.integral $ byteBoundRange + let bs = BS.replicate len w8 + rotation <- forAllWith ppShow . Gen.integral $ indexRange + let comp = mkIterAppNoAnn (builtin () RotateByteString) [ + mkConstant @ByteString () bs, + mkConstant @Integer () rotation + ] + outcome <- cekEval comp + case outcome of + EvaluationSuccess res -> res === mkConstant @ByteString () bs + _ -> failure + +shiftHomogenous :: PropertyT IO () +shiftHomogenous = do + len <- forAllWith ppShow . Gen.integral $ byteBoundRange + i <- forAllWith ppShow . Gen.integral $ indexRange + let bs = BS.replicate len zeroBits + let comp = mkIterAppNoAnn (builtin () ShiftByteString) [ + mkConstant @ByteString () bs, + mkConstant @Integer () i + ] + outcome <- cekEval comp + case outcome of + EvaluationSuccess res -> res === mkConstant @ByteString () bs + _ -> failure + +rotateSum :: PropertyT IO () +rotateSum = do + bs <- forAllWith ppShow . Gen.bytes $ byteBoundRange + i <- forAllWith ppShow . Gen.integral $ indexRange + j <- forAllWith ppShow . 
Gen.integral $ indexRange + let comp1 = mkIterAppNoAnn (builtin () RotateByteString) [ + mkIterAppNoAnn (builtin () RotateByteString) [ + mkConstant @ByteString () bs, + mkConstant @Integer () i + ], + mkConstant @Integer () j + ] + let comp2 = mkIterAppNoAnn (builtin () RotateByteString) [ + mkConstant @ByteString () bs, + mkIterAppNoAnn (builtin () AddInteger) [ + mkConstant @Integer () i, + mkConstant @Integer () j + ] + ] + outcome <- bitraverse cekEval cekEval (comp1, comp2) + case outcome of + (EvaluationSuccess res, EvaluationSuccess res') -> res === res' + _ -> failure + +shiftSum :: PropertyT IO () +shiftSum = do + bs <- forAllWith ppShow . Gen.bytes $ byteBoundRange + ij <- forAllWith ppShow . Gen.integral $ indexRange + (i, j) <- forAllWith ppShow . genSplit $ ij + let comp1 = mkIterAppNoAnn (builtin () ShiftByteString) [ + mkIterAppNoAnn (builtin () ShiftByteString) [ + mkConstant @ByteString () bs, + mkConstant @Integer () i + ], + mkConstant @Integer () j + ] + let comp2 = mkIterAppNoAnn (builtin () ShiftByteString) [ + mkConstant @ByteString () bs, + mkConstant @Integer () ij + ] + outcome <- bitraverse cekEval cekEval (comp1, comp2) + case outcome of + (EvaluationSuccess res, EvaluationSuccess res') -> res === res' + _ -> failure + +iToBsRoundtrip :: PropertyT IO () +iToBsRoundtrip = do + i <- forAllWith ppShow . 
Gen.integral $ integerRange + let comp = mkIterAppNoAnn (builtin () ByteStringToInteger) [ + mkIterAppNoAnn (builtin () IntegerToByteString) [ + mkConstant @Integer () i + ] + ] + outcome <- cekEval comp + case outcome of + EvaluationSuccess res -> res === mkConstant @Integer () i + _ -> failure + +bsToITrailing :: PropertyT IO () +bsToITrailing = do + BsToITrailingCase extension bs <- forAllWith ppShow genBsToITrailingCase + let comp = mkIterAppNoAnn (builtin () ByteStringToInteger) [ + mkIterAppNoAnn (builtin () AppendByteString) [ + mkConstant @ByteString () bs, + mkConstant @ByteString () extension + ] + ] + let comp' = mkIterAppNoAnn (builtin () ByteStringToInteger) [ + mkConstant @ByteString () bs + ] + outcome <- bitraverse cekEval cekEval (comp, comp') + case outcome of + (EvaluationSuccess res, EvaluationSuccess res') -> res === res' + _ -> failure + +-- Helpers + +data BsToITrailingCase = BsToITrailingCase ByteString ByteString + deriving stock (Eq, Show) + +data WriteBitAgreementCase = + WriteBitReadSame Int Integer | + WriteBitReadDifferent Int Integer Integer + deriving stock (Eq, Show) + +getWriteBitAgreementArgs :: WriteBitAgreementCase -> (ByteString, Integer, Integer) +getWriteBitAgreementArgs = \case + WriteBitReadSame len ix -> (BS.replicate len zeroBits, ix, ix) + WriteBitReadDifferent len ix ix' -> (BS.replicate len zeroBits, ix, ix') + +data FFSAppendType = ZeroBoth | ZeroSecond | NotZeroSecond + deriving stock (Eq) + +data FFSAppendCase = + FFSAppendBothZero Int Int | + FFSAppendSecondZero ByteString Int | + FFSAppendSecondNonZero ByteString ByteString + deriving stock (Eq, Show) + +getFFSAppendArgs :: FFSAppendCase -> (ByteString, ByteString) +getFFSAppendArgs = \case + FFSAppendBothZero len len' -> (BS.replicate len zeroBits, BS.replicate len' zeroBits) + FFSAppendSecondZero bs len -> (bs, BS.replicate len zeroBits) + FFSAppendSecondNonZero bs bs' -> (bs, bs') + +ffsAppendType :: FFSAppendCase -> FFSAppendType +ffsAppendType = \case + 
FFSAppendBothZero{} -> ZeroBoth + FFSAppendSecondZero{} -> ZeroSecond + FFSAppendSecondNonZero{} -> NotZeroSecond + +data WriteBitCase = + WriteBitOutOfBounds ByteString Integer Bool | + WriteBitInBounds ByteString Integer Bool + deriving stock (Eq, Show) + +_WriteBitResult :: Fold WriteBitCase Bool +_WriteBitResult = folding $ \case + WriteBitInBounds _ _ b -> pure b + _ -> Nothing + +getWriteBitArgs :: WriteBitCase -> (ByteString, Integer, Bool) +getWriteBitArgs = \case + WriteBitOutOfBounds bs ix b -> (bs, ix, b) + WriteBitInBounds bs ix b -> (bs, ix, b) + +data BitAppendCase = + AppendOutOfBounds ByteString ByteString Integer | + AppendInBoundsFirst ByteString ByteString Integer | + AppendInBoundsSecond ByteString ByteString Integer + deriving stock (Eq, Show) + +appendOutOfBounds :: BitAppendCase -> Bool +appendOutOfBounds = \case + AppendOutOfBounds{} -> True + _ -> False + +appendInBoundsFirst :: BitAppendCase -> Bool +appendInBoundsFirst = \case + AppendInBoundsFirst{} -> True + _ -> False + +appendInBoundsSecond :: BitAppendCase -> Bool +appendInBoundsSecond = \case + AppendInBoundsSecond{} -> True + _ -> False + +getBitAppendArgs :: BitAppendCase -> (ByteString, ByteString, Integer) +getBitAppendArgs = \case + AppendOutOfBounds bs bs' ix -> (bs, bs', ix) + AppendInBoundsFirst bs bs' ix -> (bs, bs', ix) + AppendInBoundsSecond bs bs' ix -> (bs, bs', ix) + +bitAt :: Word8 -> Integer -> Maybe Bool +bitAt w8 ix = do + guard (ix >= 0) + guard (ix < 8) + let mask = bit 0 `shiftL` fromIntegral ix + pure $ case mask .&. w8 of + 0 -> False + _ -> True + +demorgan :: + DefaultFun -> + DefaultFun -> + PropertyT IO () +demorgan b b' = do + bs <- forAllWith ppShow . Gen.bytes $ byteBoundRange + let len = BS.length bs + bs' <- forAllWith ppShow . Gen.bytes . 
Range.singleton $ len + outcome <- demorganing b b' bs bs' + case outcome of + (EvaluationSuccess res1, EvaluationSuccess res2) -> res1 === res2 + _ -> failure + +demorganing :: + DefaultFun -> + DefaultFun -> + ByteString -> + ByteString -> + PropertyT IO (EvaluationResult (Untyped.Term Name DefaultUni DefaultFun ()), + EvaluationResult (Untyped.Term Name DefaultUni DefaultFun ())) +demorganing fun fun' x y = do + let x' = mkConstant @ByteString () x + let y' = mkConstant @ByteString () y + let comp = mkIterAppNoAnn (builtin () ComplementByteString) [ + mkIterAppNoAnn (builtin () fun) [x', y'] + ] + let comp' = mkIterAppNoAnn (builtin () fun') [ + mkIterAppNoAnn (builtin () ComplementByteString) [x'], + mkIterAppNoAnn (builtin () ComplementByteString) [y'] + ] + bitraverse cekEval cekEval (comp, comp') + +data AssociativeCase = + AssociativeMismatched ByteString ByteString ByteString | + AssociativeMatched ByteString ByteString ByteString ByteString + deriving stock (Eq, Show) + +getAssociativeArgs :: AssociativeCase -> (ByteString, ByteString, ByteString) +getAssociativeArgs = \case + AssociativeMismatched x y z -> (x, y, z) + AssociativeMatched x y z _ -> (x, y, z) + +_AssociativeResult :: Fold AssociativeCase ByteString +_AssociativeResult = folding $ \case + AssociativeMatched _ _ _ res -> pure res + _ -> Nothing + +associative :: + (Word8 -> Word8 -> Word8) -> + DefaultFun -> + PropertyT IO () +associative f b = do + testCase <- forAllWith ppShow . genAssociativeCase $ f + cover 45 "mismatched lengths" . hasn't _AssociativeResult $ testCase + cover 45 "matched lengths" . 
has _AssociativeResult $ testCase + let expectedMay = preview _AssociativeResult testCase + let (x, y, z) = getAssociativeArgs testCase + outcome <- associatively b x y z + case (outcome, expectedMay) of + ((EvaluationFailure, EvaluationFailure), Nothing) -> success + (_, Nothing) -> annotate "Unexpected failure" >> failure + ((EvaluationSuccess leftAssoc, EvaluationSuccess rightAssoc), Just expected) -> do + leftAssoc === rightAssoc + leftAssoc === mkConstant () expected + _ -> annotate "Unexpected failure" >> failure + +associatively :: + DefaultFun -> + ByteString -> + ByteString -> + ByteString -> + PropertyT IO (EvaluationResult (Untyped.Term Name DefaultUni DefaultFun ()), + EvaluationResult (Untyped.Term Name DefaultUni DefaultFun ())) +associatively fun x y z = do + let x' = mkConstant @ByteString () x + let y' = mkConstant @ByteString () y + let z' = mkConstant @ByteString () z + let leftAssoc = mkIterAppNoAnn (builtin () fun) [ + mkIterAppNoAnn (builtin () fun) [x', y'], + z' + ] + let rightAssoc = mkIterAppNoAnn (builtin () fun) [ + x', + mkIterAppNoAnn (builtin () fun) [y', z'] + ] + bitraverse cekEval cekEval (leftAssoc, rightAssoc) + +self :: DefaultFun -> PropertyT IO () +self b = do + bs <- forAllWith ppShow . Gen.bytes $ byteBoundRange + let bs' = mkConstant @ByteString () bs + let comp = mkIterAppNoAnn (builtin () b) [bs', bs'] + outcome <- cekEval comp + case outcome of + EvaluationSuccess res -> res === mkConstant @ByteString () bs + _ -> failure + +data AbsorbingCase = + AbsorbingMismatched ByteString Int Word8 | + AbsorbingMatched ByteString Word8 + deriving stock (Eq, Show) + +_AbsorbingResult :: Fold AbsorbingCase ByteString +_AbsorbingResult = folding $ \case + AbsorbingMatched bs w8 -> pure . 
BS.replicate (BS.length bs) $ w8 + _ -> Nothing + +getAbsorbingArgs :: AbsorbingCase -> (ByteString, ByteString) +getAbsorbingArgs = \case + AbsorbingMismatched bs len w8 -> (bs, BS.replicate len w8) + AbsorbingMatched bs w8 -> (bs, BS.replicate (BS.length bs) w8) + +absorbing :: + Word8 -> + DefaultFun -> + PropertyT IO () +absorbing w8 b = do + testCase <- forAllWith ppShow . genAbsorbingCase $ w8 + cover 45 "mismatched lengths" . hasn't _AbsorbingResult $ testCase + cover 45 "matched lengths" . has _AbsorbingResult $ testCase + let expectedMay = preview _AbsorbingResult testCase + let (leftArg, rightArg) = getAbsorbingArgs testCase + outcome <- commutatively b leftArg rightArg + case (outcome, expectedMay) of + ((EvaluationFailure, EvaluationFailure), Nothing) -> success + (_, Nothing) -> do + annotate "Unexpected success" + failure + ((EvaluationSuccess l2r, EvaluationSuccess r2l), Just expected) -> do + l2r === r2l + l2r === mkConstant () expected + _ -> do + annotate "Unexpected failure" + failure + +data IdentityCase = + IdentityMismatched ByteString Int Word8 | + IdentityMatched ByteString Word8 + deriving stock (Eq, Show) + +_IdentityResult :: Fold IdentityCase ByteString +_IdentityResult = folding $ \case + IdentityMatched res _ -> pure res + _ -> Nothing + +getIdentityArgs :: IdentityCase -> (ByteString, ByteString) +getIdentityArgs = \case + IdentityMismatched bs len w8 -> (bs, BS.replicate len w8) + IdentityMatched bs w8 -> (bs, BS.replicate (BS.length bs) w8) + +identity :: + Word8 -> + DefaultFun -> + PropertyT IO () +identity w8 b = do + testCase <- forAllWith ppShow . genIdentityCase $ w8 + cover 45 "mismatched lengths" . hasn't _IdentityResult $ testCase + cover 45 "matched lengths" . 
has _IdentityResult $ testCase + let expectedMay = preview _IdentityResult testCase + let (leftArg, rightArg) = getIdentityArgs testCase + outcome <- commutatively b leftArg rightArg + case (outcome, expectedMay) of + ((EvaluationFailure, EvaluationFailure), Nothing) -> success + (_, Nothing) -> do + annotate "Unexpected success" + failure + ((EvaluationSuccess l2r, EvaluationSuccess r2l), Just expected) -> do + l2r === r2l + l2r === mkConstant () expected + _ -> do + annotate "Unexpected failure" + failure + +data CommutativeCase = + MismatchedLengths ByteString ByteString | + MatchedLengths ByteString ByteString ByteString + deriving stock (Eq, Show) + +getArgs :: CommutativeCase -> (ByteString, ByteString) +getArgs = \case + MismatchedLengths bs bs' -> (bs, bs') + MatchedLengths bs bs' _ -> (bs, bs') + +_CommutativeResult :: Fold CommutativeCase ByteString +_CommutativeResult = folding $ \case + MatchedLengths _ _ res -> pure res + _ -> Nothing + +commutative :: + (Word8 -> Word8 -> Word8) -> + DefaultFun -> + PropertyT IO () +commutative f b = do + testCase <- forAllWith ppShow . genCommutativeCase $ f + cover 45 "mismatched lengths" . hasn't _CommutativeResult $ testCase + cover 45 "matched lengths" . 
has _CommutativeResult $ testCase + let expectedMay = preview _CommutativeResult testCase + let (leftArg, rightArg) = getArgs testCase + outcome <- commutatively b leftArg rightArg + case (outcome, expectedMay) of + ((EvaluationFailure, EvaluationFailure), Nothing) -> success + (_, Nothing) -> do + annotate "Unexpected success" + failure + ((EvaluationSuccess l2r, EvaluationSuccess r2l), Just expected) -> do + l2r === r2l + l2r === mkConstant () expected + _ -> do + annotate "Unexpected failure" + failure + +commutatively :: + DefaultFun -> + ByteString -> + ByteString -> + PropertyT IO (EvaluationResult (Untyped.Term Name DefaultUni DefaultFun ()), + EvaluationResult (Untyped.Term Name DefaultUni DefaultFun ())) +commutatively fun leftArg rightArg = do + let leftArg' = mkConstant @ByteString () leftArg + let rightArg' = mkConstant @ByteString () rightArg + let oneDirection = go leftArg' rightArg' + let otherDirection = go rightArg' leftArg' + bitraverse cekEval cekEval (oneDirection, otherDirection) + where + go :: Term Untyped.TyName Name DefaultUni DefaultFun () -> + Term Untyped.TyName Name DefaultUni DefaultFun () -> + Term Untyped.TyName Name DefaultUni DefaultFun () + go arg1 arg2 = mkIterAppNoAnn (builtin () fun) [arg1, arg2] + +cekEval :: + Term Untyped.TyName Name DefaultUni DefaultFun () -> + PropertyT IO (EvaluationResult (Untyped.Term Name DefaultUni DefaultFun ())) +cekEval = fmap fst . evalEither . 
cekEval' + +cekEval' :: + Term Untyped.TyName Name DefaultUni DefaultFun () -> + Either (Error DefaultUni DefaultFun ()) + (EvaluationResult (Untyped.Term Name DefaultUni DefaultFun ()), [Text]) +cekEval' = typecheckEvaluateCek def defaultBuiltinCostModel + +-- Generators + +genBsToITrailingCase :: Gen BsToITrailingCase +genBsToITrailingCase = go + where + go :: Gen BsToITrailingCase + go = do + len <- Gen.integral byteBoundRange + extLen <- Gen.integral byteBoundRange + BsToITrailingCase (BS.replicate extLen zeroBits) <$> + case len of + 0 -> pure BS.empty + _ -> Gen.choice [pure . powerOf2 $ len, notPowerOf2 len] + powerOf2 :: Int -> ByteString + powerOf2 len = BS.cons 128 . BS.replicate (len - 1) $ zeroBits + notPowerOf2 :: Int -> Gen ByteString + notPowerOf2 len = + BS.cons <$> Gen.element [0 :: Word8 .. 127] <*> + (Gen.bytes . Range.singleton $ len - 1) + +genWriteBitAgreementCase :: Gen WriteBitAgreementCase +genWriteBitAgreementCase = do + len <- Gen.integral . Range.linear 1 $ 64 + Gen.choice [same len, different len] + where + same :: Int -> Gen WriteBitAgreementCase + same len = do + let bitLen = fromIntegral $ len * 8 + ix <- Gen.integral . indexRangeFor $ bitLen + pure . WriteBitReadSame len $ ix + different :: Int -> Gen WriteBitAgreementCase + different len = do + let bitLen = fromIntegral $ len * 8 + readIx <- Gen.integral . indexRangeFor $ bitLen + writeIx <- Gen.filter (readIx /=) . Gen.integral . indexRangeFor $ bitLen + pure . WriteBitReadDifferent len writeIx $ readIx + +genCommutativeCase :: (Word8 -> Word8 -> Word8) -> Gen CommutativeCase +genCommutativeCase f = Gen.choice [mismatched, matched] + where + mismatched :: Gen CommutativeCase + mismatched = do + leftArg <- Gen.bytes byteBoundRange + rightArg <- Gen.bytes byteBoundRange + if BS.length leftArg /= BS.length rightArg + then pure . MismatchedLengths leftArg $ rightArg + else do + let diff = BS.length leftArg - BS.length rightArg + extension <- Gen.bytes . 
diffRange $ diff + let leftArg' = leftArg <> extension + Gen.element [MismatchedLengths leftArg' rightArg, + MismatchedLengths rightArg leftArg'] + matched :: Gen CommutativeCase + matched = do + leftArg <- Gen.bytes byteBoundRange + let len = BS.length leftArg + rightArg <- Gen.bytes . Range.singleton $ len + let result = fromListN len . BS.zipWith f leftArg $ rightArg + pure . MatchedLengths leftArg rightArg $ result + +genIdentityCase :: Word8 -> Gen IdentityCase +genIdentityCase w8 = Gen.choice [mismatched, matched] + where + mismatched :: Gen IdentityCase + mismatched = do + bs <- Gen.bytes byteBoundRange + let len = BS.length bs + genLen <- Gen.filter (/= len) . Gen.int $ byteBoundRange + pure . IdentityMismatched bs genLen $ w8 + matched :: Gen IdentityCase + matched = do + bs <- Gen.bytes byteBoundRange + pure . IdentityMatched bs $ w8 + +genAbsorbingCase :: Word8 -> Gen AbsorbingCase +genAbsorbingCase w8 = Gen.choice [mismatched, matched] + where + mismatched :: Gen AbsorbingCase + mismatched = do + bs <- Gen.bytes byteBoundRange + let len = BS.length bs + genLen <- Gen.filter (/= len) . Gen.int $ byteBoundRange + pure . AbsorbingMismatched bs genLen $ w8 + matched :: Gen AbsorbingCase + matched = do + bs <- Gen.bytes byteBoundRange + pure . AbsorbingMatched bs $ w8 + +genAssociativeCase :: (Word8 -> Word8 -> Word8) -> Gen AssociativeCase +genAssociativeCase f = Gen.choice [mismatched, matched] + where + mismatched :: Gen AssociativeCase + mismatched = do + x <- Gen.bytes byteBoundRange + y <- Gen.bytes byteBoundRange + z <- Gen.bytes byteBoundRange + if BS.length x == BS.length y && BS.length y == BS.length z + then do + extension <- Gen.bytes . diffRange $ 5 + let x' = x <> extension + Gen.element [AssociativeMismatched x' y z, + AssociativeMismatched y x' z, + AssociativeMismatched y z x'] + else pure . AssociativeMismatched x y $ z + matched :: Gen AssociativeCase + matched = do + x <- Gen.bytes byteBoundRange + let len = BS.length x + y <- Gen.bytes . 
Range.singleton $ len + z <- Gen.bytes . Range.singleton $ len + let result = fromListN len . zipWith f (toList x) . BS.zipWith f y $ z + pure . AssociativeMatched x y z $ result + +genBitAppendCase :: Gen BitAppendCase +genBitAppendCase = Gen.choice [oob, inBounds1, inBounds2] + where + oob :: Gen BitAppendCase + oob = do + bs <- Gen.bytes byteBoundRange + bs' <- Gen.bytes byteBoundRange + let len = fromIntegral $ 8 * (BS.length bs + BS.length bs') + ix <- Gen.choice [tooLowIx len, tooHighIx len] + pure . AppendOutOfBounds bs bs' $ ix + inBounds1 :: Gen BitAppendCase + inBounds1 = do + bs <- Gen.bytes byteBoundRange + w8 <- Gen.enumBounded + let firstArg = BS.cons w8 bs + bs' <- Gen.bytes byteBoundRange + let len = fromIntegral $ 8 * BS.length firstArg + let len' = fromIntegral $ 8 * BS.length bs' + ix <- (len' +) <$> (Gen.integral . indexRangeFor $ len) + pure . AppendInBoundsFirst firstArg bs' $ ix + inBounds2 :: Gen BitAppendCase + inBounds2 = do + bs <- Gen.bytes byteBoundRange + bs' <- Gen.bytes byteBoundRange + w8 <- Gen.enumBounded + let secondArg = BS.cons w8 bs' + let len' = fromIntegral $ 8 * BS.length secondArg + ix <- Gen.integral . indexRangeFor $ len' + pure . AppendInBoundsSecond bs secondArg $ ix + +genWriteBitCase :: Gen WriteBitCase +genWriteBitCase = Gen.choice [oob, inBounds] + where + oob :: Gen WriteBitCase + oob = do + bs <- Gen.bytes byteBoundRange + let len = fromIntegral $ 8 * BS.length bs + b <- Gen.enumBounded + ix <- Gen.choice [tooLowIx len, tooHighIx len] + pure . WriteBitOutOfBounds bs ix $ b + inBounds :: Gen WriteBitCase + inBounds = do + bs <- Gen.bytes byteBoundRange + w8 <- Gen.enumBounded + let bs' = BS.cons w8 bs + let len = fromIntegral $ 8 * BS.length bs' + b <- Gen.enumBounded + ix <- Gen.integral . indexRangeFor $ len + pure . 
WriteBitInBounds bs' ix $ b + +genFFSAppendCase :: Gen FFSAppendCase +genFFSAppendCase = Gen.choice [allZero, secondZero, secondNonZero] + where + allZero :: Gen FFSAppendCase + allZero = do + len <- Gen.integral . Range.linear 0 $ 63 + len' <- Gen.integral . Range.linear 0 $ 63 + pure . FFSAppendBothZero len $ len' + secondZero :: Gen FFSAppendCase + secondZero = do + bs <- Gen.bytes byteBoundRange + w8 <- Gen.filter (/= zeroBits) Gen.enumBounded + let firstArg = BS.cons w8 bs + len' <- Gen.integral . Range.linear 0 $ 63 + pure . FFSAppendSecondZero firstArg $ len' + secondNonZero :: Gen FFSAppendCase + secondNonZero = do + bs <- Gen.bytes byteBoundRange + w8 <- Gen.filter (/= zeroBits) Gen.enumBounded + bs' <- Gen.bytes byteBoundRange + w8' <- Gen.filter (/= zeroBits) Gen.enumBounded + pure . FFSAppendSecondNonZero (BS.cons w8 bs) . BS.cons w8' $ bs' + +tooLowIx :: Integer -> Gen Integer +tooLowIx = Gen.integral . Range.linear (-1) . negate + +tooHighIx :: Integer -> Gen Integer +tooHighIx i = Gen.integral . Range.linear i $ i * 2 + +genSplit :: Integer -> Gen (Integer, Integer) +genSplit ij = Gen.element $ case signum ij of + 1 -> [(i, j) | i <- [0 .. ij], j <- [0 .. ij], i + j == ij] + 0 -> [(0, 0)] + _ -> [(i, j) | i <- [0, (-1) .. ij], j <- [0, (-1) .. 
ij], i + j == ij] + +-- Ranges + +byteBoundRange :: Range Int +byteBoundRange = Range.linear 0 64 + +diffRange :: Int -> Range Int +diffRange diff = let param = abs diff + 1 in + Range.linear param (param * 2) + +indexRange :: Range Integer +indexRange = Range.linearFrom 0 (-100) 100 + +indexRangeOf :: Integer -> Range Integer +indexRangeOf lim = Range.constantFrom 0 (negate lim) (lim - 1) + +indexRangeFor :: Integer -> Range Integer +indexRangeFor i = Range.constant 0 (i - 1) + +integerRange :: Range Integer +integerRange = Range.linear 0 200 diff --git a/plutus-core/untyped-plutus-core/test/Evaluation/Builtins/Definition.hs b/plutus-core/untyped-plutus-core/test/Evaluation/Builtins/Definition.hs index 91ab2302e31..8dfa4d90151 100644 --- a/plutus-core/untyped-plutus-core/test/Evaluation/Builtins/Definition.hs +++ b/plutus-core/untyped-plutus-core/test/Evaluation/Builtins/Definition.hs @@ -39,12 +39,12 @@ import PlutusCore.StdLib.Data.ScottList qualified as Scott import PlutusCore.StdLib.Data.ScottUnit qualified as Scott import PlutusCore.StdLib.Data.Unit +import Evaluation.Builtins.Bitwise import Evaluation.Builtins.BLS12_381 (test_BLS12_381) import Evaluation.Builtins.Common import Evaluation.Builtins.SignatureVerification (ecdsaSecp256k1Prop, ed25519_V1Prop, ed25519_V2Prop, schnorrSecp256k1Prop) - import Control.Exception import Data.ByteString (ByteString) import Data.DList qualified as DList @@ -747,9 +747,144 @@ test_SignatureVerification = "Schnorr verification behaves correctly on all inputs" "schnorr_correct" . property $ schnorrSecp256k1Prop + ], + testGroup "Ed25519 signatures (V1)" [ + testPropertyNamed "Ed25519_V1 verification behaves correctly on all inputs" "ed25519_V1_correct" . property $ ed25519_V1Prop + ], + testGroup "Ed25519 signatures (V2)" [ + testPropertyNamed "Ed25519_V2 verification behaves correctly on all inputs" "ed25519_V2_correct" . 
property $ ed25519_V2Prop + ], + testGroup "Signatures on the SECP256k1 curve" [ + testPropertyNamed "ECDSA verification behaves correctly on all inputs" "ecdsa_correct" . property $ ecdsaSecp256k1Prop, + testPropertyNamed "Schnorr verification behaves correctly on all inputs" "schnorr_correct" . property $ schnorrSecp256k1Prop ] ] +-- Test the bitwise builtins are behaving correctly +test_Bitwise :: TestTree +test_Bitwise = + adjustOption (\x -> max x . HedgehogTestLimit . Just $ 8000) . + testGroup "Bitwise operations" $ [ + testAndByteString, + testIorByteString, + testXorByteString, + testComplementByteString, + testPopCountByteString, + testTestBitByteString, + testWriteBitByteString, + testFindFirstSetByteString, + testRotateByteString, + testShiftByteString, + testIntegerToByteString, + testByteStringToInteger + ] + +-- Tests for bitwise AND on ByteStrings +testAndByteString :: TestTree +testAndByteString = testGroup "AndByteString" [ + testPropertyNamed "Commutativity" "and_commutes" . property $ bitwiseAndCommutes, + testPropertyNamed "Associativity" "and_associates" . property $ bitwiseAndAssociates, + testPropertyNamed "All-1s is an identity" "and_all_1_identity" . property $ bitwiseAndIdentity, + testPropertyNamed "All-0s is absorbing" "and_all_0_absorb" . property $ bitwiseAndAbsorbing, + testPropertyNamed "AND with yourself does nothing" "and_self_nothing" . property $ bitwiseAndSelf, + testPropertyNamed "De Morgan's law" "and_demorgan" . property $ bitwiseAndDeMorgan + ] + +-- Tests for bitwise IOR on ByteStrings +testIorByteString :: TestTree +testIorByteString = testGroup "IorByteString" [ + testPropertyNamed "Commutativity" "ior_commutes" . property $ bitwiseIorCommutes, + testPropertyNamed "Associativity" "ior_associates" . property $ bitwiseIorAssociates, + testPropertyNamed "All-0s is an identity" "ior_all_0_identity" . property $ bitwiseIorIdentity, + testPropertyNamed "All-1s is absorbing" "ior_all_1_absorb" . 
property $ bitwiseIorAbsorbing, + testPropertyNamed "IOR with yourself does nothing" "ior_self_nothing" . property $ bitwiseIorSelf, + testPropertyNamed "De Morgan's law" "ior_demorgan" . property $ bitwiseIorDeMorgan + ] + +-- Tests for bitwise XOR on ByteStrings +testXorByteString :: TestTree +testXorByteString = testGroup "XorByteString" [ + testPropertyNamed "Commutativity" "xor_commutes" . property $ bitwiseXorCommutes, + testPropertyNamed "Associativity" "xor_associates" . property $ bitwiseXorAssociates, + testPropertyNamed "All-0s is an identity" "xor_all_0_identity" . property $ bitwiseXorIdentity, + testPropertyNamed "XOR with all 1s is complement" "xor_all_1_complement" . property $ bitwiseXorComplement, + testPropertyNamed "XOR with yourself gives all-0" "xor_self_all_0" . property $ bitwiseXorSelf + ] + +-- Tests for bitwise complement on ByteStrings +testComplementByteString :: TestTree +testComplementByteString = testGroup "ComplementByteString" [ + testPropertyNamed "Self-inversion" "complement_self_inversion" . property $ bitwiseComplementSelfInverts + ] + +-- Tests for population count on ByteStrings +testPopCountByteString :: TestTree +testPopCountByteString = testGroup "PopCountByteString" [ + testCase "popcount of empty ByteString is 0" $ do + let arg = mkConstant @ByteString () "" + let comp = mkIterAppNoAnn (builtin () PopCountByteString) [ arg ] + typecheckEvaluateCekNoEmit def defaultBuiltinCostModel comp @?= Right (EvaluationSuccess . mkConstant @Integer () $ 0), + testPropertyNamed "popcount of singleton ByteString is correct" "popcount_singleton" . property $ popCountSingleByte, + testPropertyNamed "popcount of append is sum of popcounts" "popcount_append_sum" . property $ popCountAppend + ] + +-- Tests for bit indexing into a ByteString +testTestBitByteString :: TestTree +testTestBitByteString = testGroup "TestBitByteString" [ + testPropertyNamed "any index on an empty ByteString fails" "test_bit_empty" . 
property $ testBitEmpty, + testPropertyNamed "indexing on singletons works correctly" "test_bit_singleton" . property $ testBitSingleByte, + testPropertyNamed "indexing appends agrees with components" "test_bit_agreement" . property $ testBitAppend + ] + +-- Tests for bit setting or clearing of a ByteString +testWriteBitByteString :: TestTree +testWriteBitByteString = testGroup "WriteBitByteString" [ + testPropertyNamed "writing then reading gives back what you wrote" "write_bit_read" . property $ writeBitRead, + testPropertyNamed "second write wins" "write_bit_write" . property $ writeBitDouble, + testPropertyNamed "single write to zeroes gives right reads" "write_bit_agreement" . property $ writeBitAgreement + ] + +-- Tests for finding first set bit of a ByteString +testFindFirstSetByteString :: TestTree +testFindFirstSetByteString = testGroup "FindFirstSetByteString" [ + testCase "find first set of empty Bytestring is -1" $ do + let arg = mkConstant @ByteString () "" + let comp = mkIterAppNoAnn (builtin () FindFirstSetByteString) [ arg ] + typecheckEvaluateCekNoEmit def defaultBuiltinCostModel comp @?= Right (EvaluationSuccess . mkConstant @Integer () $ (-1)), + testPropertyNamed "find first set on singletons works correctly" "ffs_singleton" . property $ ffsSingleByte, + testPropertyNamed "find first set on appended ByteStrings works correctly" "ffs_append" . property $ ffsAppend + ] + +-- Tests for ByteString rotations +testRotateByteString :: TestTree +testRotateByteString = testGroup "RotateByteString" [ + testPropertyNamed "rotating by 0 does nothing" "rotate_0_nothing" . property $ rotateIdentity, + testPropertyNamed "rotation adjusts indices correctly" "rotate_adjust" . property $ rotateIndexMotion, + testPropertyNamed "rotating all-zero or all-one changes nothing" "rotate_homogenous" . property $ rotateHomogenous, + testPropertyNamed "rotating by i, then by j is the same as rotating by i + j" "rotate_sum" . 
property $ rotateSum + ] + +-- Tests for ByteString shifts +testShiftByteString :: TestTree +testShiftByteString = testGroup "ShiftByteString" [ + testPropertyNamed "shifting by 0 does nothing" "shift_0_nothing" . property $ shiftIdentity, + testPropertyNamed "shifting adjusts indices correctly" "shift_adjust" . property $ shiftIndexMotion, + testPropertyNamed "shifting all-zeroes does nothing" "shift_homogenous" . property $ shiftHomogenous, + testPropertyNamed "shifting in two steps is the same as shifting in one" "shift_sum" . property $ shiftSum + ] + +-- Tests for conversion into ByteString from Integer +testIntegerToByteString :: TestTree +testIntegerToByteString = testGroup "IntegerToByteString" [ + testPropertyNamed "Round trip" "i_to_bs_roundtrip" . property $ iToBsRoundtrip + ] + +-- Tests for conversion into Integer from ByteString +testByteStringToInteger :: TestTree +testByteStringToInteger = testGroup "ByteStringToInteger" [ + testPropertyNamed "trailing zeros ignored" "bs_to_i_trailing" . 
property $ bsToITrailing + ] + test_definition :: TestTree test_definition = testGroup "definition" @@ -778,6 +913,7 @@ test_definition = , test_Data , test_Crypto , test_HashSizes + , test_Bitwise , test_SignatureVerification , test_BLS12_381 , test_Other diff --git a/plutus-ledger-api/src/PlutusLedgerApi/Common/Versions.hs b/plutus-ledger-api/src/PlutusLedgerApi/Common/Versions.hs index f2552fe4cfc..9337574390f 100644 --- a/plutus-ledger-api/src/PlutusLedgerApi/Common/Versions.hs +++ b/plutus-ledger-api/src/PlutusLedgerApi/Common/Versions.hs @@ -109,7 +109,12 @@ builtinsIntroducedIn = Map.fromList [ Bls12_381_G2_equal, Bls12_381_G2_hashToGroup, Bls12_381_G2_compress, Bls12_381_G2_uncompress, Bls12_381_millerLoop, Bls12_381_mulMlResult, Bls12_381_finalVerify, - Keccak_256, Blake2b_224 + Keccak_256, Blake2b_224, + IntegerToByteString, ByteStringToInteger, + AndByteString, IorByteString, XorByteString, ComplementByteString, + ShiftByteString, RotateByteString, + TestBitByteString, WriteBitByteString, + PopCountByteString, FindFirstSetByteString ]) ] diff --git a/plutus-tx/src/PlutusTx/Builtins.hs b/plutus-tx/src/PlutusTx/Builtins.hs index bd708e4afe8..0c9be5ef95d 100644 --- a/plutus-tx/src/PlutusTx/Builtins.hs +++ b/plutus-tx/src/PlutusTx/Builtins.hs @@ -4,8 +4,8 @@ -- | Primitive names and functions for working with Plutus Core builtins. 
module PlutusTx.Builtins ( - -- * Bytestring builtins - BuiltinByteString + -- * Bytestring builtins + BuiltinByteString , appendByteString , consByteString , sliceByteString @@ -96,6 +96,19 @@ module PlutusTx.Builtins ( , bls12_381_millerLoop , bls12_381_mulMlResult , bls12_381_finalVerify + -- * Bitwise builtins + , integerToByteString + , byteStringToInteger + , andByteString + , iorByteString + , xorByteString + , complementByteString + , shiftByteString + , rotateByteString + , popCountByteString + , testBitByteString + , writeBitByteString + , findFirstSetByteString -- * Conversions , fromBuiltin , toBuiltin @@ -283,6 +296,90 @@ verifySchnorrSecp256k1Signature verifySchnorrSecp256k1Signature vk msg sig = fromBuiltin (BI.verifySchnorrSecp256k1Signature vk msg sig) +-- | Converts a non-negative 'Integer' into its base-256 'BuiltinByteString' representation. +-- The format is little-endian, i.e. the first byte is the least significant. +-- The inverse of this is 'byteStringToInteger'. +-- The output does not contain any trailing zero-bytes, hence zeros are empty bytestrings. +-- If the input is negative, this function errs. +{-# INLINEABLE integerToByteString #-} +integerToByteString :: Integer -> BuiltinByteString +integerToByteString i = BI.integerToByteString (toBuiltin i) + +-- | Converts a base-256 'BuiltinByteString' into its 'Integer' representation. +-- The format is little-endian, i.e. the first byte is the least significant. +-- The inverse of this is 'integerToByteString'. +-- The input can contain trailing zero-bytes. +{-# INLINEABLE byteStringToInteger #-} +byteStringToInteger :: BuiltinByteString -> Integer +byteStringToInteger bs = fromBuiltin (BI.byteStringToInteger bs) + +-- | If given bytestrings of equal length, constructs their bitwise logical +-- AND, erring otherwise. 
+{-# INLINEABLE andByteString #-}
+andByteString :: BuiltinByteString -> BuiltinByteString -> BuiltinByteString
+andByteString = BI.andByteString
+
+-- | If given bytestrings of equal length, constructs their bitwise logical
+-- OR, erring otherwise.
+{-# INLINEABLE iorByteString #-}
+iorByteString :: BuiltinByteString -> BuiltinByteString -> BuiltinByteString
+iorByteString = BI.iorByteString
+
+-- | If given bytestrings of equal length, constructs their bitwise logical
+-- XOR, erring otherwise.
+{-# INLINEABLE xorByteString #-}
+xorByteString :: BuiltinByteString -> BuiltinByteString -> BuiltinByteString
+xorByteString = BI.xorByteString
+
+-- | Constructs the complement of the argument bytestring,
+-- i.e. each bit is flipped.
+{-# INLINEABLE complementByteString #-}
+complementByteString :: BuiltinByteString -> BuiltinByteString
+complementByteString = BI.complementByteString
+
+-- | Shifts the input bytestring left by the specified (possibly negative) amount.
+-- If positive, shifts left/to higher significance.
+-- If negative, shifts right/to lower significance.
+-- The shift is **not** arithmetic. You can emulate an arithmetic
+-- shift by OR-ing with what is morally -1 left-shifted the appropriate amount.
+-- The output is not trimmed, hence trailing zero-bytes may remain.
+{-# INLINEABLE shiftByteString #-}
+shiftByteString :: BuiltinByteString -> Integer -> BuiltinByteString
+shiftByteString bs i = BI.shiftByteString bs (toBuiltin i)
+
+-- | Rotates the input bytestring left by the specified (possibly negative) amount.
+-- If positive, rotates left/to higher significance.
+-- If negative, rotates right/to lower significance.
+{-# INLINEABLE rotateByteString #-}
+rotateByteString :: BuiltinByteString -> Integer -> BuiltinByteString
+rotateByteString bs i = BI.rotateByteString bs (toBuiltin i)
+
+-- | Counts the number of 1 bits in the argument.
+{-# INLINEABLE popCountByteString #-} +popCountByteString :: BuiltinByteString -> Integer +popCountByteString bs = fromBuiltin (BI.popCountByteString bs) + +-- | Bitwise indexing operation. Errs when given an index that's not +-- in-bounds: specifically, indices that are either negative or greater than or +-- equal to the number of bits in the 'BuiltinByteString' argument. +{-# INLINEABLE testBitByteString #-} +testBitByteString :: BuiltinByteString -> Integer -> Bool +testBitByteString bs i = fromBuiltin (BI.testBitByteString bs (toBuiltin i)) + +-- | Bitwise modification at an index. Errs when given an index that's not +-- in-bounds: specifically, indices that are either negative or greater than +-- or equal to the number of bits in the 'BuiltinByteString' argument. +{-# INLINEABLE writeBitByteString #-} +writeBitByteString :: BuiltinByteString -> Integer -> Bool -> BuiltinByteString +writeBitByteString bs i b = BI.writeBitByteString bs (toBuiltin i) (toBuiltin b) + +-- | Finds the lowest bit index such that 'testBitByteString' at that index is +-- 'True'. Returns @-1@ if no such index exists: that is, the +-- 'BuiltinByteString' argument has only zero bytes in it, or is empty. +{-# INLINEABLE findFirstSetByteString #-} +findFirstSetByteString :: BuiltinByteString -> Integer +findFirstSetByteString bs = fromBuiltin (BI.findFirstSetByteString bs) + {-# INLINABLE addInteger #-} -- | Add two 'Integer's. addInteger :: Integer -> Integer -> Integer diff --git a/plutus-tx/src/PlutusTx/Builtins/Internal.hs b/plutus-tx/src/PlutusTx/Builtins/Internal.hs index 9a5cf7f17a8..fa1bdebbc6d 100644 --- a/plutus-tx/src/PlutusTx/Builtins/Internal.hs +++ b/plutus-tx/src/PlutusTx/Builtins/Internal.hs @@ -14,6 +14,7 @@ -- Most users should not use this module directly, but rather use 'PlutusTx.Builtins'. 
module PlutusTx.Builtins.Internal where +import Bitwise qualified import Codec.Serialise import Control.DeepSeq (NFData (..)) import Control.Monad.Trans.Writer.Strict (runWriter) @@ -25,6 +26,7 @@ import Data.Data import Data.Foldable qualified as Foldable import Data.Hashable (Hashable (..)) import Data.Kind (Type) +import Data.Maybe (fromJust) import Data.Text as Text (Text, empty) import Data.Text.Encoding as Text (decodeUtf8, encodeUtf8) import PlutusCore.Builtin.Emitter (Emitter (Emitter)) @@ -306,6 +308,86 @@ lessThanEqualsByteString (BuiltinByteString b1) (BuiltinByteString b2) = Builtin decodeUtf8 :: BuiltinByteString -> BuiltinString decodeUtf8 (BuiltinByteString b) = BuiltinString $ Text.decodeUtf8 b +{-# NOINLINE integerToByteString #-} +integerToByteString :: BuiltinInteger -> BuiltinByteString +integerToByteString = BuiltinByteString . fromJust . Bitwise.integerToByteString + +{-# NOINLINE byteStringToInteger #-} +byteStringToInteger :: BuiltinByteString -> BuiltinInteger +byteStringToInteger (BuiltinByteString bs) = Bitwise.byteStringToInteger bs + +{-# NOINLINE andByteString #-} +andByteString :: BuiltinByteString -> BuiltinByteString -> BuiltinByteString +andByteString (BuiltinByteString bs) (BuiltinByteString bs') = + case Bitwise.andByteString bs bs' of + Emitter f -> case runWriter f of + (res, logs) -> traceAll logs $ case res of + EvaluationFailure -> mustBeReplaced "Bitwise AND errored." + EvaluationSuccess bs'' -> BuiltinByteString bs'' + +{-# NOINLINE iorByteString #-} +iorByteString :: BuiltinByteString -> BuiltinByteString -> BuiltinByteString +iorByteString (BuiltinByteString bs) (BuiltinByteString bs') = + case Bitwise.iorByteString bs bs' of + Emitter f -> case runWriter f of + (res, logs) -> traceAll logs $ case res of + EvaluationFailure -> mustBeReplaced "Bitwise IOR errored." 
+ EvaluationSuccess bs'' -> BuiltinByteString bs'' + +{-# NOINLINE xorByteString #-} +xorByteString :: BuiltinByteString -> BuiltinByteString -> BuiltinByteString +xorByteString (BuiltinByteString bs) (BuiltinByteString bs') = + case Bitwise.xorByteString bs bs' of + Emitter f -> case runWriter f of + (res, logs) -> traceAll logs $ case res of + EvaluationFailure -> mustBeReplaced "Bitwise XOR errored." + EvaluationSuccess bs'' -> BuiltinByteString bs'' + +{-# NOINLINE complementByteString #-} +complementByteString :: BuiltinByteString -> BuiltinByteString +complementByteString (BuiltinByteString bs) = + BuiltinByteString . Bitwise.complementByteString $ bs + +{-# NOINLINE shiftByteString #-} +shiftByteString :: BuiltinByteString -> BuiltinInteger -> BuiltinByteString +shiftByteString (BuiltinByteString bs) = + BuiltinByteString . Bitwise.shiftByteString bs + +{-# NOINLINE rotateByteString #-} +rotateByteString :: BuiltinByteString -> BuiltinInteger -> BuiltinByteString +rotateByteString (BuiltinByteString bs) = + BuiltinByteString . Bitwise.rotateByteString bs + +{-# NOINLINE popCountByteString #-} +popCountByteString :: BuiltinByteString -> BuiltinInteger +popCountByteString (BuiltinByteString bs) = Bitwise.popCountByteString bs + +{-# NOINLINE testBitByteString #-} +testBitByteString :: BuiltinByteString -> BuiltinInteger -> BuiltinBool +testBitByteString (BuiltinByteString bs) i = + case Bitwise.testBitByteString bs i of + Emitter f -> case runWriter f of + (res, logs) -> traceAll logs $ case res of + EvaluationFailure -> mustBeReplaced "Bitwise indexing errored." 
+        EvaluationSuccess b -> BuiltinBool b
+
+{-# NOINLINE writeBitByteString #-}
+writeBitByteString ::
+  BuiltinByteString ->
+  BuiltinInteger ->
+  BuiltinBool ->
+  BuiltinByteString
+writeBitByteString (BuiltinByteString bs) i (BuiltinBool b) =
+  case Bitwise.writeBitByteString bs i b of
+    Emitter f -> case runWriter f of
+      (res, logs) -> traceAll logs $ case res of
+        EvaluationFailure -> mustBeReplaced "Bitwise indexed write errored."
+        EvaluationSuccess bs' -> BuiltinByteString bs'
+
+{-# NOINLINE findFirstSetByteString #-}
+findFirstSetByteString :: BuiltinByteString -> BuiltinInteger
+findFirstSetByteString (BuiltinByteString bs) = Bitwise.findFirstSetByteString bs
+
 {- STRING -}