Skip to content

Commit

Permalink
[Bench] Fix arbitrary evaluation nonsense (IntersectMBO#5867)
Browse files Browse the repository at this point in the history
This unscrews another portion of benchmarks. We still have benchmarks that are screwed up (see [PLT-6541](https://input-output.atlassian.net/browse/PLT-6541)) See [this](IntersectMBO#4914 (comment)) comment for an explanation of what went wrong.
  • Loading branch information
effectfully authored and v0d1ch committed Dec 6, 2024
1 parent 5dd488a commit a0afd2a
Show file tree
Hide file tree
Showing 10 changed files with 128 additions and 92 deletions.
2 changes: 2 additions & 0 deletions plutus-benchmark/README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
## Plutus Benchmarks

When implementing evaluation benchmarks, make sure to use 'mkEvalCtx' and 'evaluateCekForBench' to mimic the behavior of the ledger. If you use something else for evaluation, the results are likely not going to be representative of what actually happens in production.

This directory contains four sets of benchmarks:

* `nofib`: Plutus versions of some of Haskell's `nofib` benchmarks from https://github.com/ghc/nofib.
Expand Down
29 changes: 13 additions & 16 deletions plutus-benchmark/cek-calibration/Main.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}
Expand All @@ -16,13 +17,13 @@ module Main (main) where

import Prelude qualified as Haskell

import PlutusBenchmark.Common (benchTermCek, mkEvalCtx)
import PlutusCore
import PlutusCore.Evaluation.Machine.ExBudgetingDefaults
import PlutusCore.Pretty qualified as PP
import PlutusLedgerApi.Common (EvaluationContext)
import PlutusTx qualified as Tx
import PlutusTx.Prelude as Tx
import UntypedPlutusCore as UPLC
import UntypedPlutusCore.Evaluation.Machine.Cek

import Control.Exception
import Control.Lens
Expand All @@ -32,13 +33,6 @@ import Criterion.Types qualified as C

type PlainTerm = UPLC.Term Name DefaultUni DefaultFun ()


benchCek :: UPLC.Term NamedDeBruijn DefaultUni DefaultFun () -> Benchmarkable
benchCek t = case runExcept @UPLC.FreeVariableError $ runQuoteT $ UPLC.unDeBruijnTerm t of
Left e -> throw e
Right t' -> whnf (unsafeEvaluateCekNoEmit defaultCekParameters) t'


{-# INLINABLE rev #-}
rev :: [()] -> [()]
rev l0 = rev' l0 []
Expand Down Expand Up @@ -74,11 +68,11 @@ mkListTerm n =
let (UPLC.Program _ _ code) = mkListProg n
in code

mkListBM :: Integer -> Benchmark
mkListBM n = bench (Haskell.show n) $ benchCek (mkListTerm n)
mkListBM :: EvaluationContext -> Integer -> Benchmark
mkListBM ctx n = bench (Haskell.show n) $ benchTermCek ctx (mkListTerm n)

mkListBMs :: [Integer] -> Benchmark
mkListBMs ns = bgroup "List" [mkListBM n | n <- ns]
mkListBMs :: EvaluationContext -> [Integer] -> Benchmark
mkListBMs ctx ns = bgroup "List" [mkListBM ctx n | n <- ns]

writePlc :: UPLC.Program NamedDeBruijn DefaultUni DefaultFun () -> Haskell.IO ()
writePlc p =
Expand All @@ -91,10 +85,13 @@ writePlc p =


main1 :: Haskell.IO ()
main1 =
defaultMainWith (defaultConfig { C.csvFile = Just "cek-lists.csv" }) $ [mkListBMs [0,10..1000]]
main1 = do
evalCtx <- evaluate mkEvalCtx
defaultMainWith
(defaultConfig { C.csvFile = Just "cek-lists.csv" })
[mkListBMs evalCtx [0,10..1000]]

main2:: Haskell.IO ()
main2 :: Haskell.IO ()
main2 = writePlc (mkListProg 999)

main :: Haskell.IO ()
Expand Down
61 changes: 53 additions & 8 deletions plutus-benchmark/common/PlutusBenchmark/Common.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE ViewPatterns #-}

Expand All @@ -11,11 +12,14 @@ module PlutusBenchmark.Common
, toNamedDeBruijnTerm
, compiledCodeToTerm
, haskellValueToTerm
, benchTermCek
, benchProgramCek
, unsafeRunTermCek
, runTermCek
, cekResultMatchesHaskellValue
, mkEvalCtx
, evaluateCekLikeInProd
, evaluateCekForBench
, benchTermCek
, benchTermAgdaCek
, benchProgramAgdaCek
, TestSize (..)
Expand All @@ -29,17 +33,23 @@ where
import Paths_plutus_benchmark as Export
import PlutusBenchmark.ProtocolParameters as PP

import PlutusLedgerApi.Common qualified as LedgerApi

import PlutusTx qualified as Tx

import PlutusCore qualified as PLC
import PlutusCore.Default
import PlutusCore.Evaluation.Machine.ExBudget (ExBudget (..))
import PlutusCore.Evaluation.Machine.ExBudgetingDefaults qualified as PLC
import PlutusCore.Evaluation.Machine.ExMemory (ExCPU (..), ExMemory (..))
import PlutusTx qualified as Tx

import UntypedPlutusCore qualified as UPLC
import UntypedPlutusCore.Evaluation.Machine.Cek as Cek
import UntypedPlutusCore.Evaluation.Machine.Cek qualified as UPLC

import MAlonzo.Code.Evaluator.Term (runUAgda)

import Control.DeepSeq (force)
import Criterion.Main
import Criterion.Types (Config (..))
import Data.ByteString qualified as BS
Expand Down Expand Up @@ -105,12 +115,6 @@ haskellValueToTerm
:: Tx.Lift DefaultUni a => a -> Term
haskellValueToTerm = compiledCodeToTerm . Tx.liftCodeDef


{- | Convert a de-Bruijn-named UPLC term to a CEK Benchmark -}
benchTermCek :: Term -> Benchmarkable
benchTermCek term =
nf unsafeRunTermCek $! term -- Or whnf?

{- | Convert a de-Bruijn-named UPLC term to a CEK Benchmark -}
benchProgramCek :: Program -> Benchmarkable
benchProgramCek (UPLC.Program _ _ term) =
Expand Down Expand Up @@ -156,6 +160,47 @@ cekResultMatchesHaskellValue
cekResultMatchesHaskellValue term matches value =
(unsafeRunTermCek term) `matches` (unsafeRunTermCek $ haskellValueToTerm value)

-- | Create the evaluation context for the benchmarks. This doesn't exactly match how it's done
-- on-chain, but that's okay because the evaluation context is cached by the ledger, so we're
-- deliberately not including it in the benchmarks.
mkEvalCtx :: LedgerApi.EvaluationContext
mkEvalCtx =
case PLC.defaultCostModelParams of
Just p ->
let errOrCtx =
-- The validation benchmarks were all created from PlutusV1 scripts
LedgerApi.mkDynEvaluationContext DefaultFunSemanticsVariant1 p
in case errOrCtx of
Right ec -> ec
Left err -> error $ show err
Nothing -> error "Couldn't get cost model params"

-- | Evaluate a term as it would be evaluated using the on-chain evaluator.
evaluateCekLikeInProd
:: LedgerApi.EvaluationContext
-> UPLC.Term PLC.NamedDeBruijn PLC.DefaultUni PLC.DefaultFun ()
-> Either
(UPLC.CekEvaluationException UPLC.NamedDeBruijn UPLC.DefaultUni UPLC.DefaultFun)
(UPLC.Term UPLC.NamedDeBruijn UPLC.DefaultUni UPLC.DefaultFun ())
evaluateCekLikeInProd evalCtx term = do
let (getRes, _, _) =
let -- The validation benchmarks were all created from PlutusV1 scripts
pv = LedgerApi.ledgerLanguageIntroducedIn LedgerApi.PlutusV1
in LedgerApi.evaluateTerm UPLC.restrictingEnormous pv LedgerApi.Quiet evalCtx term
getRes

-- | Evaluate a term and either throw if evaluation fails or discard the result and return '()'.
-- Useful for benchmarking.
evaluateCekForBench
:: LedgerApi.EvaluationContext
-> UPLC.Term PLC.NamedDeBruijn PLC.DefaultUni PLC.DefaultFun ()
-> ()
evaluateCekForBench evalCtx = either (error . show) (\_ -> ()) . evaluateCekLikeInProd evalCtx

benchTermCek :: LedgerApi.EvaluationContext -> Term -> Benchmarkable
benchTermCek evalCtx term =
let !term' = force term
in whnf (evaluateCekForBench evalCtx) term'

---------------- Run a term or program using the plutus-metatheory CEK evaluator ----------------

Expand Down
25 changes: 16 additions & 9 deletions plutus-benchmark/lists/bench/Bench.hs
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@
{-# LANGUAGE BangPatterns #-}

{- | Plutus benchmarks for some simple list algorithms. See README.md for more information. -}

module Main (main) where

import Criterion.Main

import PlutusBenchmark.Common (benchTermCek, getConfig)

import PlutusBenchmark.Common (benchTermCek, getConfig, mkEvalCtx)
import PlutusBenchmark.Lists.Sort qualified as Sort

import PlutusBenchmark.Lists.Sum.Compiled qualified as Sum.Compiled
import PlutusBenchmark.Lists.Sum.HandWritten qualified as Sum.HandWritten
import PlutusLedgerApi.Common (EvaluationContext)

benchmarks :: [Benchmark]
benchmarks =
import Control.DeepSeq
import Control.Exception
import Data.Functor

benchmarks :: EvaluationContext -> [Benchmark]
benchmarks ctx =
[ bgroup "sort"
[ mkBMsForSort "ghcSort" Sort.mkWorstCaseGhcSortTerm
, mkBMsForSort "insertionSort" Sort.mkWorstCaseInsertionSortTerm
Expand All @@ -38,15 +43,17 @@ benchmarks =
]
where
mkBMsForSort name f =
bgroup name $ map (\n -> bench (show n) . benchTermCek . f $ n) sizesForSort
bgroup name $ sizesForSort <&> \n ->
bench (show n) $ benchTermCek ctx (f n)
sizesForSort = [10, 20..500]
mkBMsForSum name f =
bgroup name $ map (\n -> bench (show n) . benchTermCek . f $ [1..n]) sizesForSum
bgroup name $ sizesForSum <&> \n ->
bench (show n) $ benchTermCek ctx (f [1..n])
sizesForSum = [10, 50, 100, 500, 1000, 5000, 10000]

main :: IO ()
main = do
-- Run each benchmark for at least 15 seconds. Change this with -L or --timeout.
config <- getConfig 15.0
defaultMainWith config benchmarks

evalCtx <- evaluate $ force mkEvalCtx
defaultMainWith config $ benchmarks evalCtx
12 changes: 9 additions & 3 deletions plutus-benchmark/nofib/bench/BenchCek.hs
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
{-# LANGUAGE BangPatterns #-}

{- | Plutus benchmarks for the CEK machine based on some nofib examples. -}
module Main where

import PlutusBenchmark.Common (benchTermCek)
import Shared (benchWith)
import Shared (benchTermCek, benchWith, mkEvalCtx)

import Control.DeepSeq (force)
import Control.Exception (evaluate)

main :: IO ()
main = benchWith benchTermCek
main = do
evalCtx <- evaluate $ force mkEvalCtx
benchWith $ benchTermCek evalCtx
12 changes: 8 additions & 4 deletions plutus-benchmark/nofib/bench/Shared.hs
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
{- | Shared code for benchmarking Plutus and Haskell versions of the Plutus nofib examples -}
module Shared (benchWith, mkBenchMarks)
where

import PlutusBenchmark.Common (Term, getConfig)
module Shared (
benchWith
, mkBenchMarks
, mkEvalCtx
, benchTermCek
) where

import PlutusBenchmark.Common (Term, benchTermCek, getConfig, mkEvalCtx)

import PlutusBenchmark.NoFib.Clausify qualified as Clausify
import PlutusBenchmark.NoFib.Knights qualified as Knights
Expand Down
20 changes: 13 additions & 7 deletions plutus-benchmark/plutus-benchmark.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,12 @@ library plutus-benchmark-common
, base >=4.9 && <5
, bytestring
, criterion
, deepseq
, directory
, filepath
, flat ^>=0.6
, plutus-core ^>=1.25
, plutus-ledger-api ^>=1.25
, plutus-metatheory
, plutus-tx ^>=1.25
, tasty
Expand Down Expand Up @@ -141,6 +143,7 @@ benchmark nofib
build-depends:
, base >=4.9 && <5
, criterion >=1.5.9.0
, deepseq
, nofib-internal
, plutus-benchmark-common

Expand Down Expand Up @@ -220,8 +223,10 @@ benchmark lists
build-depends:
, base >=4.9 && <5
, criterion >=1.5.9.0
, deepseq
, lists-internal
, plutus-benchmark-common
, plutus-ledger-api

test-suite plutus-benchmark-lists-tests
import: lang, ghc-version-support
Expand Down Expand Up @@ -262,7 +267,6 @@ benchmark validation
, optparse-applicative
, plutus-benchmark-common
, plutus-core ^>=1.25
, plutus-ledger-api ^>=1.25

---------------- validation-decode ----------------

Expand Down Expand Up @@ -319,13 +323,16 @@ benchmark cek-calibration
main-is: Main.hs
hs-source-dirs: cek-calibration
build-depends:
, base >=4.9 && <5
, criterion >=1.5.9.0
, base >=4.9 && <5
, criterion >=1.5.9.0
, deepseq
, lens
, mtl
, plutus-core ^>=1.25
, plutus-tx ^>=1.25
, plutus-tx-plugin ^>=1.25
, plutus-benchmark-common
, plutus-core ^>=1.25
, plutus-ledger-api ^>=1.25
, plutus-tx ^>=1.25
, plutus-tx-plugin ^>=1.25

---------------- Signature verification throughput ----------------

Expand Down Expand Up @@ -567,7 +574,6 @@ benchmark validation-agda-cek
, optparse-applicative
, plutus-benchmark-common
, plutus-core ^>=1.25
, plutus-ledger-api ^>=1.25

benchmark nofib-agda-cek
import: lang, ghc-version-support
Expand Down
12 changes: 2 additions & 10 deletions plutus-benchmark/validation/bench/BenchCek.hs
Original file line number Diff line number Diff line change
@@ -1,16 +1,12 @@
{- | Validation benchmarks for the CEK machine. -}

{-# LANGUAGE BangPatterns #-}
module Main where

import Common (benchWith, evaluateCekLikeInProd, mkEvalCtx, unsafeUnflat)
import Common (benchTermCek, benchWith, mkEvalCtx, unsafeUnflat)
import Control.DeepSeq (force)
import Control.Exception (evaluate)
import PlutusBenchmark.Common (toNamedDeBruijnTerm)
import UntypedPlutusCore as UPLC

import Criterion (whnf)

{-|
Benchmarks only for the CEK execution time of the data/*.flat validation scripts
Expand All @@ -23,9 +19,5 @@ main :: IO ()
main = do
evalCtx <- evaluate $ force mkEvalCtx
let mkCekBM file program =
-- don't count the undebruijn . unflat cost
-- `force` to try to ensure that deserialiation is not included in benchmarking time.
let !benchTerm = force . toNamedDeBruijnTerm . UPLC._progTerm $ unsafeUnflat file program
eval = either (error . show) (\_ -> ()) . evaluateCekLikeInProd evalCtx
in whnf eval benchTerm
benchTermCek evalCtx . toNamedDeBruijnTerm . UPLC._progTerm $ unsafeUnflat file program
benchWith mkCekBM
Loading

0 comments on commit a0afd2a

Please sign in to comment.