Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added file write benchmarks #585

Merged
merged 8 commits into from
May 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions benchmarks/haskell/Benchmarks.hs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import qualified Benchmarks.DecodeUtf8 as DecodeUtf8
import qualified Benchmarks.EncodeUtf8 as EncodeUtf8
import qualified Benchmarks.Equality as Equality
import qualified Benchmarks.FileRead as FileRead
import qualified Benchmarks.FileWrite as FileWrite
import qualified Benchmarks.FoldLines as FoldLines
import qualified Benchmarks.Micro as Micro
import qualified Benchmarks.Multilang as Multilang
Expand Down Expand Up @@ -59,6 +60,9 @@ main = do
let tf = ("benchmarks/text-test-data" </>)
-- Cannot use envWithCleanup, because there is no instance NFData Handle
(sinkFn, sink) <- mkSink
(fileWriteBenchmarks, fileWriteCleanup) <- FileWrite.mkFileWriteBenchmarks $ do
(fp, h) <- mkSink
return (h, rmSink fp)
defaultMain
[ Builder.benchmark
, Concat.benchmark
Expand All @@ -77,6 +81,7 @@ main = do
]
, env (Equality.initEnv (tf "japanese.txt")) Equality.benchmark
, FileRead.benchmark (tf "russian.txt")
, fileWriteBenchmarks
, FoldLines.benchmark (tf "russian.txt")
, Multilang.benchmark
, bgroup "Pure"
Expand All @@ -102,3 +107,4 @@ main = do
]
]
rmSink sinkFn
fileWriteCleanup
132 changes: 132 additions & 0 deletions benchmarks/haskell/Benchmarks/FileWrite.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
-- | Benchmarks simple file writing
--
-- Tested in this benchmark:
--
-- * Writing a file to the disk
--

{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE CPP #-}

module Benchmarks.FileWrite
( mkFileWriteBenchmarks
) where

import Control.DeepSeq (NFData, deepseq)
import Data.Bifunctor (first)
import Data.List (intercalate, intersperse)
import Data.Semigroup ((<>))

Check warning on line 18 in benchmarks/haskell/Benchmarks/FileWrite.hs

View workflow job for this annotation

GitHub Actions / build (windows-latest, latest)

The import of ‘Data.Semigroup’ is redundant

Check warning on line 18 in benchmarks/haskell/Benchmarks/FileWrite.hs

View workflow job for this annotation

GitHub Actions / build (macOS-latest, latest)

The import of ‘Data.Semigroup’ is redundant
import Data.String (fromString)
import Data.Text (StrictText)
import Data.Text.Internal.Lazy (LazyText, defaultChunkSize)
import System.IO (Handle, Newline(CRLF,LF), NewlineMode(NewlineMode), BufferMode(NoBuffering,LineBuffering,BlockBuffering), hSetBuffering, hSetNewlineMode)

Check warning on line 22 in benchmarks/haskell/Benchmarks/FileWrite.hs

View workflow job for this annotation

GitHub Actions / build (windows-latest, latest)

The import of ‘NoBuffering’ from module ‘System.IO’ is redundant

Check warning on line 22 in benchmarks/haskell/Benchmarks/FileWrite.hs

View workflow job for this annotation

GitHub Actions / build (macOS-latest, latest)

The import of ‘NoBuffering’ from module ‘System.IO’ is redundant
import Test.Tasty.Bench (Benchmark, bgroup, bench, whnfAppIO)
import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Data.Text.IO.Utf8 as Utf8
import qualified Data.Text.Lazy as L
import qualified Data.Text.Lazy.IO as L

-- | Assemble the @hPutStr@ file-writing benchmark tree.
--
-- The argument is an action yielding a sink 'Handle' together with a cleanup
-- action for it; it is run once for every benchmark group built here.  The
-- result pairs the complete "FileWrite hPutStr" benchmark with the chained
-- cleanup actions of all groups.
mkFileWriteBenchmarks :: IO (Handle, IO ()) -> IO (Benchmark, IO ())
mkFileWriteBenchmarks mkSinkNRemove = do
  let -- Endless lazy text cycling through every Char; each benchmark writes
      -- a prefix of it.
      payload = L.cycle (fromString [minBound..maxBound])

      -- Shorthand for the block-buffered mode used by many groups below.
      blockBuf = BlockBuffering Nothing

#ifdef ExtendedBenchmarks
      sizes = [0..5] <> [10,20..100] <> [1000,3000,10000,100000]
#else
      sizes = [0,1,100,3000,10000,100000]
#endif

      -- One benchmark group: acquire a sink, configure its buffering and
      -- newline translation, then create one benchmark per payload size.
      testGroup :: NFData text => (Handle -> text -> IO ()) -> (String, StrictText -> text) -> Newline -> BufferMode -> IO (Benchmark, IO ())
      testGroup write (label, convert) nl mode = do
        (sink, cleanup) <- mkSinkNRemove
        hSetBuffering sink mode
        -- The same newline setting is used for both fields of the mode.
        hSetNewlineMode sink (NewlineMode nl nl)
        let groupName = intercalate " " [label, show nl, show mode]
            benchFor len =
              let input = convert (L.toStrict (L.take len payload))
              in bench ("length " <> show len)
                   -- Fully evaluate the input before the write is measured.
                   (input `deepseq` whnfAppIO (write sink) input)
        pure (bgroup groupName (sizes <&> benchFor), cleanup)

  sequenceGroup "FileWrite hPutStr"
#ifdef ExtendedBenchmarks
    [ testGroup T.hPutStr strict LF NoBuffering
    , testGroup L.hPutStr lazy LF NoBuffering

    , testGroup T.hPutStr strict LF LineBuffering
    , testGroup T.hPutStr strict CRLF LineBuffering
    , testGroup T.hPutStr strictNewlines LF LineBuffering
    , testGroup T.hPutStr strictNewlines CRLF LineBuffering

    , testGroup L.hPutStr lazy LF LineBuffering
    , testGroup L.hPutStr lazy CRLF LineBuffering
    , testGroup L.hPutStr lazySmallChunks LF LineBuffering
    , testGroup L.hPutStr lazySmallChunks CRLF LineBuffering
    , testGroup L.hPutStr lazyNewlines LF LineBuffering
    , testGroup L.hPutStr lazyNewlines CRLF LineBuffering
    , testGroup L.hPutStr lazySmallChunksNewlines LF LineBuffering
    , testGroup L.hPutStr lazySmallChunksNewlines CRLF LineBuffering

    , testGroup T.hPutStr strict LF blockBuf
    , testGroup T.hPutStr strict CRLF blockBuf
    , testGroup T.hPutStr strictNewlines LF blockBuf
    , testGroup T.hPutStr strictNewlines CRLF blockBuf

    , testGroup L.hPutStr lazy LF blockBuf
    , testGroup L.hPutStr lazy CRLF blockBuf
    , testGroup L.hPutStr lazySmallChunks LF blockBuf
    , testGroup L.hPutStr lazySmallChunks CRLF blockBuf
    , testGroup L.hPutStr lazyNewlines LF blockBuf
    , testGroup L.hPutStr lazyNewlines CRLF blockBuf
    , testGroup L.hPutStr lazySmallChunksNewlines LF blockBuf
    , testGroup L.hPutStr lazySmallChunksNewlines CRLF blockBuf

    , sequenceGroup "UTF-8"
        [ testGroup Utf8.hPutStr strict LF NoBuffering
        , testGroup Utf8.hPutStr strict LF LineBuffering
        , testGroup Utf8.hPutStr strict LF blockBuf
        ]
    ]
#else
    [ testGroup T.hPutStr strictNewlines LF LineBuffering
    , testGroup T.hPutStr strictNewlines CRLF LineBuffering

    , testGroup T.hPutStr strict LF blockBuf
    , testGroup T.hPutStr strictNewlines CRLF blockBuf

    , testGroup L.hPutStr lazyNewlines LF LineBuffering
    , testGroup L.hPutStr lazyNewlines CRLF LineBuffering

    , testGroup L.hPutStr lazy LF blockBuf
    , testGroup L.hPutStr lazyNewlines CRLF blockBuf

    , sequenceGroup "UTF-8"
        [ testGroup Utf8.hPutStr strict LF LineBuffering
        , testGroup Utf8.hPutStr strict LF blockBuf
        ]
    ]
#endif

where
-- Lazy text variants: a label that becomes part of the benchmark group name,
-- paired with the conversion applied to the strict payload before writing.
lazy, lazyNewlines, lazySmallChunks, lazySmallChunksNewlines :: (String, StrictText -> LazyText)
-- Lazy text assembled from pieces of at most 'defaultChunkSize' characters.
lazy = ("lazy", \t -> L.fromChunks (T.chunksOf defaultChunkSize t))
-- The 'strictNewlines' payload, chunked the same way as 'lazy'.
lazyNewlines = ("lazy many newlines", \t -> snd lazy (snd strictNewlines t))
-- Lazy text assembled from pieces of at most 10 characters; only used by
-- benchmarks in the ExtendedBenchmarks list.
lazySmallChunks = ("lazy small chunks", \t -> L.fromChunks (T.chunksOf 10 t))

Check warning on line 118 in benchmarks/haskell/Benchmarks/FileWrite.hs

View workflow job for this annotation

GitHub Actions / build (windows-latest, latest)

Defined but not used: ‘lazySmallChunks’

Check warning on line 118 in benchmarks/haskell/Benchmarks/FileWrite.hs

View workflow job for this annotation

GitHub Actions / build (macOS-latest, latest)

Defined but not used: ‘lazySmallChunks’
-- The 'strictNewlines' payload, chunked the same way as 'lazySmallChunks';
-- only used by benchmarks in the ExtendedBenchmarks list.
lazySmallChunksNewlines = ("lazy small chunks many newlines", \t -> snd lazySmallChunks (snd strictNewlines t))

Check warning on line 119 in benchmarks/haskell/Benchmarks/FileWrite.hs

View workflow job for this annotation

GitHub Actions / build (windows-latest, latest)

Defined but not used: ‘lazySmallChunksNewlines’

Check warning on line 119 in benchmarks/haskell/Benchmarks/FileWrite.hs

View workflow job for this annotation

GitHub Actions / build (macOS-latest, latest)

Defined but not used: ‘lazySmallChunksNewlines’

-- Strict text variants: a label for the benchmark group name, paired with
-- the transformation applied to the payload before writing.
strict :: (String, StrictText -> StrictText)
strict = ("strict", id)

-- Cuts the payload into pieces of at most 5 characters and joins them with
-- a newline between every pair of pieces.
strictNewlines :: (String, StrictText -> StrictText)
strictNewlines = ("strict many newlines", \t -> T.concat (intersperse "\n" (T.chunksOf 5 t)))

-- Run the group-building actions in order, then wrap the benchmarks they
-- produce in a single named 'bgroup' and chain their cleanup actions in the
-- same order.
sequenceGroup groupName tgs
  = first (bgroup groupName)
  . fmap sequence_
  . unzip
  <$> sequence tgs

-- | Flipped 'fmap': the structure comes first and the function to map over
-- it second.
(<&>) :: Functor f => f a -> (a -> b) -> f b
xs <&> f = f <$> xs

8 changes: 8 additions & 0 deletions text.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,11 @@ flag pure-haskell
default: False
manual: True

flag ExtendedBenchmarks
description: Runs extra benchmarks which can be very slow.
default: False
manual: True

library
if arch(javascript) || flag(pure-haskell)
cpp-options: -DPURE_HASKELL
Expand Down Expand Up @@ -312,6 +317,8 @@ benchmark text-benchmarks
ghc-options: -Wall -O2 -rtsopts "-with-rtsopts=-A32m"
if impl(ghc >= 8.6)
ghc-options: -fproc-alignment=64
if flag(ExtendedBenchmarks)
cpp-options: -DExtendedBenchmarks

build-depends: base,
bytestring >= 0.10.4,
Expand All @@ -332,6 +339,7 @@ benchmark text-benchmarks
Benchmarks.EncodeUtf8
Benchmarks.Equality
Benchmarks.FileRead
Benchmarks.FileWrite
Benchmarks.FoldLines
Benchmarks.Micro
Benchmarks.Multilang
Expand Down
Loading