Skip to content

Commit

Permalink
Merge pull request #6447 from phadej/grayjay-update-solver-benchmarks
Browse files Browse the repository at this point in the history
Grayjay update solver benchmarks
  • Loading branch information
phadej authored Dec 18, 2019
2 parents 8f42d3f + 676ffbd commit 787b1f2
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 22 deletions.
2 changes: 1 addition & 1 deletion .docker/validate-8.8.1.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,4 @@ RUN cabal v2-install -w ghc-8.8.1 --lib \
# Validate
WORKDIR /build
COPY . /build
RUN sh ./validate.sh -w ghc-8.8.1 -v -D
RUN sh ./validate.sh -w ghc-8.8.1 -v -D -b
2 changes: 1 addition & 1 deletion cabal.project.validate
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
packages: Cabal/ cabal-testsuite/ cabal-install/
packages: Cabal/ cabal-testsuite/ cabal-install/ solver-benchmarks/

write-ghc-environment-files: never

Expand Down
77 changes: 59 additions & 18 deletions solver-benchmarks/HackageBenchmark.hs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ import Statistics.Test.MannWhitneyU ( PositionTest(..), TestResult(..)
, mannWhitneyUCriticalValue
, mannWhitneyUtest)
import Statistics.Types (PValue, mkPValue)
import System.Directory (getTemporaryDirectory)
import System.Exit (ExitCode(..), exitFailure)
import System.FilePath ((</>))
import System.IO ( BufferMode(LineBuffering), hPutStrLn, hSetBuffering, stderr
, stdout)
import System.Process ( StdStream(CreatePipe), CreateProcess(..), callProcess
Expand Down Expand Up @@ -56,6 +58,10 @@ data CabalResult
= Solution
| NoInstallPlan
| BackjumpLimit
| Unbuildable
| UnbuildableDep
| ComponentCycle
| ModReexpIssue
| PkgNotFound
| Timeout
| Unknown
Expand All @@ -81,14 +87,14 @@ hackageBenchmarkMain = do
-- "trial" or "summary".
when argPrintTrials $ putStr $ printf "%-16s " "trial/summary"
putStrLn $
printf "%-*s %-13s %-13s %11s %11s %11s %11s %11s"
printf "%-*s %-14s %-14s %11s %11s %11s %11s %11s"
nameColumnWidth "package" "result1" "result2"
"mean1" "mean2" "stddev1" "stddev2" "speedup"

forM_ pkgs $ \pkg -> do
let printTrial msgType result1 result2 time1 time2 =
putStrLn $
printf "%-16s %-*s %-13s %-13s %10.3fs %10.3fs"
printf "%-16s %-*s %-14s %-14s %10.3fs %10.3fs"
msgType nameColumnWidth (unPackageName pkg)
(show result1) (show result2)
(diffTimeToDouble time1) (diffTimeToDouble time2)
Expand Down Expand Up @@ -125,7 +131,7 @@ hackageBenchmarkMain = do
if isSignificantResult result1 result2
|| isSignificantTimeDifference argPValue ts1 ts2
then putStrLn $
printf "%-*s %-13s %-13s %10.3fs %10.3fs %10.3fs %10.3fs %10.3f"
printf "%-*s %-14s %-14s %10.3fs %10.3fs %10.3fs %10.3fs %10.3f"
nameColumnWidth (unPackageName pkg)
(show result1) (show result2) mean1 mean2 stddev1 stddev2 speedup
else when (argPrintTrials || argPrintSkippedPackages) $
Expand Down Expand Up @@ -169,10 +175,37 @@ hackageBenchmarkMain = do
runCabal :: Int -> FilePath -> [String] -> PackageName -> IO CabalTrial
runCabal timeoutSeconds cabal flags pkg = do
((exitCode, err), time) <- timeEvent $ do
tmpDir <- getTemporaryDirectory

let timeout = "timeout --foreground -sINT " ++ show timeoutSeconds
cabalCmd =
unwords $
[cabal, "install", unPackageName pkg, "--dry-run", "-v0"] ++ flags
cabalCmd = unwords $
[ cabal

-- A non-existent store directory prevents cabal from reading the
-- store, which would cause the size of the store to affect run
-- time.
, "--store-dir=" ++ (tmpDir </> "non-existent-store-dir")

, "v2-install"

-- These flags prevent a Cabal project or package environment from
-- affecting the install plan.
, "--ignore-project"
, "--package-env=non-existent-package-env"

-- --lib allows solving for packages with libraries or
-- executables.
, "--lib"

, unPackageName pkg

, "--dry-run"

-- The test doesn't currently handle stdout, so we suppress it
-- with silent. nowrap simplifies parsing the error messages.
, "-vsilent+nowrap"]

++ flags
cmd = (shell (timeout ++ " " ++ cabalCmd)) { std_err = CreatePipe }

-- TODO: Read stdout and compare the install plans.
Expand All @@ -182,12 +215,16 @@ runCabal timeoutSeconds cabal flags pkg = do
let exhaustiveMsg =
"After searching the rest of the dependency tree exhaustively"
result
| exitCode == ExitSuccess = Solution
| exitCode == ExitFailure 124 = Timeout
| fromString exhaustiveMsg `B.isInfixOf` err = NoInstallPlan
| fromString "Backjump limit reached" `B.isInfixOf` err = BackjumpLimit
| fromString "There is no package named" `B.isInfixOf` err = PkgNotFound
| otherwise = Unknown
| exitCode == ExitSuccess = Solution
| exitCode == ExitFailure 124 = Timeout
| fromString exhaustiveMsg `B.isInfixOf` err = NoInstallPlan
| fromString "Backjump limit reached" `B.isInfixOf` err = BackjumpLimit
| fromString "none of the components are available to build" `B.isInfixOf` err = Unbuildable
| fromString "Dependency on unbuildable" `B.isInfixOf` err = UnbuildableDep
| fromString "Dependency cycle between the following components" `B.isInfixOf` err = ComponentCycle
| fromString "Problem with module re-exports" `B.isInfixOf` err = ModReexpIssue
| fromString "There is no package named" `B.isInfixOf` err = PkgNotFound
| otherwise = Unknown
return (CabalTrial time result)

isSampleLargeEnough :: PValue Double -> Int -> Bool
Expand Down Expand Up @@ -224,12 +261,16 @@ isSignificantResult r1 r2 = r1 /= r2 || not (isExpectedResult r1)

-- Is this result expected in a benchmark run on all of Hackage?
isExpectedResult :: CabalResult -> Bool
isExpectedResult Solution = True
isExpectedResult NoInstallPlan = True
isExpectedResult BackjumpLimit = True
isExpectedResult Timeout = True
isExpectedResult PkgNotFound = False
isExpectedResult Unknown = False
isExpectedResult Solution = True
isExpectedResult NoInstallPlan = True
isExpectedResult BackjumpLimit = True
isExpectedResult Timeout = True
isExpectedResult Unbuildable = True
isExpectedResult UnbuildableDep = True
isExpectedResult ComponentCycle = True
isExpectedResult ModReexpIssue = True
isExpectedResult PkgNotFound = False
isExpectedResult Unknown = False

-- Combine CabalResults from multiple trials. Ignoring timeouts, all results
-- should be the same. If they aren't the same, we return Unknown.
Expand Down
2 changes: 2 additions & 0 deletions solver-benchmarks/solver-benchmarks.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ library
base,
bytestring,
Cabal >= 2.3,
directory,
filepath,
optparse-applicative,
process,
time,
Expand Down
30 changes: 28 additions & 2 deletions validate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ CABALSUITETESTS=true
CABALONLY=false
DEPSONLY=false
DOCTEST=false
BENCHMARKS=false
VERBOSE=false

# Help
Expand All @@ -23,7 +24,7 @@ show_usage() {
cat <<EOF
./validate.sh - build & test
Usage: ./validate.sh [ -j JOBS | -l | -C | -c | -s | -w HC | -x CABAL | -y CABALPLAN | -d | -D | -v ]
Usage: ./validate.sh [ -j JOBS | -l | -C | -c | -s | -w HC | -x CABAL | -y CABALPLAN | -d | -D | -b | -v ]
A script which runs all the tests.
Available options:
Expand All @@ -37,6 +38,7 @@ Available options:
-y CABALPLAN With cabal-plan
-d Build dependencies only
-D Run doctest
-b Run benchmarks (quick run, verify they work)
-v Verbose
EOF
exit 0
Expand Down Expand Up @@ -112,7 +114,7 @@ footer() {
# getopt
#######################################################################

while getopts 'j:lCcsw:x:y:dDv' flag; do
while getopts 'j:lCcsw:x:y:dDbv' flag; do
case $flag in
j) JOBS="$OPTARG"
;;
Expand All @@ -134,6 +136,8 @@ while getopts 'j:lCcsw:x:y:dDv' flag; do
;;
D) DOCTEST=true
;;
b) BENCHMARKS=true
;;
v) VERBOSE=true
;;
?) show_usage
Expand Down Expand Up @@ -170,6 +174,7 @@ cabal-testsuite: $CABALSUITETESTS
library only: $CABALONLY
dependencies only: $DEPSONLY
doctest: $DOCTEST
benchmarks: $BENCHMARKS
verbose: $VERBOSE
EOF
Expand Down Expand Up @@ -343,6 +348,27 @@ CMD="$($CABALPLANLISTBIN cabal-testsuite:exe:cabal-tests) --builddir=$CABAL_TEST

fi # CABALSUITETESTS

# solver-benchmarks
#######################################################################

# Optional stage, enabled by the -b flag (BENCHMARKS=true): build the solver
# benchmarks, run their unit tests, then do one short benchmark run.
if $BENCHMARKS; then
echo "$CYAN=== solver-benchmarks: build =========================== $(date +%T) === $RESET"

# Build the hackage-benchmark and unit-tests targets in a single invocation.
timed $CABALNEWBUILD solver-benchmarks:hackage-benchmark solver-benchmarks:unit-tests --enable-tests

echo "$CYAN=== solver-benchmarks: test ============================ $(date +%T) === $RESET"

# $CABALPLANLISTBIN presumably prints the path of the built binary for the
# given component (confirm against its definition earlier in the script).
# The tests run from the Cabal/ directory; a failure aborts the script.
CMD="$($CABALPLANLISTBIN solver-benchmarks:test:unit-tests)"
(cd Cabal && timed $CMD) || exit 1

echo "$CYAN=== solver-benchmarks: run ============================= $(date +%T) === $RESET"

# Run the benchmark on a single package for 5 trials, passing $CABAL as
# --cabal1 and the cabal-install:exe:cabal binary as --cabal2.
SOLVEPKG=Chart-diagrams
CMD="$($CABALPLANLISTBIN solver-benchmarks:exe:hackage-benchmark) --cabal1=$CABAL --cabal2=$($CABALPLANLISTBIN cabal-install:exe:cabal) --trials=5 --packages=$SOLVEPKG --print-trials"
# $CMD is left unquoted, so the shell splits it into the program path and its
# arguments.
(cd Cabal && timed $CMD) || exit 1

fi

# END
#######################################################################

Expand Down

0 comments on commit 787b1f2

Please sign in to comment.