Merge pull request #6447 from phadej/grayjay-update-solver-benchmarks

Grayjay update solver benchmarks
haskell · Dec 18, 2019 · 787b1f2 · 787b1f2
2 parents 8f42d3f + 676ffbd
commit 787b1f2
Show file tree

Hide file tree

Showing 5 changed files with 91 additions and 22 deletions.
diff --git a/.docker/validate-8.8.1.dockerfile b/.docker/validate-8.8.1.dockerfile
@@ -49,4 +49,4 @@ RUN     cabal v2-install -w ghc-8.8.1 --lib \
 # Validate
 WORKDIR /build
 COPY    . /build
-RUN     sh ./validate.sh -w ghc-8.8.1 -v -D
+RUN     sh ./validate.sh -w ghc-8.8.1 -v -D -b
diff --git a/cabal.project.validate b/cabal.project.validate
@@ -1,4 +1,4 @@
-packages: Cabal/ cabal-testsuite/ cabal-install/
+packages: Cabal/ cabal-testsuite/ cabal-install/ solver-benchmarks/
 
 write-ghc-environment-files: never
 

diff --git a/solver-benchmarks/HackageBenchmark.hs b/solver-benchmarks/HackageBenchmark.hs
@@ -27,7 +27,9 @@ import Statistics.Test.MannWhitneyU ( PositionTest(..), TestResult(..)
                                     , mannWhitneyUCriticalValue
                                     , mannWhitneyUtest)
 import Statistics.Types (PValue, mkPValue)
+import System.Directory (getTemporaryDirectory)
 import System.Exit (ExitCode(..), exitFailure)
+import System.FilePath ((</>))
 import System.IO ( BufferMode(LineBuffering), hPutStrLn, hSetBuffering, stderr
                  , stdout)
 import System.Process ( StdStream(CreatePipe), CreateProcess(..), callProcess
@@ -56,6 +58,10 @@ data CabalResult
   = Solution
   | NoInstallPlan
   | BackjumpLimit
+  | Unbuildable
+  | UnbuildableDep
+  | ComponentCycle
+  | ModReexpIssue
   | PkgNotFound
   | Timeout
   | Unknown
@@ -81,14 +87,14 @@ hackageBenchmarkMain = do
   -- "trial" or "summary".
   when argPrintTrials $ putStr $ printf "%-16s " "trial/summary"
   putStrLn $
-      printf "%-*s %-13s %-13s %11s %11s %11s %11s %11s"
+      printf "%-*s %-14s %-14s %11s %11s %11s %11s %11s"
              nameColumnWidth "package" "result1" "result2"
              "mean1" "mean2" "stddev1" "stddev2" "speedup"
 
   forM_ pkgs $ \pkg -> do
     let printTrial msgType result1 result2 time1 time2 =
             putStrLn $
-            printf "%-16s %-*s %-13s %-13s %10.3fs %10.3fs"
+            printf "%-16s %-*s %-14s %-14s %10.3fs %10.3fs"
                    msgType nameColumnWidth (unPackageName pkg)
                    (show result1) (show result2)
                    (diffTimeToDouble time1) (diffTimeToDouble time2)
@@ -125,7 +131,7 @@ hackageBenchmarkMain = do
       if isSignificantResult result1 result2
           || isSignificantTimeDifference argPValue ts1 ts2
       then putStrLn $
-           printf "%-*s %-13s %-13s %10.3fs %10.3fs %10.3fs %10.3fs %10.3f"
+           printf "%-*s %-14s %-14s %10.3fs %10.3fs %10.3fs %10.3fs %10.3f"
                   nameColumnWidth (unPackageName pkg)
                   (show result1) (show result2) mean1 mean2 stddev1 stddev2 speedup
       else when (argPrintTrials || argPrintSkippedPackages) $
@@ -169,10 +175,37 @@ hackageBenchmarkMain = do
 runCabal :: Int -> FilePath -> [String] -> PackageName -> IO CabalTrial
 runCabal timeoutSeconds cabal flags pkg = do
   ((exitCode, err), time) <- timeEvent $ do
+    tmpDir <- getTemporaryDirectory
+
     let timeout = "timeout --foreground -sINT " ++ show timeoutSeconds
-        cabalCmd =
-            unwords $
-            [cabal, "install", unPackageName pkg, "--dry-run", "-v0"] ++ flags
+        cabalCmd = unwords $
+            [ cabal
+
+              -- A non-existent store directory prevents cabal from reading the
+              -- store, which would cause the size of the store to affect run
+              -- time.
+            , "--store-dir=" ++ (tmpDir </> "non-existent-store-dir")
+
+            , "v2-install"
+
+              -- These flags prevent a Cabal project or package environment from
+              -- affecting the install plan.
+            , "--ignore-project"
+            , "--package-env=non-existent-package-env"
+
+              -- --lib allows solving for packages with libraries or
+              -- executables.
+            , "--lib"
+
+            , unPackageName pkg
+
+            , "--dry-run"
+
+              -- The test doesn't currently handle stdout, so we suppress it
+              -- with silent. nowrap simplifies parsing the error messages.
+            , "-vsilent+nowrap"]
+
+             ++ flags
         cmd = (shell (timeout ++ " " ++ cabalCmd)) { std_err = CreatePipe }
 
     -- TODO: Read stdout and compare the install plans.
@@ -182,12 +215,16 @@ runCabal timeoutSeconds cabal flags pkg = do
   let exhaustiveMsg =
           "After searching the rest of the dependency tree exhaustively"
       result
-        | exitCode == ExitSuccess                                  = Solution
-        | exitCode == ExitFailure 124                              = Timeout
-        | fromString exhaustiveMsg `B.isInfixOf` err               = NoInstallPlan
-        | fromString "Backjump limit reached" `B.isInfixOf` err    = BackjumpLimit
-        | fromString "There is no package named" `B.isInfixOf` err = PkgNotFound
-        | otherwise                                                = Unknown
+        | exitCode == ExitSuccess                                                          = Solution
+        | exitCode == ExitFailure 124                                                      = Timeout
+        | fromString exhaustiveMsg `B.isInfixOf` err                                       = NoInstallPlan
+        | fromString "Backjump limit reached" `B.isInfixOf` err                            = BackjumpLimit
+        | fromString "none of the components are available to build" `B.isInfixOf` err     = Unbuildable
+        | fromString "Dependency on unbuildable" `B.isInfixOf` err                         = UnbuildableDep
+        | fromString "Dependency cycle between the following components" `B.isInfixOf` err = ComponentCycle
+        | fromString "Problem with module re-exports" `B.isInfixOf` err                    = ModReexpIssue
+        | fromString "There is no package named" `B.isInfixOf` err                         = PkgNotFound
+        | otherwise                                                                        = Unknown
   return (CabalTrial time result)
 
 isSampleLargeEnough :: PValue Double -> Int -> Bool
@@ -224,12 +261,16 @@ isSignificantResult r1 r2 = r1 /= r2 || not (isExpectedResult r1)
 
 -- Is this result expected in a benchmark run on all of Hackage?
 isExpectedResult :: CabalResult -> Bool
-isExpectedResult Solution      = True
-isExpectedResult NoInstallPlan = True
-isExpectedResult BackjumpLimit = True
-isExpectedResult Timeout       = True
-isExpectedResult PkgNotFound   = False
-isExpectedResult Unknown       = False
+isExpectedResult Solution       = True
+isExpectedResult NoInstallPlan  = True
+isExpectedResult BackjumpLimit  = True
+isExpectedResult Timeout        = True
+isExpectedResult Unbuildable    = True
+isExpectedResult UnbuildableDep = True
+isExpectedResult ComponentCycle = True
+isExpectedResult ModReexpIssue  = True
+isExpectedResult PkgNotFound    = False
+isExpectedResult Unknown        = False
 
 -- Combine CabalResults from multiple trials. Ignoring timeouts, all results
 -- should be the same. If they aren't the same, we returns Unknown.

diff --git a/solver-benchmarks/solver-benchmarks.cabal b/solver-benchmarks/solver-benchmarks.cabal
@@ -30,6 +30,8 @@ library
     base,
     bytestring,
     Cabal >= 2.3,
+    directory,
+    filepath,
     optparse-applicative,
     process,
     time,

diff --git a/validate.sh b/validate.sh
@@ -14,6 +14,7 @@ CABALSUITETESTS=true
 CABALONLY=false
 DEPSONLY=false
 DOCTEST=false
+BENCHMARKS=false
 VERBOSE=false
 
 # Help
@@ -23,7 +24,7 @@ show_usage() {
 cat <<EOF
 ./validate.sh - build & test
 
-Usage: ./validate.sh [ -j JOBS | -l | -C | -c | -s | -w HC | -x CABAL | -y CABALPLAN | -d | -D | -v ]
+Usage: ./validate.sh [ -j JOBS | -l | -C | -c | -s | -w HC | -x CABAL | -y CABALPLAN | -d | -D | -b | -v ]
   A script which runs all the tests.
 
 Available options:
@@ -37,6 +38,7 @@ Available options:
   -y CABALPLAN   With cabal-plan
   -d             Build dependencies only
   -D             Run doctest
+  -b             Run benchmarks (quick run, verify they work)
   -v             Verbose
 EOF
 exit 0
@@ -112,7 +114,7 @@ footer() {
 # getopt
 #######################################################################
 
-while getopts 'j:lCcsw:x:y:dDv' flag; do
+while getopts 'j:lCcsw:x:y:dDbv' flag; do
     case $flag in
         j) JOBS="$OPTARG"
             ;;
@@ -134,6 +136,8 @@ while getopts 'j:lCcsw:x:y:dDv' flag; do
             ;;
         D) DOCTEST=true
             ;;
+        b) BENCHMARKS=true
+            ;;
         v) VERBOSE=true
             ;;
         ?) show_usage
@@ -170,6 +174,7 @@ cabal-testsuite:     $CABALSUITETESTS
 library only:        $CABALONLY
 dependencies only:   $DEPSONLY
 doctest:             $DOCTEST
+benchmarks:          $BENCHMARKS
 verbose:             $VERBOSE
 
 EOF
@@ -343,6 +348,27 @@ CMD="$($CABALPLANLISTBIN cabal-testsuite:exe:cabal-tests) --builddir=$CABAL_TEST
 
 fi # CABALSUITETESTS
 
+# solver-benchmarks
+#######################################################################
+
+if $BENCHMARKS; then
+echo "$CYAN=== solver-benchmarks: build =========================== $(date +%T) === $RESET"
+
+timed $CABALNEWBUILD solver-benchmarks:hackage-benchmark solver-benchmarks:unit-tests --enable-tests
+
+echo "$CYAN=== solver-benchmarks: test ============================ $(date +%T) === $RESET"
+
+CMD="$($CABALPLANLISTBIN solver-benchmarks:test:unit-tests)"
+(cd Cabal && timed $CMD) || exit 1
+
+echo "$CYAN=== solver-benchmarks: run ============================= $(date +%T) === $RESET"
+
+SOLVEPKG=Chart-diagrams
+CMD="$($CABALPLANLISTBIN solver-benchmarks:exe:hackage-benchmark) --cabal1=$CABAL --cabal2=$($CABALPLANLISTBIN cabal-install:exe:cabal) --trials=5 --packages=$SOLVEPKG --print-trials"
+(cd Cabal && timed $CMD) || exit 1
+
+fi
+
 # END
 #######################################################################