Merge pull request #249 from GaloisInc/raw_bc_tests

Add ability to test based on pre-generated bitcode files.
GaloisInc · Aug 15, 2023 · 9380173 · 9380173
2 parents 3eab22d + 3fc2bf7
commit 9380173
Show file tree

Hide file tree

Showing 4 changed files with 141 additions and 31 deletions.
diff --git a/disasm-test/Main.hs b/disasm-test/Main.hs
@@ -91,8 +91,8 @@ descr = PP.vcat $
   , "                     ^   `-[llvm-disasm]---> .ll"
   , "                     |                   `-> .AST"
   , " .c --[clang]--------+                         |"
-  , "                                             [show]"
-  , "                                               |"
+  , "                     |                        [show]"
+  , " .bc -[pre-existing]-+                         |"
   , "                                               v"
   , "      [compare first and second of these:]   .txt"
   , ""
@@ -334,20 +334,24 @@ main =  do
   sweets1 <- TS.findSugar $ assemblyCube llvmAsVC
   sweets2 <- TS.findSugar $ cCompilerCube llvmAsVC
   sweets3 <- TS.findSugar $ ccCompilerCube llvmAsVC
+  sweets4 <- TS.findSugar $ bitcodeCube llvmAsVC
   atests <- TS.withSugarGroups sweets1 testGroup
             $ \s _ e -> runAssemblyTest llvmAsVC knownBugs s e
   ctests <- TS.withSugarGroups sweets2 testGroup
             $ \s _ e -> runCompileTest llvmAsVC knownBugs s e
   cctests <- TS.withSugarGroups sweets3 testGroup
              $ \s _ e -> runCompileTest llvmAsVC knownBugs s e
+  bctests <- TS.withSugarGroups sweets4 testGroup
+             $ \s _ e -> runRawBCTest llvmAsVC knownBugs s e
   let tests = atests <> ctests
   case TR.tryIngredients
          (disasmTestIngredients llvmAsVC)
          disasmOpts
          (testGroup "Disassembly tests"
-          [ testGroup (showVC llvmAsVC) atests
-          , testGroup (showVC clangVC) ctests
-          , testGroup (showVC clangVC) cctests
+          [ testGroup ("llvm-as " <> showVC llvmAsVC) atests
+          , testGroup ("C " <> showVC clangVC) ctests
+          , testGroup ("C++ " <> showVC clangVC) cctests
+          , testGroup ("rawBC " <> showVC llvmAsVC) bctests
           ]) of
     Nothing ->
       hPutStrLn IO.stderr
@@ -433,17 +437,22 @@ runAssemblyTest llvmVersion knownBugs sweet expct
                Nothing   -> return ()
                Just ast1 ->
                  -- Re-assemble and re-disassemble
-                 with2Files (processLL pfx parsed1) $ \(_, Just ast2) -> do
-                 diffCmp ast1 ast2 -- Ensure that the ASTs match
-
-                 -- Ensure that the disassembled files match.  This is usually
-                 -- too strict (and doesn't really provide more info).  We
-                 -- normalize the AST (see below) to ensure that the ASTs match
-                 -- modulo metadata numbering, but the equivalent isn't possible
-                 -- for the assembly: we need llvm-as to be able to re-assemble
-                 -- it.
-                 --
-                 -- diffCmp parsed1 parsed2
+                 with2Files (processLL pfx parsed1)
+                 $ \(_, mb'ast2) ->
+                     case mb'ast2 of
+                       Just ast2 -> diffCmp ast1 ast2 -- Ensure that the ASTs match
+
+                                    -- Ensure that the disassembled files match.
+                                    -- This is usually too strict (and doesn't
+                                    -- really provide more info).  We normalize
+                                    -- the AST (see below) to ensure that the
+                                    -- ASTs match modulo metadata numbering, but
+                                    -- the equivalent isn't possible for the
+                                    -- assembly: we need llvm-as to be able to
+                                    -- re-assemble it.
+                                    --
+                                    -- diffCmp parsed1 parsed2
+                       Nothing -> error "Failed processLL"
 
 
 diffCmp :: FilePath -> FilePath -> TestM ()
@@ -480,7 +489,10 @@ processLL pfx f = do
 
 parseBC :: FilePath -> FilePath -> TestM (FilePath, Maybe FilePath)
 parseBC pfx bc = do
-  withFile (disasmBitCode pfx bc) $ \ norm -> do
+  withFile (X.handle
+            (\(_ :: GE.IOException) -> return "LLVM llvm-dis failed to parse this file")
+            (disasmBitCode pfx bc))
+    $ \ norm -> do
     (parsed, ast) <- processBitCode pfx bc
     Details dets <- gets showDetails
     when dets $ liftIO $ do
@@ -554,8 +566,53 @@ runCompileTest llvmVersion knownBugs sweet expct = do
                 -- Assemble and re-parse the bitcode to make sure it can be
                 -- round-tripped successfully.
                 with2Files (processLL pfx parsed1)
-                $ \(_, Just ast2) -> diffCmp ast1 ast2
-                  -- .ll files are not compared; see runAssemblyTest for details.
+                $ \(_, mb'ast2) -> case mb'ast2 of
+                                     Just ast2 -> diffCmp ast1 ast2
+                                     Nothing -> error "failed processLL"
+                  -- fst is ignored because .ll files are not compared; see
+                  -- runAssemblyTest for details.
+
+
+----------------------------------------------------------------------
+-- Pre-existing bitcode tests
+
+bitcodeCube :: VersionCheck -> TS.CUBE
+bitcodeCube llvmver = (assemblyCube llvmver)
+                        { TS.rootName = "*.bc"
+                        , TS.inputDirs = ["disasm-test/bc_src_tests"]
+                        , TS.sweetAdjuster = rangeMatch llvmver
+                        }
+
+runRawBCTest :: VersionCheck -> KnownBugs -> TS.Sweets -> TS.Expectation
+               -> IO [TestTree]
+runRawBCTest llvmVersion knownBugs sweet expct = do
+  shouldSkip <- skipTest expct
+  let tmod = if shouldSkip
+             then ignoreTestBecause "not valid for this LLVM version"
+             else case isKnownBug knownBugs sweet expct llvmVersion of
+                    Just (from, why) ->
+                      expectFailBecause $ why <> " [see " <> from <> "]"
+                    Nothing -> id
+  let pfx = TS.rootBaseName sweet
+  let bc = TS.rootFile sweet
+  return $ (:[]) $ tmod
+    $ testCaseM llvmVersion pfx
+    $ with2Files (parseBC pfx bc)
+        $ \(parsed1, ast) ->
+            case ast of
+              Nothing ->
+                -- No round trip, so this just verifies that the bitcode could be
+                -- parsed without generating an error.
+                return ()
+              Just ast1 ->
+                -- Assemble and re-parse the bitcode to make sure it can be
+                -- round-tripped successfully.
+                with2Files (processLL pfx parsed1)
+                $ \(_, mb'ast2) -> case mb'ast2 of
+                                     Just ast2 -> diffCmp ast1 ast2
+                                     Nothing -> error "Failed processLL"
+                  -- fst is ignored because .ll files are not compared; see
+                  -- runAssemblyTest for details.
 
 
 ----------------------------------------------------------------------
@@ -607,9 +664,7 @@ assembleToBitCode pfx file = do
   LLVMAs asm <- gets llvmAs
   X.bracketOnError
     (liftIO $ openBinaryTempFile tmp (pfx <.> "bc"))
-    (\(bc,_) -> do exists <- liftIO $ doesFileExist bc
-                   when exists $ rmFile bc
-    )
+    (rmFile . fst)
     $ \(bc,h) ->
         do liftIO $ hClose h
            callProc asm ["-o", bc, file]
@@ -623,9 +678,7 @@ compileToBitCode pfx file = do
   let comp = if ".cc" `isSuffixOf` file then comp' <> "++" else comp'
   X.bracketOnError
     (liftIO $ openBinaryTempFile tmp (pfx <.> "bc"))
-    (\(bc,_) -> do exists <- liftIO $ doesFileExist bc
-                   when exists $ rmFile bc
-    )
+    (rmFile . fst)
     $ \(bc,h) ->
         do liftIO $ hClose h
            callProc comp ["-c", "-emit-llvm", "-O0", "-g", "-o", bc, file]
@@ -639,9 +692,7 @@ disasmBitCode pfx file = do
   LLVMDis dis <- gets llvmDis
   X.bracketOnError
     (liftIO $ openTempFile tmp (pfx ++ "llvm-dis" <.> "ll"))
-    (\(norm,_) -> do exists <- liftIO $ doesFileExist norm
-                     when exists $ rmFile norm
-    )
+    (rmFile . fst)
     $ \(norm,h) ->
         do liftIO $ hClose h
            callProc dis ["-o", norm, file]
@@ -801,9 +852,11 @@ with2Files iofiles f =
 rmFile :: FilePath -> TestM ()
 rmFile tmp = do Keep keep <- gets keepTemp
                 unless keep
-                  $ do Details dets <- gets showDetails
-                       when dets $ liftIO $ putStrLn $ "## Removing " <> tmp
-                       liftIO $ removeFile tmp
+                  $ do do exists <- liftIO $ doesFileExist tmp
+                          when exists $ do
+                            Details dets <- gets showDetails
+                            when dets $ liftIO $ putStrLn $ "## Removing " <> tmp
+                            liftIO $ removeFile tmp
 
 ----------------------------------------------------------------------
 

diff --git a/disasm-test/bc_src_tests/README.md b/disasm-test/bc_src_tests/README.md
@@ -0,0 +1,53 @@
+This directory contains various LLVM bitcode files that should be used as the
+inputs to the `disasm-test` process.
+
+# Background
+
+There are multiple types of inputs to `disasm-test`:
+  * `.ll` files, which are converted to bitcode files via `llvm-as`
+  * `C`/`C++` files, which are converted to bitcode files via `clang`
+  * raw bitcode files, found here.
+
+As described by disasm-test, once a bitcode file is available, it is converted
+back into a text `.ll` format by LLVM's `llvm-dis` and this package's parsing
+library + `llvm-pretty` pretty-printing.  If that is successful, the resulting
+`.ll` file from the second method is used as the new input to repeat the above
+process, and the two results are compared for equivalence.
+
+# This Directory
+
+The contributions from *this* directory are bitcode-format files that are not--as
+part of this testing--generated from a `.ll` or `C` or `C++` file.
+
+Because the contents of this directory are not human readable (and usually
+generated from some other source/toolset), this README should be extended to
+include a description of what each bitcode file is intended to test and how it
+was generated (even if it wasn't generated locally).
+
+The tasty-sweet expectations will look in this directory for a `.bc` file as the
+root file, with a `.ll` file as the expected file, but (as described in the
+Purpose section below), the `.ll` represents the form of the `.bc` that is
+obtained via this parsing library (plus `llvm-pretty` pretty-printing) and not
+necessarily the original `.bc` file.  The contents of the `.ll` files here are as
+described in the `disasm-test/README.md`.
+
+# Purpose
+
+A primary example of this type of file is a bitcode file generated from Apple's
+modified CLANG/LLVM toolset; these files may contain metadata specifications that
+are unique to Apple and not merged into the upstream LLVM tools.  The
+`llvm-pretty-bc-parser` library should be permissive on parsing when it does not
+impact the binary code executed, and thus, this library should admit the
+Apple-specific bitcode even though the Apple-specific metadata will be dropped.
+Since the disasm-test tests run in multiple environments, the actual
+bitcode-format files from the Apple toolset are added directly here.
+
+# Test Manifest
+
+* `hello-world.bc` : This is a simple test to verify this set of test inputs.  It
+  is actually generated from `hello-world.ll` and should therefore return
+  success; it represents the benchmark for the other tests collected in this
+  directory.
+
+  Generated on Linux via: `llvm-as hello-world.ll -o hello-world.bc`
+  with `llvm-as` version 11.1.0.
diff --git a/disasm-test/bc_src_tests/hello-world.bc b/disasm-test/bc_src_tests/hello-world.bc
diff --git a/disasm-test/bc_src_tests/hello-world.ll b/disasm-test/bc_src_tests/hello-world.ll
@@ -0,0 +1,4 @@
+
+define void @hello_world() {
+	ret void
+}