From 05e7e8a2c0407518d4de203d055be0679195728d Mon Sep 17 00:00:00 2001 From: Tarun Prabhu Date: Thu, 8 Feb 2024 14:52:38 -0700 Subject: [PATCH] This is a squash of the OpenCilk repo onto LLVM 17's release branch. All credit for OpenCilk goes to the individuals listed in the commit message below. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9e7b5b83a06d24f18d53bce2ae84ad23e8629566 Author: TB Schardl Date: Fri Nov 24 14:35:53 2023 +0000 [test/Examples] Fix Tapir-Kaleidoscope test to check Tapir-IR code generation and execution as serial code. commit edf18d2cb3ccae6b82e8fc2bb724b6b4e210135f Author: TB Schardl Date: Fri Nov 24 13:58:07 2023 +0000 [examples] Add command-line options to control IR printing in Tapir-Kaleidoscope example. commit 371c986883d3759cd48edb1570238d3e39d02b03 Author: TB Schardl Date: Fri Nov 24 13:57:08 2023 +0000 [examples] Fix Tapir Kaleidoscope example to use new pass manager and updated OrcJIT interface. commit e551f77d3ee462c4c0ac35155d69ffa7c892ef91 Author: TB Schardl Date: Fri Nov 24 13:52:08 2023 +0000 Code cleanup and formatting. commit 6ef312d8799d05324d956a819c2e078505d00ee4 Author: TB Schardl Date: Sun Nov 12 20:10:57 2023 +0000 Fix bugs for rebase onto LLVM 17.0.4 commit 1f6d36945cbb4529b7a83e5eb557f02f7e9c4dfc Author: TB Schardl Date: Sun Oct 22 19:12:01 2023 +0000 [SimpleLoopUnswitch] Fix nontrivial loop-unswitching to work with Tapir, and restore relevant regression tests. commit e5aca09467aa9f5174bbaeba85aa667c5f055006 Author: TB Schardl Date: Sat Oct 21 00:31:59 2023 +0000 [AArch64] Modify read of threadpointer to be marked that it may load memory, rather than having unmodeled side effects. commit 052ab4e90209797c6da67f78f71255a79e71f3c8 Author: TB Schardl Date: Mon Oct 9 10:26:09 2023 +0000 [AArch64] Mark the read of the threadpointer as having unmodeled side effects to prevent misoptimization in programs where the executing thread may change across a function call. 
In principle, this change could be made more precise to enable some optimizations of this operation, including removal of redundant reads when there is no intervening function call. But it is not clear what attributes currently exist that could be attached to this operation to model its behavior more precisely. commit 8754ae43760485e9a168254f322816760871396d Author: TB Schardl Date: Mon Oct 9 10:25:43 2023 +0000 [test/Tapir] Fix requirement for test that needs X86 target. commit 6e75058f3cc757eb7ef959dcd3d1361d20595338 Author: TB Schardl Date: Fri Oct 20 00:15:22 2023 +0000 [InstCombine] Fix removal of adjacent tapir.runtime.start and tapir.runtime.end calls. commit b7220ab142f3e839b1dafc142b2213d561c05470 Author: TB Schardl Date: Wed Oct 18 14:15:46 2023 +0000 [AddressSanitizer] Analyze parallel-promotable allocas before ASan's instrumentation might invalidate task analysis. commit 95f8a8edb37b999745b0bb8772149c744a68eddc Author: TB Schardl Date: Sat Oct 14 11:48:49 2023 +0000 [JumpThreading] Fix jump threading to remove pointers to deleted basic blocks that contain Tapir instructions. commit aeeb5b1b1a582ec850bf960cbb349f5d021332eb Author: TB Schardl Date: Fri Oct 13 23:27:59 2023 +0000 [DRFScopedNoAliasAA] Fix a compiler warning. commit 76c49de053a605163c212d5f59ccccc6600608e4 Author: TB Schardl Date: Fri Oct 13 23:25:14 2023 +0000 [Tapir,Cilk] Fix a memory error and some memory leaks. commit a6641c8c2561e98f356435e936711584c81fbfeb Author: John F. Carr Date: Tue Oct 10 15:02:02 2023 -0400 Set memory(none) instead of readonly on strand_pure function definition commit 317b6d5ca7bcc44dd91771a65cd793b7f12b6408 Author: TB Schardl Date: Sat Oct 7 15:05:58 2023 +0000 [Attributes] Make sure that inlining propagates the stealable attribute on a function to callers. commit ec78c76b0a01faf207a28c66d1fff4ba9411500a Author: TB Schardl Date: Sat Sep 30 03:08:44 2023 +0000 [InlineFunction] Fix inlining of detaches with no unwinds into taskframes with unwind destinations. 
commit 4ebb3d94a206fd1222febaa618bf213f17cf84c1 Author: TB Schardl Date: Mon Sep 25 01:09:17 2023 +0000 [LoweringUtils] Ensure that calls to outline helper functions have proper debug information. commit 2686e68573527096ce8bb146777a2c7dd0cce785 Author: TB Schardl Date: Mon Sep 25 01:07:47 2023 +0000 [CilkSanitizer,CSI] Skip allocas in entry blocks when finding the initial place to insert instrumentation. This change ensures that allocas in spawning functions are on the original stack, allowing ASan to properly instrument those allocas without being disrupted by stack switching. Fixes issue OpenCilk/opencilk-project#197. commit 51ffb8acf34131b32f5b80e5ceb1011d9cd1a935 Author: TB Schardl Date: Mon Sep 25 00:56:19 2023 +0000 [CSI] Assign numbers to local sync regions and pass those numbers to Tapir CSI hooks, instead of a separate stack allocation. commit 0019cd04c112b31fbd6066ff0c85f572ba39e1c3 Author: TB Schardl Date: Mon Sep 25 00:46:05 2023 +0000 [SelectionDAGBuilder] Provide default serial lowering for task.frameaddress intrinsic. Fixes issue OpenCilk/opencilk-project#198. commit 17993c6aa8022d2f71d93044e7d0ac33d9a9f72f Author: TB Schardl Date: Mon Sep 25 00:21:46 2023 +0000 [SemaCilk] Throw error when a _Cilk_spawn spawns a return statement. Fixes issue OpenCilk/opencilk-project#194. commit 2525529acb32d203fbff77b60c8f30e490ff79ba Author: Tao B. Schardl Date: Sun Sep 17 09:20:04 2023 -0400 Update README.md Fix typos and add some extra notes. commit 0d946a072db5bea6a7bbb3a74d8ed36d21b78f8e Author: Tao B. Schardl Date: Sat Sep 16 14:13:50 2023 -0400 Simplify headers in README.md commit 59cdaa58f19432befdad8806db5f6bf5809d2c97 Author: Tao B. Schardl Date: Sat Sep 16 14:10:42 2023 -0400 Fix headers in README.md commit a488d8bf61588a8798888309bf85dfdb96e2b30f Author: Tao B. Schardl Date: Sat Sep 16 14:08:53 2023 -0400 Create README.md Add README that briefly overviews OpenCilk and how to use it. 
commit 5523fa6746cb7b67c112b52b89618602e1d9358c Author: TB Schardl Date: Fri Sep 15 12:33:50 2023 +0000 [README] Rename LLVM README file. commit dfdf56660eb70df5f2cd3c55717f7ed00f59461c Author: John F. Carr Date: Wed Sep 6 12:14:08 2023 -0400 Remove unused HyperToken attribute commit 5dcd8bcef85c451791da5f6b2717665a4d15c674 Author: TB Schardl Date: Fri Sep 1 20:21:27 2023 -0400 [test/Cilk] Fix tests to accommodate different constructor ABIs. commit a2a5e20b00da185d01f9caf510006efdadef560b Author: TB Schardl Date: Fri Sep 1 08:01:16 2023 -0400 [CilkSanitizer,CSI] Fix CFG setup to properly update the dominator tree and handle free functions. commit 13b7ac764d75e34b4b5c04ae093f274413334aca Author: John F. Carr Date: Thu Aug 31 14:33:25 2023 -0400 Use BuildBuiltinCallExpr for hyper_lookup too commit a78944f376cb512cd3d7069ed16454e019bc972f Author: John F. Carr Date: Wed Aug 30 16:42:08 2023 -0400 Use BuildBuiltinCallExpr to generate call to __builtin_addressof commit 5cac64f4f56f42e90e5eda94d494b14d833f99c5 Author: John F. Carr Date: Thu Aug 24 10:13:21 2023 -0400 Look through _Hyperobject in delete commit 8a409dfc1fc889ee3e4020208899dce3b2bc7c9b Author: TB Schardl Date: Sun Aug 20 13:53:50 2023 +0000 [github] Update workflows to run more tests, to run tests on pull requests, and to run tests on dev/ and ci/ branches. commit d8724a170163b266cd887f123efad6cb8f768e21 Author: TB Schardl Date: Sun Aug 20 13:42:13 2023 +0000 [CilkSanitizer,CSI] Instrument allocation and free functions as such even when the program is compiled with -fno-builtin. commit d96e33c920295ac1aa7abc929ecc64a4f145d4c7 Author: TB Schardl Date: Wed Aug 9 09:45:00 2023 +0000 [Intrinsics] Fix memory attributes of llvm.threadlocal.address intrinsic to prevent misoptimization of intrinsic in Cilk code. 
commit f2ebdcc6ab2c2600848c7743216d3250657864c1 Author: TB Schardl Date: Thu Aug 3 11:10:50 2023 +0000 [JumpThreading] Do not thread a detach-continue edge if the corresponding reattach-continue edge is not also threaded. commit 93fb23676c5a5ece416b1c4c1485ea6c3303bc24 Author: TB Schardl Date: Wed Jul 26 11:57:41 2023 +0000 [clang] Convert more instances of Optional to std::optional. commit 5e6c4f2974822d28abb52e851d86b318657e8a20 Author: TB Schardl Date: Sun Jul 23 22:30:06 2023 +0000 [test/Tapir] Remove tests using old pass manager. commit c8e60b12aa8c07a97a4ca57fab76e0bfde227317 Author: John F. Carr Date: Fri Jul 21 18:57:22 2023 -0400 Compute memory effects of outlined function commit 008a2e069ddb68c9843727d1f9d911eea2ca3dcd Author: John F. Carr Date: Thu Jul 20 20:32:17 2023 -0400 Use new memory effects interface for outlined function commit c3c430f335c47a7e142f0abac315a3a68d240c12 Author: TB Schardl Date: Fri Jul 14 14:25:59 2023 +0000 [LoopInfo] Fix bug in which getting task exits of a loop would exclude nested tasks. Fix issue OpenCilk/opencilk-project#177. commit e210d0c730d19dfac0cecb283e743f248e2d4904 Author: John F. Carr Date: Mon Jun 5 13:47:24 2023 -0400 Call overloaded unary operator on hyperobject commit 2c24a1354caa121ed28daa9c5f7149063761afa5 Author: John F. Carr Date: Mon Jun 5 15:34:56 2023 -0400 Allow hyperobject view lookup in overload resolution commit 2c8940f87fb5056b7f63630b49633dde363a6e1c Author: TB Schardl Date: Sat Jun 24 20:19:12 2023 +0000 [Tapir] Use internal linkage for generated helper functions, to ensure that they have symbols that tools can use. Fix issue OpenCilk/opencilk-project#172. commit 0f00524cbaf358995ecd61ce55b98939edc61cb1 Author: TB Schardl Date: Tue Jul 4 18:42:05 2023 +0000 [cmake] Remove unused logic for passing llvm-link path to external projects. 
commit 5dc8895b0f6da272e7c7f801bb7a2cd33675f980 Author: TB Schardl Date: Tue Jul 4 18:41:07 2023 +0000 [llvm-reduce] Ensure that Tapir instructions are replaced properly when removing basic blocks. commit 10c6f3d2e768151744ea60526e1eb9158672ad06 Author: TB Schardl Date: Tue Jul 4 17:43:49 2023 +0000 [SROA] Maintain Tapir task info as SROA modifies the CFG. TODO: Add functionality to incrementally update Tapir task info analysis. commit 18b3fbe2e3aebb270dd9b7466a789b557b340d36 Author: TB Schardl Date: Mon Jul 3 16:30:07 2023 -0400 Fix bugs for rebase onto LLVM 16.0.6 commit ee2dba003c86a78ac8870b5d67889e6df5e97303 Author: TB Schardl Date: Sun Jun 4 13:18:11 2023 +0000 [PassBuilder] Create separate TapirLoopLowerPipeline for lowering Tapir loops, and add options to run Tapir lowering pipelines via opt. commit 750fd88a40d90d4a9aed3dd5e58052adde9f3019 Author: TB Schardl Date: Mon May 29 14:12:32 2023 +0000 [github] Reenable tests on GitHub Actions. commit 80110eff05f6edfd427af5293f380d990928fbfd Author: TB Schardl Date: Mon May 29 19:43:44 2023 -0400 [test/Tapir] Generalize SROA test to fix test failure on macOS. commit b22925221b7f476a3aaf836e2cba8e99031f0760 Author: TB Schardl Date: Mon May 29 14:02:09 2023 +0000 [SimplifyCFG,TaskSimplify] Add hidden command-line options to disable optimization of removing detaches that immediately sync. commit e1a34b85ece70fff8b3c1b297d97a6962f5dfb5f Author: TB Schardl Date: Mon May 29 14:01:20 2023 +0000 [Tapir] Remove deprecated CudaABI and OpenMPABI Tapir targets. Add LambdaABI and OMPTaskABI Tapir targets from OpenCilk PPoPP'23 paper for targeting alternative parallel runtimes. commit e4d3b479a35685d95a49fac4e7cb4ff2dea4fc8a Author: TB Schardl Date: Mon May 22 01:52:35 2023 +0000 [Tapir] Code formatting. commit 7f44fab557139b1e61ccb9c7105b6b483c85f5be Author: TB Schardl Date: Mon May 22 01:51:39 2023 +0000 [InlineFunction] Fix insertion of landingpads for taskframes when multiple taskframes appear in the same basic block. 
commit 32f6f0be8204008f449f401c048363b7c6d35533 Author: TB Schardl Date: Mon May 22 01:48:29 2023 +0000 [TapirUtils] Modify FindTaskFrameCreateInBlock to optionally ignore a specific taskframe.create when searching a block for a taskframe.create. commit 1c76104684bcdd9c28b85d03a41fdc8120030fc0 Author: TB Schardl Date: Mon May 22 01:46:08 2023 +0000 [Verifier] Add check that the successor of a reattach has a predecessor terminated by a detach that dominates the reattach. commit e89d3bf0875a3da8f1ed852f589e96837b6c0310 Author: TB Schardl Date: Mon May 22 01:44:51 2023 +0000 [SSAUpdater] Fix handling of detach-continuation blocks with multiple detach and reattach predecessors. commit eb439ea8c2b10622dd9b28ad8ff29e301a9cb095 Author: TB Schardl Date: Thu May 18 17:39:20 2023 +0000 [OpenCilkABI] Fix OpenCilk target to ensure that any sync that can possibly throw an exception has an associated landingpad. commit a07679135eb1ac1d084815943feca0d4029a167b Author: TB Schardl Date: Tue Apr 18 01:14:31 2023 +0000 Fix bugs for rebase onto LLVM 15.0.7. commit 0b9ca933ffec4892d942a0725d3d3ff4763a3332 Author: Tao B. Schardl Date: Thu Apr 6 05:59:35 2023 -0400 Fix workaround in llvm-project-tests.yml for new macOS runner image. commit a475e5f5c47a70be9ca1e5777576cb61967c7748 Author: TB Schardl Date: Thu Mar 30 19:10:44 2023 +0000 [test/Tapir] Mark test that requires the X86 target as such. commit c98c75e6159ae279dd08cdf9af072b119bac32bf Author: TB Schardl Date: Sun Jan 29 20:21:12 2023 +0000 [CilkSanitizer] Special-case the instrumentation of hyper.lookup, to allow Cilksan library to implement its own handling of reducers separately from runtime system. commit 6396775792b42ee51d4e4f7a29629e2e0456770f Author: TB Schardl Date: Sat Jan 28 03:22:47 2023 +0000 [BasicAliasAnalysis,test/Cilk,test/Tapir] Fix test cases and alias analysis to accommodate new hyper.lookup signature. 
commit 09ac42508428f619c332ae0c27a4ac0b6e730ebc Author: TB Schardl Date: Wed Jan 4 00:31:50 2023 +0000 [CodeGen,Sema] Fix support for __hyper_lookup calls in dependent contexts. commit 56f889f3e21d6be19d9502af410138ce234af05a Author: TB Schardl Date: Sun Dec 18 22:03:43 2022 -0500 [Basic,CodeGen,Sema,IR,Tapir] Modify hyper_lookup intrinsic to pass additional information about the reducer being looked up, namely, its view size, identity function, and reduce function. commit 8ff678c882a5d1bd1e84495fcbefc49745807fcd Author: John F. Carr Date: Sat Feb 25 13:42:18 2023 -0500 Fix crashes on bad hyperobject declarations commit 4f43a60975c503c698dbde9aac31e33d8f731204 Author: TB Schardl Date: Wed Feb 1 03:23:48 2023 +0000 [CSI,CilkSanitizer] Identify detaches and their continuations that are associated with Tapir loops. commit 02dea84d7d5521638bae13bf88b132a540ee0f5b Author: TB Schardl Date: Tue Feb 7 11:36:49 2023 +0000 [TailRecursionElimination] Allow TRE to occur when a tail call is separated from a return by a sync and a tapir.runtime.end intrinsic. In such cases, TRE removes the tapir.runtime intrinsics altogether. commit b93ccc5bfb6c51905111c5bdac94c211328dda53 Author: TB Schardl Date: Tue Feb 7 11:34:51 2023 +0000 [TailRecursionElimination] Avoid performing tail-recursion elimination for a tail call followed by a sync if there exists a sync preceding that call in the function. TRE in this case can change the synchronization of the program, by causing some spawns to sync earlier than before. TRE in such cases appears to make parallel scalability worse. commit 99c8bf79d59311331c3839a45357aae9388cda17 Author: TB Schardl Date: Tue Feb 7 11:13:28 2023 +0000 [InstCombineCalls] Combine consecutive pairs of tapir.runtime intrinsics, to avoid needlessly stopping and resuming a parallel runtime. 
commit f7f8e08c655be46ab3b90808f1ed3a433fc11b43 Author: TB Schardl Date: Sat Feb 4 01:30:50 2023 +0000 [TapirUtils] Two changes involving optimizations with taskframes: - When serializing a detach, if the task contains a sync, replace the detach with a taskframe, to prevent that sync from synchronizing tasks in the parent. - Allow a taskframe containing allocas to be replaced with stacksave/stackrestore intrinsics. commit 32f4498946d58a21d0e2b5ca00c31e3083a2d414 Author: TB Schardl Date: Wed Feb 1 11:20:01 2023 +0000 [github] Update llvm-project-tests for new macOS 11 runner image. commit 8629b933e525db8b66fb29d7628f3697601825d0 Author: TB Schardl Date: Wed Feb 1 03:32:16 2023 +0000 [LoweringUtils] Identify blocks in taskframes that are shared outside of the taskframe, e.g., for __clang_call_terminate. These blocks may have PHI nodes that need to be updated during lowering. commit ebea8384f158dcbbcadd7228966776d01e39927a Author: TB Schardl Date: Sat Nov 12 20:27:51 2022 +0000 [Clang] Don't necessarily add the OpenCilk runtime bitcode ABI if a custom Tapir target is specified. commit 17c5191782250602488c7b0a3fce8f051aa330dc Author: John F. Carr Date: Tue Jan 3 16:22:19 2023 -0500 Test case for OpenCilk issue 157 commit 91590ae258da4a08b22e29ca0df060336b205083 Author: John F. Carr Date: Tue Jan 3 15:50:46 2023 -0500 Make isViewSet check the View flag instead of the Pure flag. Regression introduced by 52f8a61bc248d312da9269cfdddabad8ff51f9d6. commit b75ad9a29a76acdb52c84cf24930063988d19b6d Author: John F. Carr Date: Sat Dec 31 14:29:16 2022 -0500 Fix type checking of Cilk for loops with pointer loop variables commit 36ccb03b7c7ce2418bf72d995a18754fa1348ab6 Author: John F. 
Carr Date: Tue Dec 27 14:53:11 2022 -0500 Fix crash on type error in Cilk for statement commit 8f4bcb58ba33d7406263d80455c28aba5a7b218c Author: TB Schardl Date: Thu Dec 1 10:16:05 2022 -0500 [CSI] Ensure that all global variables CSI introduces have a name, because -flto expects all global symbols to have names. Fix issue #149. commit a6df8341db1ffe6ac967787dea8784af6988991f Author: TB Schardl Date: Thu Dec 1 07:17:25 2022 -0500 [test/Tapir] Add aarch64-target requirement to an aarch64 codegen test. commit e0b8e54b7d1c4f58632c141cc6a50cf103897402 Author: TB Schardl Date: Thu Dec 1 06:39:18 2022 -0500 [InlineFunction] Fix insertion of allocas when inlining function calls with byval arguments inside of tasks. Fix issue #148. commit 6991ce6ea07ee324c451a9360ac934ee04280d5f Author: TB Schardl Date: Wed Nov 9 20:25:31 2022 -0500 [Tapir] Adjust Tapir Target API to fix handling of analyses when a target handles Tapir instructions manually, i.e., without using the standard infrastructure to outline tasks. commit 7bc03ea1ca310534377dce4ba925c71e0b370af8 Author: TB Schardl Date: Tue Nov 8 11:50:14 2022 +0000 [InlineFunction,EHPersonalities] Add support for inlining functions that use the Cilk and GXX EH personalities. commit f623521ce37b8bf34a14b53f152e7b3f6e36153d Author: TB Schardl Date: Sun Nov 6 23:02:33 2022 +0000 [test/Cilk] Fix test for syncregion debug info to work on Linux. commit df1786d82e4a8ac49dc093748272517767c4cd00 Author: TB Schardl Date: Sun Nov 6 15:53:09 2022 -0500 [CGCilk,OpenCilkABI] Generate debug info on syncregion.start intrinsics to help ensure that the OpenCilk Tapir target can attach debug information to runtime-ABI calls it inserts. commit 166681fa315dd667633c11b6236207dfb0bf716b Author: TB Schardl Date: Fri Nov 4 00:06:26 2022 +0000 [OpenCilkABI] Fix typo in comments. 
commit aa64c25ee1e20078dc9375d3c2a4382cf5c8000f Author: TB Schardl Date: Fri Nov 4 00:05:26 2022 +0000 [InstCombine] Prevent InstCombine from sinking instructions from a continuation to after a sync instruction, as doing so is a pessimization. commit 22cb41d746a4367c9d689129c485a1393b1067c8 Author: John F. Carr Date: Wed Oct 26 09:56:54 2022 -0400 Demangle mangled hyperobject type commit ff0cb780b62bbc8c081414db8fc26ffe34c5b19f Author: TB Schardl Date: Sat Oct 22 09:05:16 2022 -0400 [CilkSanitizer] Spill complex arguments, including structures, onto the stack when passing them to hooks. commit 02690918b737504d0bf5ccfa1536a83b8009ae44 Author: TB Schardl Date: Thu Oct 20 22:10:13 2022 -0400 [DebugInfo] Fix compiler crash when calling findDbgValues for a Value that has a ConstantAsMetadata. commit 30e93e3013ba3aaf2e8dc4edea6394b2792bbcad Author: TB Schardl Date: Wed Oct 19 18:59:58 2022 -0400 [InlineFunction] Work around issue to allow inlining functions with different personalities where the caller is using the default personality. commit ddccecf6a5b4f0f9e316cb130fa6463af8e3ecc8 Author: TB Schardl Date: Wed Oct 19 18:58:41 2022 -0400 [TapirUtils] Fix logic to promote calls to invokes in taskframes when taskframe.end precedes another important terminator, such as a reattach. commit c7f6aae56a9039fd78d28b3312ebb1cde3d8a3b9 Author: TB Schardl Date: Wed Oct 19 17:58:17 2022 -0400 [TapirUtils] Fix logic to promote calls to invokes within tasks to handle cases where a taskframe comprises less than a single basic block. commit 7a808290c0045a23fcdc7b7ec6a27593a0be3a45 Author: TB Schardl Date: Thu Oct 13 10:46:41 2022 +0000 [TapirUtils] Support promoting calls to invokes in tasks that are not reachable. This functionality avoids compiler crashes on some codes that are instrumented with Sanitizers and compiled with no optimizations. 
commit ecacc75102deeea488a53c54f418d68906c484ad Author: TB Schardl Date: Thu Oct 13 10:39:50 2022 +0000 [TapirTaskInfo] Allow a task to use shared-EH spindles managed by an ancestor task that is not the immediate parent task. commit e1cdaaa5e896985c94d37af98e323ec1b1164dd0 Author: Tao B. Schardl Date: Fri Oct 7 14:19:55 2022 -0400 [github] Update llvm-project-tests.yml Fix include path for updated macOS runner commit cde7a91d7bc2c425367d93e52f7e07bd53ab0ab0 Author: TB Schardl Date: Mon Sep 19 01:21:53 2022 +0000 [test/Tapir] Update requirements on SLP-vectorization test. commit 307e3d7c452167b728ebcb20f256ae3c6651b506 Author: TB Schardl Date: Sun Sep 18 21:32:54 2022 +0000 [Passes] Fix pass pipeline to run CSE after SLP vectorization. Running CSE before SLP vectorization can disrupt the SLP vectorizer's ability to determine how to vectorize code. commit 7541c785ba2cbe3cb0c8b48f674ee0ebfca84096 Author: John F. Carr Date: Mon Sep 5 06:35:43 2022 -0400 Hyperobject lookups need special handling in any dependent context commit 0e31a81c0c1620e5526308057bd4c54433269e41 Author: TB Schardl Date: Sun Aug 28 15:47:19 2022 +0000 [test/Tapir] Add regression test for linking null bitcode module. commit a653bce91d9461be3ebecff8c1a8e4dcbdc389c3 Author: John F. Carr Date: Thu Aug 11 13:30:13 2022 -0400 Fill in all the missing pieces after failure to load bitcode commit 0b874f609e2fb47ba86d808c620d8b26dddac9f4 Author: John F. Carr Date: Thu Aug 11 13:04:03 2022 -0400 Do not try to link null bitcode module commit 096f06752fee07f997892321732065471f9cfcce Author: TB Schardl Date: Thu Aug 25 11:21:18 2022 +0000 [InlineFunction] Allow a function to be inlined into another with a different personality function if the callee simply uses the default personality function. This workaround addresses issue #127. commit 89918abac5655a23e4a83084fff2d72d09f3a1d1 Author: TB Schardl Date: Thu Aug 25 11:09:59 2022 +0000 [github] Make workflows consistent with workflows in mainline LLVM. 
commit 7a24497661eea1267b1df503542e04bd824d3dbc Author: TB Schardl Date: Mon Aug 1 13:29:44 2022 +0000 [CSI] Ignore unreachable basic blocks for instrumentation. Remove debug statement to fix issue #129. commit ab4e5b34f0fcb9c818267fdd0c800541f3d3a764 Author: TB Schardl Date: Sun Aug 21 02:01:32 2022 +0000 [github] Update llvm-project-tests based on upstream changes. commit c2f58be9762b72edf4ac7a038a18ce41ef4a07e6 Author: TB Schardl Date: Sun Aug 21 01:56:53 2022 +0000 [github] Disable issue-subscriber action. commit 35c54228228ef79e1458da1eb37a748f3f24ca2f Author: John F. Carr Date: Sun Jul 24 12:47:07 2022 -0400 Fix crash on undeclared reducer callback commit 672bb71d9272a0867562e147058810df2400151e Author: TB Schardl Date: Wed Jul 20 11:33:16 2022 +0000 [github] Update workflows for release. commit 615c152cec47318479e38c7e78aae6eadb8d5989 Author: TB Schardl Date: Tue Jul 19 13:42:35 2022 +0000 [ToolChain] When an OpenCilk resource directory is specified, add the include directory within that resource directory to the include path. commit 6d6cc7e8dc7ebf6144d4e339613fc8b06709d821 Author: TB Schardl Date: Mon Jul 18 11:58:48 2022 +0000 [CMakeLists] Add OpenCilk version number that is distinct from LLVM version number. commit 9493c3247b94dcccdafc1a9501933e81c0c2c395 Author: TB Schardl Date: Sun Jul 17 15:35:05 2022 +0000 [CSI][ThreadSanitizer] Fix promotion of calls to invokes within Tapir tasks. Fixes issue OpenCilk/opencilk-project#113. commit 3ee74ab8798c20c599fe8a410f40025f7da94333 Author: John F. Carr Date: Mon Jul 18 13:45:19 2022 -0400 Test for hyperobject with constructor but no destructor commit 683ff7fdeb85fc5943c71285c8cf4215f28b7f44 Author: John F. Carr Date: Mon Jul 18 10:17:13 2022 -0400 Fix test for trivial destructor commit cb5dbb6e36d31a3720eb294f28eb459e1e26af91 Author: TB Schardl Date: Thu Jul 14 11:44:37 2022 +0000 [github] Update workaround for building on macos-10.15 GitHub virtual environment. 
commit 461d443bb0eebbfda24d534bb8cda0f70624a232 Author: John F. Carr Date: Sun Jul 10 14:45:12 2022 -0400 Merge reducer destructor callback into reduce callback. commit 1930b5aef735a070dabdf5d1656ccbcfd6fb829f Author: John F. Carr Date: Sun Jul 10 14:23:01 2022 -0400 Visit statement children of HyperobjectType commit 9ff353d37b34ebc270792588675735e80ebc97af Author: TB Schardl Date: Sun Jul 10 20:48:53 2022 +0000 [CREDITS] Expand CREDITS.TXT to reflect recent contributions. commit 451ce36119e7547bcd4068976d15483c6cda1d8b Author: TB Schardl Date: Sun Jul 10 20:46:01 2022 +0000 [test/Tapir] Fix llvm test failures on non-x86 systems. commit 8321ebf8e6e19f46e4ceb48d465c590d9be65067 Author: TB Schardl Date: Sun Jul 10 20:45:24 2022 +0000 [test/Cilk] Fix clang test failures on non-x86 systems. commit a508e80246f815e447db539e4db81d8992064d9a Author: TB Schardl Date: Sun Jul 10 20:44:37 2022 +0000 [TapirUtils][SimplifyCFG][TaskSimplify] Remove taskframe intrinsics when serializing a detach where the task uses a taskframe. Clean up logic for serializing detaches that immediately sync. commit bdede63111267ffc435ac880385ad6e74cce77ee Author: TB Schardl Date: Fri Jul 8 14:13:34 2022 +0000 [BasicAliasAnalysis] Convert #define's to an enum class, to match code style of similar structures in LLVM. commit 249353b3f8076eb8de84e9330cd61006894c7abb Author: TB Schardl Date: Fri Jul 8 11:51:48 2022 +0000 [TableGen] Change additions to CodeGenIntrinsic to match LLVM code style, to avoid merge conflicts down the road. commit cccd276bcc0cc541141e0cd6d8253e07bdcf8b09 Author: TB Schardl Date: Thu Jul 7 19:29:19 2022 +0000 [SROA] Make SROA preserve the atomicity of load and store operations when it rewrites load and store operations. commit 1eb273e8d4c545ce17190463f1287fd0468f94e3 Author: TB Schardl Date: Thu Jul 7 13:55:47 2022 +0000 [SimpleLoopUnswitch] Fix compiler crash when unswitching a parallel loop with task-exit blocks. 
commit 5e76b20e134f03620d6b8040aa8425e3a6abeb68 Author: TB Schardl Date: Wed Jul 6 23:48:28 2022 +0000 [bindings/python] Update diagnostics test. commit ed514c2530691a5624ef20f4f5b03583e80e69b3 Author: John F. Carr Date: Wed Jul 6 16:12:01 2022 -0400 Improve compatibility testing of hyperobject types commit 42a01ebcdcf8a9ac6db44ed9d5dae644f7d9ac42 Author: TB Schardl Date: Wed Jul 6 19:33:24 2022 +0000 Fix a little formatting with clang-format. commit f2959caf0f419591cbf4b26b3a823df6f02c70f6 Author: TB Schardl Date: Wed Jul 6 19:22:13 2022 +0000 [github] Cleanup path. commit 7d3ffda22243175e0c7b1a52ca8ba42d6c59b0f9 Author: TB Schardl Date: Wed Jul 6 19:21:52 2022 +0000 [IntrinsicEmitter] Fix handling of new intrinsics, which fixes spurious test failures. Restore previous tests marked XFAIL. commit 0489a6fbecf17ad12c007e8197aa1c9de8be1d7f Author: John F. Carr Date: Wed Jul 6 12:04:08 2022 -0400 Fix rebuild of hyperobject reference in template expansion commit 04db56e81f8b7d9684a08a47cf417079bf2ff019 Author: John F. Carr Date: Wed Jul 6 11:18:59 2022 -0400 Test of reducer += inside a template expansion commit f738cbe7884ba00f9b1b4e33793a983889c732cd Author: John F. Carr Date: Wed Jul 6 11:14:38 2022 -0400 Check that llvm.hyper.lookup is called only once commit 794bda39b9704f541960a1bc11f367f467e4bf62 Author: John F. Carr Date: Tue Jul 5 14:00:02 2022 -0400 FreeBSD sanitizer needs -lstdthreads for mtx_init and related functions commit 021495ff738c68589d08c7e86dddfd10e81ff182 Author: John F. Carr Date: Sun Jul 3 22:13:24 2022 -0400 Template instantiations with integer, null, or missing arguments can be hyperobjects commit 26c32b06b1860d4c85cb37cc24366f89930e36e7 Author: John F. Carr Date: Sun Jul 3 13:33:00 2022 -0400 Try to unregister reducers the correct number of times commit c3178cc4a30d5b10844ffc642ec101f41c62187d Author: John F. 
Carr Date: Fri Jul 1 12:04:29 2022 -0400 XFAIL test with duplicate unregister call commit 2bbb3e5fdc7bbd88bc7fa27895f3c9f15825b85b Author: John F. Carr Date: Wed Jun 29 16:09:34 2022 -0400 Fix rebuild of hyperobject in using declaration commit de6d421bb9029fa25e4f2edd0b322556145b7434 Author: John F. Carr Date: Tue Jun 21 13:57:40 2022 -0400 Clean up reducer callback order commit 635399a93aac7cbb7e7a7a76cafc790f23f30817 Author: John F. Carr Date: Fri Jun 17 08:04:02 2022 -0400 Stricter type checking of reducer callback commit 32b2d8cd83676f12099d61a340477b99660fb64d Author: John F. Carr Date: Thu Jun 16 16:54:12 2022 -0400 Handle overloaded function as reducer callback commit 1ad8fb0ec3f43beaaf9d02788e48dfca2662edf6 Author: John F. Carr Date: Thu Jun 16 11:01:22 2022 -0400 Add noundef to expected LLVM IR commit 61bea1ff3305b0121b912ad44ce5e0355404a0cd Author: John F. Carr Date: Thu Jun 16 09:57:52 2022 -0400 Syntactic hyperobjects commit 66e55994cb95312f4a25bb48c582cb8f0df70066 Author: TB Schardl Date: Wed Jul 6 15:28:20 2022 +0000 [OpenCilkABI] Replace a SmallVector with an array. commit 89d05d95e4ab7533c4b2526968f0150e8e80e19a Author: TB Schardl Date: Wed Jul 6 15:27:43 2022 +0000 [CilkSanitizer] Remove unused variables. commit 639dc0f74ca81837f888e146dd329b814ba24d2d Author: TB Schardl Date: Tue Jul 5 12:30:55 2022 +0000 [Driver] Remove code to select alternate OpenCilk bitcode ABI file when pedigrees are enabled. commit ec10d525876570d0ac64c8b0328561864c606a7d Author: TB Schardl Date: Thu Jun 30 21:18:09 2022 +0000 [OpenCilkABI] Adjust diagnostic handling for cleaner error messages. commit 5a9148a66b7bb8c394a841fbed0b866eaa98ab54 Author: TB Schardl Date: Thu Jun 30 21:15:34 2022 +0000 [CudaABI] Resolve compiler warning temporarily. commit 8cea22dd0f232c533076029988a18b08017a3611 Author: TB Schardl Date: Wed Jul 6 15:30:27 2022 +0000 [github] Update CPLUS_INCLUDE_PATH workaround to accommodate updates to macOS virtual environment. 
commit acef38cfe906cd7f6f01ee96b90ead9fd9a291b2 Author: TB Schardl Date: Mon Jun 27 01:17:39 2022 +0000 [CSI] Properly emit diagnostic messages raised when linking a tool bitcode file. commit d0c5eca736f43ebbd51c9b914dddd3c2fa5fbe67 Author: TB Schardl Date: Sun Jun 26 18:39:20 2022 +0000 [gold-plugin] Add plugin options for Tapir lowering target and OpenCilk ABI bitcode file. commit d2ae9283a4171d918e7632214a2a6f4b6d08c2ba Author: TB Schardl Date: Sun Jun 26 18:38:31 2022 +0000 [InlineFunction] Fix stack overflow when inlining an invoked function that contains taskframe intrinsics that have not been split. commit 3edbf6c56d15bd382f3f0b8abb05bb42ecfc3cdf Author: TB Schardl Date: Sun Jun 26 18:34:48 2022 +0000 [CilkSanitizer] Add logic to synthesize default hooks for library functions that are not present in a linked tool bitcode file. commit c9b864da8c317240e1d8396d491775e741feabea Author: TB Schardl Date: Sun Jun 26 18:33:23 2022 +0000 [CilkSanitizer] Clean up synthesis and handling of MAAP checks. Enable load-store coalescing when potential racing instruction is local but not in the same loop. Cleanup code. commit dcf56d3b579a57a8354c4ea83a1cd1bc2eef125f Author: TB Schardl Date: Sun Jun 26 18:29:51 2022 +0000 [CSI][CilkSanitizer] Add property bit for loads and stores that are guaranteed to access thread-local storage. commit 2c15ea0c37c09a373cdfefea70db475fd8688bb7 Author: TB Schardl Date: Sun Jun 26 18:26:46 2022 +0000 [TapirRaceDetect] Add method to get mod-ref information for a local race. Include accesses to thread-local variables for race detection. Cleanup code. commit 965c8cc439fbbaa71828e7f3a5d32c402e3743e0 Author: TB Schardl Date: Wed Feb 16 15:51:36 2022 +0000 [CSI][CilkSanitizer] Draft change to support using a bitcode-ABI file for Cilksan. commit d2850fcd14b47a54f8c899fea53dfb5ea38388c0 Author: TB Schardl Date: Wed Feb 16 15:46:39 2022 +0000 [CilkSanitizer] Add logic to aggressively search for debug information to attach to instrumentation hooks. 
commit c611385b3cd116c8acbe9c1bb90b987ae0829d2e Author: TB Schardl Date: Wed Feb 16 15:44:08 2022 +0000 [CilkSanitizer] Fix type of sync-region-number arguments in hooks to match Cilksan driver ABI. commit 4cd1695c100c85578f2fd2819be41680dbae35b9 Author: TB Schardl Date: Fri Jun 24 22:22:44 2022 +0000 [Kaleidoscope/Tapir] Update Tapir Kaleidoscope example for LLVM 14. commit 3b1b87304aef097a88de3e40af110825bcb4e999 Author: TB Schardl Date: Fri Jun 24 17:57:57 2022 -0400 [OpenCilkABI] Properly emit diagnostic messages raised when linking a bitcode ABI file. commit 887184c964b3ff4b1293e51eb0297408aee993c2 Author: TB Schardl Date: Fri Jun 24 14:38:48 2022 -0400 [lld] Fix lld handling of command-line arguments for Tapir lowering at link time on Darwin. commit d114aeb38e54f8334a6ca26f5f954ea0bd2b0bde Author: TB Schardl Date: Fri Jun 24 13:39:43 2022 +0000 [lld][LTO] Update lld and LTO to pass flags for Tapir lowering at link time. Based on changes by George Stelle . commit 6f01c4f0a5a475ed166c41e8486e97e3ffbfa42c Author: TB Schardl Date: Fri Jun 24 11:09:46 2022 +0000 [MemoryBuiltins] Add method to get the arguments to a library allocation function, and use that method to simplify the logic in CilkSanitizer and CSI to instrument allocation functions. commit 788723fdd51840ca9fb712d0388c848f1e869016 Author: TB Schardl Date: Thu Jun 23 18:02:02 2022 +0000 [OpenCilkABI] Make the __cilkrts_stack_frame_align variable linkage private, to avoid linking issues when code is compiled with -fopencilk and no optimizations. Update regression tests with new OpenCilk ABI bitcode file. commit 49a91d69042012127355821db819d1b8c439fb2c Author: TB Schardl Date: Thu Jun 23 17:58:06 2022 +0000 [test/Tapir] Fix test case for rebase onto LLVM 14. commit f66e80d84bebab348805349c443b4d9bb5f11a80 Author: TB Schardl Date: Thu Mar 24 19:22:06 2022 +0000 [LoopUnroll] Put custom GraphTraits in llvm namespace, to appease GCC. 
commit 59b4c1c88f598a5452cd2455851838c0b5d4d347 Author: TB Schardl Date: Fri Mar 18 13:46:44 2022 +0000 [LoopUnroll] Clone task-exit blocks similarly to ordinary loop blocks during loop unrolling. commit 5a0c91c6aabd5f8eafeb0028061a6dbc23f6e552 Author: John F. Carr Date: Fri Mar 11 11:52:24 2022 -0500 Read __cilkrts_stack_frame alignment from bitcode if available commit 02012923412b9310c713fa41a55600aaacf94256 Author: TB Schardl Date: Tue Jun 21 12:59:54 2022 +0000 [IndVarSimplify] Add TapirIndVarSimplify pass, a version of the IndVarSimplify pass that applies only to Tapir loops. Run TapirIndVarSimplify before loop stripmining to help ensure that Tapir loops can be stripmined after other optimizations, such as loop peeling. Addresses issue #88. commit 77c8b63bb7172ccbd7daf53f2f952d4fc2dca3e6 Author: TB Schardl Date: Tue Jun 21 12:53:17 2022 +0000 [OpenCilkABI] Ensure that debug information is attached to __cilkrts_enter_landingpad calls. commit 11d18c63618e13e27f3d2f6dcd8abc6301abe9c5 Author: TB Schardl Date: Tue Jun 21 12:51:12 2022 +0000 [LoopSpawningTI] Set the debug location of a tapir.loop.grainsize intrinsic call to match the loop it applies to. commit fc811c47384077332dd78f32a98b2cd376634be3 Author: TB Schardl Date: Tue Jun 21 12:49:31 2022 +0000 [OpenCilkABI] Fix compiler warning about unused variable. commit cb26ec1ad0e0cd1be88e9e9f99c165ed3493aba0 Author: TB Schardl Date: Sun Jun 19 08:01:04 2022 -0400 [AArch64RegisterInfo] Fix AArch64 code generation to properly use the base register for stealable functions. commit cdd5eae928e63d4f2d117288ac6d1fce037edbe6 Author: TB Schardl Date: Mon Jun 13 11:29:55 2022 +0000 [CGCilk] Fix code generation of unreachable _Cilk_sync statements. Fixes issue #93. commit 42bb03c984419251c1d5f991584f962603dbd6a1 Author: TB Schardl Date: Mon Jun 13 11:27:19 2022 +0000 [Tapir] Remove some uses of getElementType. 
commit 6b7df525dafe13bd85ef2dc45e0c20812378041f Author: TB Schardl Date: Sun Jun 12 18:26:20 2022 +0000 [TailRecursionElimination] When eliminating a tail call that is separated from a return by only a sync, defer reinserting syncs into return blocks until tail calls have all been eliminated. This change helps TRE remove multiple tail calls that are each separated from a common return by a sync. commit c76ab34aaeb322bdd10445bec97aad0f0903a6e0 Author: TB Schardl Date: Sun Jun 12 18:23:44 2022 +0000 [TapirUtils] When splitting blocks around taskframe intrinsics, try to ensure that a branch inserted after a taskframe.end intrinsic has debug info. commit 1abcc7a38ce6c82351c28abb558da0c419ab54d9 Author: TB Schardl Date: Sun Jun 12 18:21:56 2022 +0000 [SimplifyCFG] Simplify a sync when it immediately leads to another sync in the same sync region. Improve logic for simplifying branches around Tapir instructions. commit 2684472245e9b1b5ce3713e8c492adcd9857a77f Author: TB Schardl Date: Sun Jun 12 18:18:51 2022 +0000 [Outline] When outlining a helper function, remove any prologue data cloned from the parent function onto the helper. Addresses issue #77. commit fe8ff8f3f79949122d8893a8928a1fde965b4427 Author: TB Schardl Date: Sun Jun 12 18:15:35 2022 +0000 [CSI,CilkSanitizer] Add CSISetup pass to canonicalize the CFG before instrumentation when the canonicalization does not preserve analyses. commit 7003d3570447576b00d99aad7287c28b2ce4ab1e Author: TB Schardl Date: Sun Jun 12 14:36:15 2022 +0000 [test/Cilk] Add triple command-line argument to test. commit 66f1485b58a794d4db9f17fb168f3aa91eb7e9bd Author: TB Schardl Date: Sun Jun 12 14:34:40 2022 +0000 [cindex.py] Incorporate Cilk changes into Python bindings. commit 3d8b1e881c1e0d1285f07ddfac99f0898d908b81 Author: TB Schardl Date: Sat Jun 11 21:06:15 2022 +0000 [github] Fix GitHub workflows for opencilk-project. 
commit 004a118d1f18670af1499720318d4d09c5940c5d Author: TB Schardl Date: Sat Jun 11 20:59:23 2022 +0000 Bug fixes for rebase onto LLVM 14.0.5 commit 9626886e087a11dcfaffb8998499aabfe2855a12 Author: TB Schardl Date: Sat Jun 4 11:38:10 2022 +0000 [Local] Fix bug when removing the unwind destination of a detach instruction that is used by multiple detach instructions. commit 323bfeded53653b3f98a03d682ee00126c79192b Author: TB Schardl Date: Thu Mar 10 01:14:28 2022 +0000 [llvm/test] Cleanup and fix a couple Tapir regression tests. commit 3a36939d4a5217c21dae4f567972112900bea426 Author: TB Schardl Date: Mon Mar 7 22:58:48 2022 +0000 Bug fixes for rebase onto LLVM 13.0.1 commit 269b232fe59a569a8c1fc6294bf30f5752c6fd38 Author: TB Schardl Date: Wed Feb 16 15:29:24 2022 +0000 [cmake] Propagate path to llvm-link tool when adding external projects. commit 01aa4fb1628661cb8da8c1b8888be1eec31bd6f0 Author: TB Schardl Date: Wed Feb 16 14:55:52 2022 +0000 [cmake] Add more dependencies between components. commit 34504b8abbab440563f728c517900c373fd04b7f Author: George Stelle Date: Wed Oct 27 09:05:52 2021 -0600 [cmake] Added IRReader dependency to instrumentation component commit 20452488ed2428f9f46e8ab6f36776feb23ae955 Author: TB Schardl Date: Sun Jan 9 00:34:33 2022 +0000 [test/Tapir] Fix output path for GCOV test. commit 755042ce8700e5051cd7fc03289a718824a54357 Author: TB Schardl Date: Sat Jan 8 21:29:45 2022 +0000 [GCOVProfiling] Ensure that GCOVProfiling does not try to split critical edges from detach instructions. commit d961aa51938981ba3f11625b269c3d0f9332adcf Author: TB Schardl Date: Wed Dec 22 19:57:12 2021 +0000 [Tapir][TapirUtils] Add logic to maintain debug locations to enable inlining of bitcode ABI functions with debug information. commit 171342baac4cb817ab415b5c8266b70b3142c314 Author: TB Schardl Date: Sun Nov 21 18:21:01 2021 +0000 [ParseCilk][CGCilk] Fix compiler crashes on bad _Cilk_for inputs. Co-authored-by: John F. 
Carr commit 1b0608b28fa1954d27bab2859e9cf174d9789da8 Author: TB Schardl Date: Thu Oct 28 19:45:18 2021 +0000 [TapirRaceDetect] Fix crash when checking an indirect call instruction in a block terminated by unreachable. commit 515ee9978d72e11be8ea4e50e04d8c4165aafd07 Author: TB Schardl Date: Thu Oct 28 19:43:08 2021 +0000 [MachineSink] Refine handling of EH_SjLj_Setup constructs to fix performance regressions. commit 309ea1ba51d8689e823bbb8107860c89e6986bf3 Author: TB Schardl Date: Sun Oct 24 00:26:03 2021 +0000 [MachineSink] Ensure that arithmetic that stores results onto the stack is not sunk into a setjmp construct, specifically, between the longjmp destination and the test. Addresses issue #78. commit 77c63f94293aadfb088474ca655b28abe24fd25c Author: TB Schardl Date: Sun Oct 24 00:17:48 2021 +0000 [CGCilk] Fix cilk_for-loop code generation when emission of loop variable may introduce new basic blocks. Addresses issue #77. commit 7bfda87d06194f5bf2a539513cf590b1be8b5b31 Author: TB Schardl Date: Sat Oct 16 20:17:57 2021 +0000 [InlineFunction] Fix logic to update PHI nodes in an exceptional block when inlining tasks at an invoke instruction. Addresses issue #73. commit 9ff936ce98ef4121fd43b3458302ae5fe2d0e1c2 Author: TB Schardl Date: Sat Oct 16 19:15:07 2021 +0000 [CilkSanitizer] Ensure that CilkSanitizer pass properly handles Tapir intrinsics not within any task, i.e., in unreachable blocks. commit cbd90428774adc9b3753993c3002cd8c84973a85 Author: TB Schardl Date: Sat Oct 16 19:11:43 2021 +0000 [DependenceAnalysis] Fix crash in dependence analysis when analyzing memory references in different loop nests. commit f3aae1752175752975be1b23fe6bde48dec8345d Author: TB Schardl Date: Sun Sep 26 19:54:51 2021 +0000 [SemaExpr] Fix Sema test to disallow _Cilk_spawn on the right-hand side of a compound assignment. 
commit e2908075ed24effe71403153d07fa1122bc77342 Author: TB Schardl Date: Fri Sep 24 02:25:42 2021 +0000 [TapirRaceDetect] Add logic to handle pointers that cannot be stripped when checking whether a pointer is captured. commit 8154ee135b6cd516068448761bd51d4ff167ee82 Author: Alexandros-Stavros Iliopoulos <1577182+ailiop@users.noreply.github.com> Date: Tue Sep 7 20:13:03 2021 +0000 Create issue template for bug reports commit 463b18d8708ea71b85cebc0b5fe165527cc71fcd Author: William M. Leiserson Date: Mon Jun 28 20:41:33 2021 -0400 [Tapir] Minor modifications to the OCaml bindings; Add OCaml bindings to the regression test suite. commit 42e23a547836772d239b0c7c15a6005810fa6047 Author: William M. Leiserson Date: Fri Jun 25 15:04:48 2021 -0400 [Tapir] Update OCaml bindings. commit 84ac2833abfb2037cc0d74e9a6934a233abf26e3 Author: TB Schardl Date: Thu Sep 16 03:16:17 2021 +0000 [CilkSanitizer][Kaleidoscope] Start deprecating the JitMode option, as it no longer seems necessary with recent changes to LLVM's JIT infrastructure. Clean up Tapir Kaleidoscope code. commit fc132e62d5297bd580febe0be5f8ebd9484e19aa Author: TB Schardl Date: Sun Sep 12 19:15:25 2021 +0000 [OpenCilkABI] Mark all functions internalized from an external bitcode file as available_externally, regardless of whether OpenCilkABI uses that function. commit 0bc52e4863eeaa9f8824cbf494c9fb28cbc6885f Author: TB Schardl Date: Sun Sep 12 19:13:00 2021 +0000 [OpenCilkABI] Remove argmemonly attribute on spawning functions to ensure that changes to the stackframe flags are properly observed when a spawning function calls another spawning function. commit 23b6de4ef8f5830059e43c47aa9fdea198c7a57e Author: TB Schardl Date: Sun Sep 12 19:06:45 2021 +0000 [PassBuilder][PassManagerBuilder] Run LICM before loop spawning to ensure loop-invariant conditions appear outside of the loop. Addresses #66. 
commit 74b69d346fd5b539e049afc7862f257b8d61ee4c Author: TB Schardl Date: Sun Sep 12 19:04:04 2021 +0000 [TapirLoopInfo] Fix handling of Tapir loops with loop-variant conditions. Fixes #66. commit a26d892c951182fae0231875cbe21b0e6bd4805b Author: TB Schardl Date: Mon Sep 6 22:10:28 2021 +0000 [CilkSanitizer][CSI] Avoid inserting hooks or PHI nodes in placeholder destinations of task exits. This commit addresses #62. commit 8b7e52844ccedfc28c9ca8f701762c7cf51151bf Author: TB Schardl Date: Fri Sep 3 02:13:19 2021 +0000 [OpenCilkABI] Remove redundant code from OpenCilkABI, and add documentation about attributes and linkage for the CilkRTS ABI functions. commit 5f5b3c197d41cbcf867f93b3eecf483a2d206c4d Author: TB Schardl Date: Tue Aug 31 15:31:20 2021 -0400 [TapirTaskInfo] Remove false assertion that the continuation spindle of a detach must not be the same as the spindle containing the detach itself. commit 0f1afa2b0cb90bc258542cf648e60020376d2109 Author: TB Schardl Date: Mon Aug 30 23:57:17 2021 -0400 [Darwin] Automatically use ASan-enabled version of OpenCilk runtime system on Darwin when the Cilk program is compiled with ASan. commit eeef2443122ae71e57f22a5944ee28ddbbad34b8 Author: TB Schardl Date: Tue Aug 31 02:20:17 2021 +0000 [Driver] Automatically use ASan-enabled version of OpenCilk runtime system when the Cilk program is compiled with ASan. commit aaff3f5db5c5136bbea164bcd3b1000a3f2d98b7 Author: TB Schardl Date: Thu Aug 26 12:42:12 2021 +0000 [Kaleidoscope][CSI] Fix Tapir Kaleidoscope code to link against the OpenCilk and Cilksan runtime libraries and run initializers for Cilksan. - Revert changes to general KaleidoscopeJIT code, and add custom KaleidoscopeJIT to Tapir Kaleidoscope example, which supports loading external libraries and running initializers. - Fix CSI jitMode operation to emit llvm.global_ctors entry even when used in a JIT. This CSI change complements recent changes to how Orc handles initializers. 
commit 0870a416b689118bc077793433375c158e017a5d Author: TB Schardl Date: Tue Aug 24 03:48:57 2021 +0000 [Kaleidoscope] Fix up the Tapir Kaleidoscope example code to use the OpenCilk Tapir target. Some additional work is needed to fix running parallel code using the OpenCilk runtime or Cilksan. commit 3cdb414d46ecfabb2f2932f0519cd988e2e79327 Author: TB Schardl Date: Tue Aug 24 03:16:57 2021 +0000 [CMake] Add Vectorize as a link component of TapirOpts LLVM component library. commit ce08dc233d5385e685fb5d2470a9d5d373adc63e Author: TB Schardl Date: Tue Aug 24 03:15:32 2021 +0000 [LoopStripMinePass] Ensure that loop-stripmining inserts nested syncs when targeting OpenCilk. commit 61501bb8f941d4de750691f26865a6c6f085452d Author: TB Schardl Date: Tue Aug 24 02:59:38 2021 +0000 [Driver][CodeGen] Pass OpenCilk runtime-ABI bitcode file to the OpenCilkABI Tapir target as a Tapir-target option. commit a74202e24967355cbc8f6940ab359453d65633ee Author: TB Schardl Date: Tue Aug 24 02:46:42 2021 +0000 [OpenCilkABI] Get OpenCilk runtime-ABI bitcode file from OpenCilk Tapir-target option. commit b8bcb0430276c826aba92af464d318136338e5ca Author: TB Schardl Date: Tue Aug 24 02:44:17 2021 +0000 [TapirTargetIDs] Add facility to pass options to specific Tapir targets using TLI. Add OpenCilkABI Tapir-target option. commit fabb24be55bf96e6aeec5bbef3f7861f5ecc29b9 Author: TB Schardl Date: Thu Aug 19 14:23:01 2021 +0000 [Tapir] Don't propagate the noreturn attribute to outlined helper functions when the parent is marked noreturn. Similarly, when using DAC loop spawning, don't propagate norecurse to the outlined helper function. commit 6e0197429783874fe5a934f766ff8f5c58b52da2 Author: TB Schardl Date: Mon Aug 9 15:54:59 2021 +0000 [Cilk] Fix the scope definition on _Cilk_scope's, and add basic jump diagnostics to disallow jumping into the middle of a _Cilk_scope. Add a couple simple regression tests for _Cilk_scope's. 
commit d1632c7a735f24046020633e0b2f001829628481 Author: TB Schardl Date: Sun Aug 8 22:24:25 2021 -0400 [GVN] Prevent GVN from splitting critical detach-continue edges. commit afef9ca56015cd17a667ff1e0e98c8b43bc2a5e4 Author: TB Schardl Date: Sun Aug 8 22:21:53 2021 -0400 [LoopStripMine] Fix test to not require the same loop-cost analysis on all systems. commit 96a4f28c0ef76f6e344c8b35b2e54d74e6e39821 Author: TB Schardl Date: Sat Aug 7 12:02:05 2021 +0000 [OpenCilkABI] Add handling of tapir.runtime.{start,end} intrinsic calls to guide insertion of __cilkrts_enter_frame and __cilkrts_leave_frame ABI calls. commit ec11e602a464d9032015322e8b924a2161825b93 Author: TB Schardl Date: Sat Aug 7 11:56:17 2021 +0000 [OpenCilkABI] Update target to reflect new runtime ABI. Update error handling when the OpenCilkABI target fails to load the runtime ABI bitcode file (and -debug-abi-calls is not specified). commit 3cc10b1efcd02db417bf1d01201c853bcefbe3f5 Author: TB Schardl Date: Fri Aug 6 15:01:45 2021 +0000 [TapirToTarget] Allow Tapir targets to maintain mappings keyed on taskframe-entry blocks in the original function. Add a separate processing routine to handle functions that do not spawn and are not spawned. commit c5b4e5161f69ad8b2be65319e2ad7fbed0309a11 Author: TB Schardl Date: Fri Aug 6 14:46:22 2021 +0000 [LoopStripMine] Fix handling of shared EH blocks so that, if the new parallel loop after stripmining is spawned, then shared EH blocks do not end up shared between the spawned loop and the parent task. This commit addresses issue #58. commit 7e879619c1e50f411cabf5b3253c39e99e6586e6 Author: TB Schardl Date: Fri Aug 6 14:34:53 2021 +0000 [TapirUtils] Allow passes to clone exception-handling blocks of a task. Allow SerializeDetach and cloneEHBlocks to update LoopInfo. 
Fix dominator-tree updating logic in SerializeDetach and cloneEHBlocks. commit 79eca2d7caaed0148fa2598ed4839265ab0862ad Author: TB Schardl Date: Fri Aug 6 13:51:48 2021 +0000 [TapirTaskInfo] Fix identification of unassociated-taskframe spindles when taskframe.create and taskframe.end are in the same basic block. commit 771f986363ca1e72be699720f2520cf516c2ef88 Author: TB Schardl Date: Mon Aug 2 21:01:39 2021 +0000 [CGCilk][Intrinsics] Associate tapir.runtime.end intrinsics with tapir.runtime.start intrinsics using tokens. Update _Cilk_scope code generation accordingly. Avoid inserting tapir.runtime.{start,end} intrinsics unnecessarily in spawning functions. commit 702c4169fbbbe9dfb2e77550d9994f9ac0ec53be Author: TB Schardl Date: Thu Jul 29 13:42:10 2021 +0000 [CodeGenFunction][CGCilk] Optimize emission of tapir_runtime_{start,end} intrinsics when nested _Cilk_scopes are used within a function. commit feed355d73e532cdfb0615f223bb15a6c98f6b8a Author: TB Schardl Date: Thu Jul 29 13:36:12 2021 +0000 [TapirToTarget][LoweringUtils] Add generic handling of tapir_runtime_{start,end} intrinsics to Tapir targets. commit 007f01951350f123d20593510b076e1ec2d059c4 Author: TB Schardl Date: Sat Jul 24 16:15:06 2021 +0000 [clang/Cilk][Intrinsics] Add _Cilk_scope construct, a lexical scope that guarantees upon exit to _Cilk_sync any tasks spawned within the scope. Add intrinsics to allow the _Cilk_scope construct to mark where a Tapir-target runtime may be started and stopped at the beginning and end of the scope, respectively. These intrinsics are meant only to be hints to a Tapir-target runtime, not definitive markers for where the runtime must be started or stopped. This requirement is necessary to make it safe to nest _Cilk_scopes, either within the same function or indirectly via function calls. No particular compiler optimizations are included for these intrinsics, although some optimizations may be useful to add in the future. 
commit e48b168f12dbb2f8d11d47282b63b24a42ac9cbf Author: TB Schardl Date: Sat Jul 24 16:01:26 2021 +0000 [CilkSanitizer][CSI] Extend Cilksan ABI and instrumentation to support race-detecting some Cilk programs that throw exceptions. commit f795e7fb47e17eb99dd569d6c64826cc5086afdc Author: TB Schardl Date: Thu Jul 22 22:58:25 2021 +0000 [CodeGen][Sema] Cleanup Cilk-related code, primarily around code-generation for implicit syncs. commit 84c3ff60e5c6c81f9f6fb26da5659238d55b8f4b Author: TB Schardl Date: Thu Jul 22 21:25:31 2021 +0000 [StmtCilk] Cleanup implementation of CilkSpawnStmt. commit 671a5eaf1f53881640c560335e06ff0b9b8c6395 Author: TB Schardl Date: Thu Jun 3 14:41:35 2021 -0400 [TapirLoopInfo] When searching for a primary induction variable for a Tapir loop, select the widest induction-variable type only among the possible primary induction variables, i.e., integer IVs that start at 0 and have step value 1. This change allows LoopSpawning to handle Tapir loops with multiple IVs where the primary IV is not necessarily the integer IV with the widest type. commit 0be4014fed4d285f8e257eb675ef2d0c2a939070 Author: TB Schardl Date: Thu Jun 3 14:38:08 2021 -0400 [test/Tapir] Mark regression tests requiring an x86_64 target. commit fbc80f0d6f86bb924d39073ad04d9584ecbb03dc Author: TB Schardl Date: Thu Jun 3 08:07:28 2021 -0400 [compiler-rt/cmake] Create custom target for outline atomic helpers, and use this custom target for targets that use these helpers. This additional target resolves a race that arises with the Makefile generator in creating outline atomic helpers for multiple builtin libraries in parallel. commit 5fd97de28c1d4b440bd6eb1efaf4bbbeb4289d68 Author: TB Schardl Date: Tue Jun 1 10:06:42 2021 -0400 [Darwin] Update logic to link OpenCilk runtime on MacOS to handle changes to OpenCilk runtime build system. 
commit e5dd814d97647f56d7a454e446b79ab431883940 Author: TB Schardl Date: Fri May 28 18:18:12 2021 +0000 [ToolChain] Update logic to link OpenCilk runtime to handle changes to OpenCilk runtime build system. Specifically, add logic to handle the case where the runtime library is compiled with the target architecture added to the library filename. commit fd07ddd86cc63e12dc986be768751d1791aff546 Author: TB Schardl Date: Thu May 27 14:29:11 2021 +0000 [CodeGen][Instrumentation] Resolve some compilation warnings when building with newer compilers. commit f418e5a3b1ddda2f9883aee89eead46cac58f408 Author: TB Schardl Date: Fri Apr 30 12:01:52 2021 +0000 [CSI] Adjust code for adding CSI's constructor to llvm.global_ctors, in order to help bugpoint work on compiler crashes involving CSI and related instrumentation passes. commit de41edb32cc49a8ec5707d42054333c449420e41 Author: TB Schardl Date: Fri Apr 30 11:43:24 2021 +0000 [InstCombine] Optimize tests for removing Tapir intrinsics. commit 79043b03bb7a0c573f8c1048a1bfce6ba1096d13 Author: TB Schardl Date: Thu Apr 29 17:03:20 2021 +0000 [Outline] Fix processing of debug metadata during Tapir outlining that causes metadata in llvm.dbg intrinsics to be incorrectly mapped. commit 2025e34e4f69ca2b6caae36e11af2da73b7d27ec Author: TB Schardl Date: Thu Apr 29 17:01:08 2021 +0000 [CilkSanitizer][CSI] Fix handling of null pointers, intrinsics, and function-pointer arguments. commit 3042493f962a4d3e85a5e26edd0e86b0b7534df7 Author: TB Schardl Date: Thu Apr 29 14:01:28 2021 +0000 [CilkSanitizer][CSI] Modify splitting of simple unreachable-block predecessors to preserve CFG invariants around detached.rethrow instructions. commit 4373949d008613febfd00d93ffeb8514d835065d Author: TB Schardl Date: Mon Apr 26 18:26:14 2021 +0000 [LoweringUtils] Handle PHIs in the unwind destination of a detach when outlining Tapir tasks. 
commit 66bb692a74308dae0b305e1159ee0e8c7b0d4dee Author: TB Schardl Date: Mon Apr 26 14:56:25 2021 +0000 [LoopStripMine] Fix maintenance of PHI nodes in the unwind destination of a Tapir loop during stripmining. commit af7ec77c5fa74ed2b851bc407ac38a1527b2304d Author: TB Schardl Date: Mon Apr 26 12:31:34 2021 +0000 Bug fixes for rebase onto LLVM 12.0.0 commit a6a7624642aa524ffebf45a64d56278dbddf503f Author: TB Schardl Date: Mon Apr 5 13:07:08 2021 +0000 [LoopUtils] Fix typo in stripmine.enable attribute name. commit fe52c8f8fecbe1cd0f2e236a4c1c87638df667a9 Author: TB Schardl Date: Mon Apr 5 13:05:54 2021 +0000 [InlineFunction] Update PHI nodes properly when function inlining introduces a taskframe.resume or detached.rethrow. commit f6793e3030e954f8c6c0a5a7e9a87bce1d4373a3 Author: TB Schardl Date: Mon Apr 5 13:04:01 2021 +0000 [LoweringUtils] Update PHI nodes in continuation blocks of outlined task frames. commit 42e1e5f340a61dbd096036ad819f67f9ac64bfa1 Author: TB Schardl Date: Mon Apr 5 13:01:59 2021 +0000 [TapirToTarget] Ensure that Tapir lowering never attempts to process a function with no body. commit 18ee06baede9a40f3c7e3d6592aed925b42bb559 Author: TB Schardl Date: Sun Mar 28 17:50:37 2021 +0000 [OpenCilkABI] Adjust how runtime-ABI functions are marked to ensure they never end up defined in the final object file. commit e184a4cfda51c905b14f543747954c50b090809e Author: TB Schardl Date: Sun Mar 28 17:15:12 2021 +0000 [CilkSanitizer] Fix computation of MAAPs and function race info to ensure races are properly checked and reported. commit f5fd041fa362ddbb6eca5d2c7e30783465226958 Author: TB Schardl Date: Mon Mar 22 13:38:52 2021 +0000 [Driver] Add -shared-libcilktool and -static-libcilktool flags to control whether a Cilktool's runtime library is linked statically or dynamically. commit a0e840c07e13d0eadc4fa7e5a7cc8c70b5d407c7 Author: TB Schardl Date: Mon Mar 22 03:30:09 2021 +0000 [CommonArgs] Prevent Cilksan from being linked twice when dynamically linked. 
commit 82a1f870afce0225f0e657f3d898e7140907645d Author: TB Schardl Date: Fri Mar 19 13:01:00 2021 +0000 [MemorySSA] Change MemorySSA to depend on TaskInfo, and update relevant LLVM passes to ensure the legacy pass manager supports this dependency. commit a40d6019e1f0fa40aa166fc99cb1ee5aab77e783 Author: TB Schardl Date: Fri Mar 19 12:57:17 2021 +0000 [RegisterCoalescer] Enable limited register-coalescing in functions that expose returns-twice only via LLVM's setjmp intrinsic. commit 66a5bf8c2c9bcdcad85e8d3cabf78d19901cd5b9 Author: TB Schardl Date: Fri Mar 19 12:55:30 2021 +0000 [CodeGen] Add a MachineFunction property to identify when a function exposes returns-twice via a function other than LLVM's setjmp intrinsic. commit 609365a91dbc582bed89f5cf64a189d1a87b3c21 Author: TB Schardl Date: Fri Mar 19 12:50:11 2021 +0000 [MachineSink] Remove workaround for machine-sink optimization in the presence of setjmps, which no longer seems necessary as of LLVM 10. commit 9148d3000e52a414a01096820e1c0745ffa59410 Author: TB Schardl Date: Fri Mar 19 12:45:29 2021 +0000 [SimplifyCFGPass] When removing useless syncs, don't use the placement of syncregion.start intrinsics to direct the CFG traversal. commit c3141b3974d7e485168b5e1b6fe9fd11f8ceb219 Author: TB Schardl Date: Fri Mar 19 12:42:23 2021 +0000 [JumpThreading] Fix jump-threading to accommodate Tapir instructions when threading through two blocks. commit b422181ad7e731fb663b98f835dd47c7727f129d Author: TB Schardl Date: Tue Mar 9 19:24:24 2021 +0000 Bug fixes for rebase onto LLVM 11.1.0 commit 600300bc4c1c17ba04d2e3dd7da202b37e826c48 Author: TB Schardl Date: Thu Mar 4 21:08:55 2021 +0000 [CodeGenFunction] Fix emission of implicit syncs before returns for return statements processed early in a function. commit be59935040732156080f28d54d040bcb79f01bf5 Author: TB Schardl Date: Mon Mar 1 20:09:41 2021 -0500 [runtimes] Propagate CMAKE_OSX_DEPLOYMENT_TARGET when building runtimes. 
commit b9a25c5c5be89edb3c6ed48e53b3e31745d33eb3 Author: TB Schardl Date: Mon Mar 1 18:57:14 2021 +0000 [CSI] When checking llvm.global_ctors for whether a function should be instrumented, properly handle the case where llvm.global_ctors exists but is zero-initialized. commit b1e53f571296252cb8b492d93b17dc62d113d18c Author: TB Schardl Date: Mon Mar 1 18:55:08 2021 +0000 [TapirUtils] Split critical edges from taskframe.resume instructions when splitting blocks around taskframe intrinsics. This change ensures that we can properly fixup uses outside of taskframes of SSA values defined inside taskframes when the use occurs in a PHI after a taskframe.resume edge. commit 3209871bb97ee0ed96c92f47a39b5210df249b75 Author: TB Schardl Date: Mon Mar 1 18:48:30 2021 +0000 [TapirRaceDetect][CSI] Remove unused code causing warnings during build. commit 32d4e56990727746ed16592a5bce9717eb67fe15 Author: TB Schardl Date: Mon Mar 1 18:38:00 2021 +0000 [Driver] Make -fopencilk explicitly specify -use-opencilk-runtime-bc, in order to make it less dependent on the implementation of the OpenCilk Tapir target. commit 29d142147954d83b2a066cb46a02945bf10615a6 Author: John F. Carr Date: Fri Feb 26 14:14:19 2021 -0500 Copy LLVM 12 config.guess to add Mac/ARM support commit bd8a7eba0014ed16c9d62bd2ddd0ce48a9a69847 Author: John F. Carr Date: Fri Feb 26 09:00:29 2021 -0500 call killsRegister conditionally commit 5ba0fb37da736507c826762fc82fc70047cbff3b Author: John F. Carr Date: Fri Feb 5 08:22:18 2021 -0500 Update comment commit b4cdeaeb6c57cfc97d20439c2d779eee42485e50 Author: John F. Carr Date: Thu Feb 4 16:47:51 2021 -0500 Fix operand constraint commit cf0693ece519260e8dea4adbb20e31558df240f6 Author: John F. Carr Date: Thu Feb 4 12:30:31 2021 -0500 setjmp and longjmp need to save and restore x19 commit ac9d0d7bc6ee2e00298c745416357d755e3bd6f0 Author: John F. 
Carr Date: Tue Jan 26 10:21:19 2021 -0500 Enable Cilk sanitizer for FreeBSD commit 1faa89e13cfa94e47590f96f7e5fa6ad1bd20d4c Author: John F. Carr Date: Fri Jan 22 13:32:32 2021 -0500 Mark registers dead after setjmp (unfortunately for now also in the fallthrough block) commit be04d4d0273ad29518eeb7a4ffd848ba6c38cf76 Author: John F. Carr Date: Sun Jan 17 10:57:11 2021 -0500 FreeBSD-ARM64 support in sanitizer commit 4505ff70fdb2e997fa1dfa56fb22e0b57b5e2dc6 Author: John F. Carr Date: Thu Jan 21 10:31:01 2021 -0500 Enable for ARM commit acf4774af6fe5232475fa9c49da408ad73e63f26 Author: TB Schardl Date: Thu Feb 25 17:16:53 2021 +0000 [CSI] Fix splitting of predecessors of detach-continuation blocks to maintain Tapir-CFG structure. commit 70b592a87620a3c16740aa6f148881e495887ffb Author: TB Schardl Date: Thu Feb 25 15:14:16 2021 +0000 [LoopSimplify] Fix splitting of loop-preheader blocks terminated by sync instructions to properly update MemorySSA. commit 95459abf9c05991a28f2cf594d3aac5ebd5a8dc4 Author: TB Schardl Date: Thu Feb 25 01:51:34 2021 +0000 [InitPreprocessor] Add compile-time interpositioning of call_once and pthread_once for Cilksan. commit fb598e8a16596862b7b6b227022e557d947214b4 Author: TB Schardl Date: Wed Feb 24 12:45:20 2021 -0500 [CSI] Cleanup a comment. commit 7265f6b53221cadbcdd01f97033592c072843d8e Author: TB Schardl Date: Wed Feb 24 11:37:28 2021 -0500 [Darwin] Allow dynamic linking of Cilksan runtime on Darwin. commit 5f1cf8c497d50648d8cd44c3c724ec52bfbdeb21 Author: TB Schardl Date: Mon Feb 22 20:01:00 2021 +0000 [CilkSanitizer] Do not generate calls to a default libhook for library- or intrinsic-function calls. The weak-symbol approach previously used to insert calls to a default libhook does not work when Cilksan is dynamically linked, which is necessary on some platforms. 
commit 2b0b1834068786f68bf0bae743f467d88525c9e0 Author: TB Schardl Date: Mon Feb 22 19:45:33 2021 +0000 [InitPreprocessor] Modify Cilksan to interpose locking routines at compile time, rather than using dynamic interpositioning. This change avoids recursion problems when third-party libraries, such as jemalloc, use these locking routines. commit c4f4bf11c7c22d480a0152f226fa2202d7f28eb3 Author: TB Schardl Date: Mon Feb 22 18:13:53 2021 +0000 [test/Cilk] Make test explicitly get the OpenCilk runtime ABI functions from a bitcode file. commit 7fe09e424589631d544384955c4d6f6a3f770935 Author: TB Schardl Date: Mon Feb 22 18:12:40 2021 +0000 [CilkSanitizer] Make CilkSanitizer properly handle new allocation functions recognized by LLVM. commit 530d1762209a0dcde07b7ade0902477036458af4 Author: TB Schardl Date: Mon Feb 22 18:10:27 2021 +0000 [InlineFunction] Properly handle resumes when inlining a function that requires enclosing the inlined-function body within a taskframe. commit 41bcde76220f50c8fb6562589a5c62a944b99fd0 Author: TB Schardl Date: Mon Feb 22 18:07:26 2021 +0000 [TapirToTarget][LoweringUtils] Properly handle PHI nodes in shared-EH spindles when lowering taskframes. commit bc45f0224268df71a3c1992d80150d841656854c Author: TB Schardl Date: Mon Feb 22 18:04:41 2021 +0000 [LoweringUtils] Properly handle shared-EH spindles when outlining taskframes. commit 66dbdb652dc20d34fdd348b98bac159869bd71ee Author: TB Schardl Date: Mon Feb 22 17:56:54 2021 +0000 [TapirRaceDetect] Strip constant-inbounds-GEPs from pointers when checking whether a pointer is captured. commit 2c4682c106b19f8e8dbec0063156c73c27aa6cba Author: TB Schardl Date: Mon Feb 22 17:54:42 2021 +0000 [TapirRaceDetect] Fix assertion failure when race detection analyzes pointers in a loop in the exit of a Tapir loop. 
commit a5cfe6c736555260ffaf5bfebcc69ae708a84162 Author: TB Schardl Date: Mon Feb 15 17:58:15 2021 +0000 [LoopSpawningTI] Avoid hoisting allocas to the entry block of the recursive divide-and-conquer function generated by LoopSpawning. This change tempers the growth in stack space from this transformation of parallel loops. commit 2d873d48057aa753ffb2dd5ccb4a5328225b6962 Author: John F. Carr Date: Sat Feb 6 17:21:23 2021 -0500 BRNZ-straightening code needs to call processPHI too commit 06352d5dbbcea3c787c4d50943460ac7d6bddafb Author: TB Schardl Date: Sun Jan 31 17:05:48 2021 +0000 [LoopSpawningTI] Fix loop-spawning pass to handle Tapir loops where the tripcount and primary induction variable have different types. commit 2df4215565b2d2c63469f6a1bd1a717a091b0884 Author: TB Schardl Date: Sun Jan 31 17:05:06 2021 +0000 [LoopStripMine] Fix loop stripmining to handle Tapir loops where the tripcount and primary induction variable have different types. commit 71c0c5fc4a5cce4527991049ea7e1d929d903359 Author: TB Schardl Date: Sun Jan 31 17:00:00 2021 +0000 [TapirLoopInfo] Fix bug with handling Tapir loops with unusual checks for the loop-end condition. commit 3c62c5be0909e8d4df1613d7c0efffa2b8cb86b3 Author: TB Schardl Date: Sun Jan 24 04:15:13 2021 +0000 [InlineFunction] Fix bug with unifying resume instructions when inlining a function that can spawn. commit 7595f894c90c4d5798519ce9d447ac9a722537cb Author: TB Schardl Date: Sun Jan 24 04:13:24 2021 +0000 [LCSSA][LICM] When LCSSA inserts a PHI node into the unwind destination of a detach within a loop, ensure that that PHI node inherits the correct value from the detached.rethrow predecessor. Update LICM to avoid splitting the detach and detached.rethrow predecessors of the unwind destination of a detach in a loop. commit 1eaa0cc03e4acb609e0342113ea92dbfb69b52f9 Author: TB Schardl Date: Wed Jan 20 15:52:45 2021 +0000 [Driver] Fix compiler flags for enabling OpenCilk pedigrees. 
commit bd3ab19a8405337a2de6228ff9a6bd4245f08374 Author: TB Schardl Date: Tue Jan 19 10:56:39 2021 -0500 [Driver][CodeGenAction] Fix OpenCilk compilation with runtime-ABI bitcode file and with Cilksan on MacOSX. commit 43141067111ca83e4111b939cef2b2771830dff9 Author: TB Schardl Date: Mon Jan 18 22:06:23 2021 +0000 [CilkSanitizer] Fix attributes on Cilksan instrumentation hooks to reflect that they are not allowed to throw. commit b5c8999ae501aab8397cb13f33090de0dbbc8b84 Author: TB Schardl Date: Mon Jan 18 22:04:54 2021 +0000 [CSI] Avoid instrumenting functions in the startup section, since it is not necessarily safe to run arbitrary instrumentation code within such functions. commit 0337ee2e41e6bae6f68b826201e4e51ba9234ae5 Author: TB Schardl Date: Mon Jan 18 22:03:01 2021 +0000 [clang/test] Update test case to reflect new support for builtin setjmp and longjmp for 64-bit ARM. commit e63ff7f9956ec698718d5b0b888dd84bcd3bb209 Author: TB Schardl Date: Mon Jan 18 21:56:36 2021 +0000 [TapirUtils] Skip promotion of calls to invokes in detached task where the detach instruction already has an unwind destination. The semantics of that detach imply that any calls within the task must already be invokes. commit 4414fa777d8dc519c5268a9cf700494d143d4425 Author: John F. Carr Date: Sun Jan 17 14:38:12 2021 -0500 Fix crash on load of symbol address; avoid using a value that has been marked killed commit 5d63ac80cec94f71cf0658e4240fa5fe02cc9776 Author: John F. Carr Date: Sat Jan 16 18:40:57 2021 -0500 Fix up some bad code generated by setjmp after most optimizations have run commit 04df3551326cdaaefa69527182ae28ec56f23ea3 Author: John F. Carr Date: Sat Jan 16 17:03:54 2021 -0500 Builtin setjmp and longjmp for 64 bit ARM commit 13459b70c336815c534dd9e8863b7c14130e9bb9 Author: John F. Carr Date: Sat Jan 16 15:58:30 2021 -0500 Fix longjmp reading from frame commit 3d74e5e3e7edbc87962f14d2a798dd07640a5281 Author: John F. 
Carr Date: Thu Jan 14 17:28:20 2021 -0500 Handle Tapir intrinsics in IR translator commit c118f943ca6a5997ce4a97460706385e10d0ee42 Author: TB Schardl Date: Fri Jan 15 16:42:23 2021 +0000 [CilkSanitizer] Fix insertion of __csan_task instrumentation in a detached block after a taskframe.use call in that block. commit eafb4e30267b686fe1acc486f38acc9d2e33ba5a Author: TB Schardl Date: Fri Jan 15 03:02:16 2021 +0000 [Frontend][ToolChain] Draft compiler flags for enabling pedigrees in OpenCilk. commit 8b54ae3f43e574874675781c6bb4d0f541a7ef11 Author: TB Schardl Date: Fri Jan 15 02:58:19 2021 +0000 [ToolChain] Add flags to specify alternate OpenCilk ABI bitcode file or alternate OpenCilk resource directory. commit a393722d084e6f19749c673e9151451f4f38d7f3 Author: TB Schardl Date: Thu Jan 14 03:23:14 2021 +0000 [LICM] Prevent hoisting of loads from thread_local variables out of Tapir loops. commit 9c1c667a20853c1cba7a6c6d04d56b891f695c55 Author: TB Schardl Date: Thu Jan 14 03:04:42 2021 +0000 [OpenCilkABI] Code cleanup commit eb2a52ceb198dc3f3a7bc415e69f88f07ce8e4c6 Author: TB Schardl Date: Thu Jan 14 03:03:41 2021 +0000 [CilkSanitizer] Reduce MAAP sizes to 1 byte. commit 3e47ab1480597a2111b7cb50cc66762fd02b3e0d Author: TB Schardl Date: Thu Jan 14 03:02:05 2021 +0000 [CilkSanitizer] Cleanup code, and re-enable MAAP checks by default. commit 38c073ccdeaf25ea600a02c87e0371a48d0cf766 Author: TB Schardl Date: Thu Jan 14 02:58:53 2021 +0000 [CilkSanitizer] Add custom instrumentation for intrinsics and standard-library functions, to enable effective race detection involving those operations. 
commit 05b3717156d103425bdfb14b1dc7741420b082ba Author: TB Schardl Date: Sat Jan 9 14:00:37 2021 +0000 [CilkSanitizer] Change the name of the runtime unit_init function to allow codes compiled with both Cilksan and Cilkscale to compile and run successfully commit 0cfee61d6dd4dcd925f7b7a95306716216a49140 Author: TB Schardl Date: Fri Jan 8 18:24:10 2021 +0000 [CilkSanitizer] Add property bit to identify atomic loads and stores. commit dae2d2df74a173bd2fb7d33bd27fe4602656c10d Author: TB Schardl Date: Fri Jan 8 18:23:20 2021 +0000 [TapirRaceDetect] Remove dead code commit 902d9ea85fa97f98c91e705b691cc8736a873996 Author: TB Schardl Date: Fri Jan 8 18:17:01 2021 +0000 [OpenCilkABI] Use __cilkrts_save_fp_ctrl_state function, provided by ABI bitcode file, to save floating-point control state. commit 107254d7cf19b99f83e868bc2764bca056a0c1e1 Author: TB Schardl Date: Thu Jan 7 02:04:08 2021 +0000 [Driver][OpenCilkABI] Clean up error handling for linking an OpenCilk runtime ABI bitcode file, and remove unnecessary dependencies that the build process did not always properly resolve. commit e58bbc69313eac4dc1c2d260f55195f71b4e7611 Author: TB Schardl Date: Tue Jan 5 14:41:36 2021 +0000 [OpenCilkABI] Use optimized version of __cilkrts_enter_frame for spawn-helper functions. commit b700ff1f2db5cf123d581ea3c7be2425a0fbf62a Author: TB Schardl Date: Tue Jan 5 14:40:22 2021 +0000 [OpenCilkABI] Update OpenCilkABI Tapir target to no longer rename the main function. This change complements the change to Cheetah to no longer use main-renaming to startup the runtime system. commit 119b3d0fdab42183bdd7cf35dc41654d3eea607d Author: TB Schardl Date: Tue Jan 5 14:36:36 2021 +0000 [LoopSpawningTI] Fix bug when lowering Tapir loops containing taskframe.resume exits. commit 6b3f2fdd590c440342b9a0adcf14b7086b3ca524 Author: TB Schardl Date: Tue Jan 5 14:33:53 2021 +0000 [test/CodeGen] Update tests to account for TapirCleanup pass during CodeGen. 
commit 9a07384456228a7479d54e6b20ba710f8c60d623 Author: TB Schardl Date: Tue Jan 5 14:32:09 2021 +0000 [LoopStripMine] Fix bug where stripmining a loop over an inclusive range of iterations would drop a loop iteration from the loop. commit 8400a411440153987fcdee60c4b1eb7ace824472 Author: TB Schardl Date: Tue Jan 5 14:27:10 2021 +0000 [test/Cilk] Add an OpenCilk ABI bitcode file for -fopencilk compile tests to use. commit 44d85bd59f14b16895df1b569cd09d6e7346cde7 Author: TB Schardl Date: Tue Jan 5 14:20:08 2021 +0000 [OpenCilkABI] Use a bitcode file for the OpenCilk ABI by default. commit bddece71481e9eb5ccb69c54d901e8ed0051fd1d Author: TB Schardl Date: Tue Jan 5 14:13:54 2021 +0000 [Driver] Make -fopencilk automatically use a bitcode file for the OpenCilk ABI. commit 22b9fb90ea66fafa791d324a420046f2226157e7 Author: TB Schardl Date: Sun Dec 20 02:22:33 2020 +0000 [TapirRaceDetect][CilkSanitizer][TapirToTarget] Cleanup code to avoid some compiler warnings during build. commit 7d0c8f5d31a86082838acd70586dceef8943b66d Author: TB Schardl Date: Sun Dec 20 02:20:59 2020 +0000 [Tapir] Add prepareModule method to Tapir targets for running code that modifies a module before lowering Tapir to runtime calls. Cleanup and format Tapir lowering code. commit c8bfede15143104b7615ba5ccfc2a78a3a4973cd Author: TB Schardl Date: Sun Dec 20 02:14:13 2020 +0000 [compiler-rt][llvm] Move cilksan, cilkscale, and csi out of compiler-rt and into a separate repo for productivity tools. Update CMake to integrate compilation of cilktools as a subproject or runtime. commit 01f75a5855e4428d8dd99b394905b654b6ec5d1e Author: Tim Kaler Date: Wed Oct 28 19:04:55 2020 -0400 Demo of external bitcode runtime commit 7f1f70b60274762aafc2287557fb2d0aa6b73bcc Author: TB Schardl Date: Sat Dec 12 22:05:43 2020 +0000 [CodeGenPGO] Ensure that PGO correctly instruments cilk_for loops with atomic instrumentation. 
Fixes #30 commit e94f8d9553bd237788475422fbc0aef294504320 Author: TB Schardl Date: Sat Dec 5 18:37:39 2020 +0000 [TapirRaceDetect] Fix call to PointerMayBeCapturedBefore to accommodate pointers with casts. commit aeffc639215e750a1df535fce7904d9158f663b0 Author: TB Schardl Date: Sat Dec 5 16:46:46 2020 +0000 [AliasAnalysis] Update names of functions that check if a Value is a noalias call or identified object assuming the Value is in the same spindle. commit 9ec08c740c2a790795056f5a19c5ab787bda508c Author: TB Schardl Date: Sat Dec 5 16:41:17 2020 +0000 [Outline] Ensure that outlining assigns the correct min-legal-vector-width to the outlined function it creates. commit b76c5d22d65a4d9c39e855c3a4cfdd46d0562b69 Author: TB Schardl Date: Fri Dec 4 13:12:09 2020 +0000 [CodeGenFunction] Remove a redundant check. commit 1f37410c0b64d08a4fc24cbe545c531eb8311d3a Author: TB Schardl Date: Mon Nov 30 00:49:26 2020 +0000 [Verifier] Add verification of CFG structure for Tapir tasks. Add check that syncregions are not used in subtasks. Update test cases to satisfy Tapir task-structure verification. commit ffa78f97fcda83ce08e5a05219e9f51da18275d1 Author: TB Schardl Date: Mon Nov 30 00:46:56 2020 +0000 [LoopStripMine] Fix loop stripmining to handle case where detach-unwind and associated detached.rethrow destination are the same. commit 50f4c8ea0817fff2bfb1c408b715eb6131a9b99b Author: TB Schardl Date: Sun Nov 29 22:39:52 2020 +0000 [TapirUtils] Fix inlining to ensure that parallel functions inlined into the spawned body of a Tapir loop in the continuation of a spawn in the caller is properly enclosed in a taskframe. commit 2af32dc522cc6f5135245a8aa2d260233e96882a Author: TB Schardl Date: Sat Nov 28 17:15:29 2020 +0000 [TapirRaceDetect] Identify underlying objects of two accesses as NoAlias if an identified object underlying one access cannot escape before the other access. 
commit 3e8ae1aed332f5515e1e5f6efccfde4b3ed92dad Author: TB Schardl Date: Sat Nov 28 17:13:18 2020 +0000 [LoopRotationUtils] Ensure that TaskInfo is updated whenever LoopRotate modifies the CFG. commit 9b134dc0be655f8943090e993e663baeca76c24c Author: TB Schardl Date: Sat Nov 28 17:11:58 2020 +0000 [TapirUtils][CSI][CilkSanitizer] Fix bugs and inconsistencies in transforming a Tapir function with taskframes for CSI or CilkSanitizer instrumentation. In particular, ensure that inserted cleanup blocks properly respect taskframe intrinsics; preserve the structural property that detaches unwind to the same destination as associated detached.rethrows; and exclude inserting instrumentation in placeholder basic blocks. commit 2ca87aad0cfbf5465abf8f07d792d2635403d2c2 Author: TB Schardl Date: Sat Nov 28 17:03:02 2020 +0000 [Tapir] Use a custom ValueMaterializer for Tapir outlining. This commit uses the custom ValueMaterializer to properly materialize a specified input syncregion, i.e., to ensure that any associated debug info is properly updated. In the future, this change may be extended to support additional Target-specific customization of the Tapir outlining process. commit 94764151a51cb0a9eb97e9325ae2fb1048c212e5 Author: TB Schardl Date: Sat Nov 28 16:55:24 2020 +0000 [LoopInfo][LoopUtils][TapirUtils] Update LoopInfo and associated utilities and passes to treat exit blocks inside of tasks within a loop as part of the loop. commit 2d0d21a3da1c2adf2c1c1787bae45b5de0b291a2 Author: TB Schardl Date: Sat Nov 28 15:57:34 2020 +0000 [TapirTaskInfo] Fix determination of isAllocaParallelPromotable to handle cases where an alloca is used only in one spindle, but that spindle does not match the spindle where the alloca is defined. commit fe0e0b2e0aba84282d6ce9dfcd8c8059df944f73 Author: TB Schardl Date: Tue Nov 17 19:25:18 2020 +0000 [MemorySSA] Clean up propagation of TaskInfo in MemorySSA to ensure different parts of MemorySSA all use the same TaskInfo. 
commit 4a785515ee01ebcec62334d02ff1e8f0f58a2edf Author: TB Schardl Date: Sun Nov 8 21:03:35 2020 +0000 [FunctionAttrs][LoweringUtils] Minor code cleanup and optimization. commit 92d6cf362c7f4280f287f5ad72d431b944319914 Author: TB Schardl Date: Sun Nov 8 21:01:24 2020 +0000 [PassBuilder][PassManagerBuilder] Run LICM after loop stripmining, to hoist code out of the generated inner serial loop. This change allows for substantial optimization of Tapir loops containing __cilkrts_hyper_lookup calls. commit acfdeaa052c4bb73d0fa1e024095295ebaabf6f0 Author: TB Schardl Date: Sun Nov 8 20:55:09 2020 +0000 [LoopUtils] When forming dedicated loop exits, treat all blocks inside of Tapir tasks in the loop as part of the loop, even if LoopInfo does not recognize those blocks as belonging to the loop. commit 5f8e1318d75a4a3396260ca40eced7784b505a2c Author: TB Schardl Date: Sun Nov 8 20:51:36 2020 +0000 [CodeMetrics] Treat Tapir-target-specific library functions as builtins when analyzing code metrics. commit e15b4d5b66e7aa91e06becda01ca5e6cd7f1e484 Author: TB Schardl Date: Sun Nov 8 20:49:37 2020 +0000 [TargetLibraryInfo] Add infrastructure to record Tapir-target-specific library functions. Record some libraries for Cilk and OpenCilk Tapir targets. Update the default Tapir target to be OpenCilk. commit bdd0f96ef31ebc9ba9ad5b3c70f3d011476c0729 Author: TB Schardl Date: Sun Nov 8 20:43:56 2020 +0000 [MemorySSA] Update memory SSA to recognize clobber queries within the same spindle and to use that spindle information for alias-analysis queries. commit d1c4f88dc4a844a6b59261e5b20f266cb38f9121 Author: TB Schardl Date: Sun Nov 8 20:38:36 2020 +0000 [AliasAnalysis] Extend alias analysis to support alias queries for instructions in the same strand and to utilize the strand_pure and strand_noalias attributes for such queries. 
commit 70366be2c8fc62b4b67bf6d8468c050c2d515f9b Author: TB Schardl Date: Sun Nov 8 20:34:14 2020 +0000 [Attr] Add clang attribute strand_malloc that maps to strand_noalias in LLVM IR. commit 12029c49e18ace5156db888629fc85d34dd6df53 Author: TB Schardl Date: Sun Nov 8 20:32:23 2020 +0000 [Attributes] Add strand_noalias attribute to annotate return values from functions that should be treated like system memory allocation functions for memory accesses within the same strand. commit 12691c45cc8b26b13a7f2bc177005a75b5aa3899 Author: TB Schardl Date: Sat Jul 25 15:58:16 2020 +0000 [PassBuilder][PassManagerBuilder] Run EarlyCSE to remove common subexpressions that emerge from SimplifyCFG's sinking optimization commit c5c98c4aee3f70c7b0455ec6f1336d53d7e22f9b Author: TB Schardl Date: Fri Jul 24 20:40:31 2020 +0000 [LICM] Enable loop-invariant-code motion of strand_pure function calls out of serial loops commit c5b80a52e7e59b9f211339aae160213e0b095acc Author: TB Schardl Date: Fri Jul 24 15:44:04 2020 +0000 [AliasSetTracker] Use the strand_pure attribute to refine the computation of alias sets commit 3f2c12bf3cb2129d596d5aadd420c82667edbff2 Author: TB Schardl Date: Fri Jul 24 15:41:23 2020 +0000 [Attr] Add strand_pure and stealable clang attributes that correspond to synonymous LLVM attributes commit c99deef484a67840c966079070c37e1e33f8722e Author: TB Schardl Date: Fri Jul 24 15:37:04 2020 +0000 [Attributes] Add strand_pure LLVM attribute to identify functions that act like pure functions within a single strand of a Cilk or Tapir computation commit 5dc9609e9797e2901208903f87955989acb02a27 Author: TB Schardl Date: Fri Oct 30 03:36:59 2020 +0000 [PassBuilder][PassManagerBuilder][Tapir] Move verification of the IR after Tapir lowering steps to speed up compilation. In particular, avoid checking the whole module after processing each function. 
commit 38b3a05767e13ee3f318d3c9ed6810edb110c671 Author: TB Schardl Date: Fri Oct 23 14:13:56 2020 +0000 [CilkSanitizer][CSI] Fix canonicalization of LLVM IR to ensure a single after_call hook runs after an invoke, even when those invokes occur near unsimplified loops in the original code. commit 803d4c2ee9da301693b2f718acc8ca8f2cc9c1cf Author: TB Schardl Date: Fri Oct 23 01:19:12 2020 +0000 [IndVarSimplify][TapirLoopInfo] Fix bug with performing loop spawning on a loop with multiple canonical induction variables of different sizes. commit bfa6fce93b342e856162ac54b35ad24fcc0b8c90 Author: TB Schardl Date: Thu Oct 22 22:08:24 2020 +0000 [CSI] Use Latency cost model, instead of RecipThroughput cost model, to model the cost of a basic block. Computation of the RecipThroughput cost of a basic block can fail due to types in the LLVM IR that are not modeled on the target architecture. Addresses #24. commit 79e1f41e835c6c056c84f7a1c09d02f40355bb10 Author: TB Schardl Date: Thu Oct 22 21:40:39 2020 +0000 [CommonArgs][cilksan] Interpose hyperobject allocation and deallocation to ensure that Cilksan avoids false-positive race reports on updates to reducer views when the memory allocator reuses the same memory for different views during race detection. commit 3f4535f29339ac3bfdd8d353a1832ec7b40032e9 Author: TB Schardl Date: Tue Oct 6 12:40:36 2020 +0000 [cilksan] Ensure that updates on the shadow memory affect a positive (non-zero) number of bytes. commit b085b0dbf7e6ad43f4d4023e3f07c07f9da6946e Author: TB Schardl Date: Tue Sep 29 02:21:41 2020 +0000 [CilkSanitizer] Fix bug with hoisting coarsened instrumentation out of loops with an unusual structure. commit 9e9ad36fc8d9ed2fe3dfb0d2c3c7636504718311 Author: TB Schardl Date: Tue Sep 22 09:56:32 2020 -0400 [cilkscale] Update Cilkscale C++ API to make it easier to accumulate measurements into wsp_t variables. 
commit 2db581288e7d3a22b05a163a01394b56f576f4de Author: TB Schardl Date: Sat Sep 19 17:07:11 2020 -0400 [LoopInfo] Fix helper-function prototype to fix build errors commit edd04b1a4688fb79d357be453e891a9afbf4083b Author: TB Schardl Date: Sat Sep 19 15:51:58 2020 -0400 [TapirTaskInfo][LoopInfo] Fix to address compiler warnings from newer versions of clang commit bc726136de4c0248af8a95b1c69b2294c1a8d985 Author: TB Schardl Date: Thu Sep 17 21:44:49 2020 +0000 [CilkSanitizer] Format code, and rename MAAPVal enum to MAAPValue, to work around compilation problem on GCC 5. commit f7f095c1c8c68a0b6beeba00386c093ca2a23976 Author: TB Schardl Date: Mon Sep 7 19:19:08 2020 +0000 Fix build errors on some compilers and systems commit c80e7b3c5a0687bf69da77525412c7a654278b31 Author: TB Schardl Date: Sat Sep 5 21:42:10 2020 +0000 [Outline] Update the min-legal-vector-width function attribute when outlined function contains a vector argument. commit bfb64f8b7c4186491e001f75de49a5aea6a75e47 Author: TB Schardl Date: Sat Sep 5 21:41:10 2020 +0000 [LoopStripMine] Ensure that loop attributes are correctly assigned to generated stripmined loop, new outer parallel loop, and epilogue loop. commit c3fb7eacfe99bab4db874d3af67433cc1fd88153 Author: TB Schardl Date: Sat Sep 5 21:09:04 2020 +0000 [TapirUtils] Remove unused utility method. Fix typos, and remove debugging print statement. commit 8dc7c55a091260c8db8ed9305c9d1eaafa50efe3 Author: TB Schardl Date: Sat Sep 5 21:03:26 2020 +0000 [CGLoopInfo] Fix handling of Tapir loop metadata, to ensure it's always accessible for Tapir lowering, regardless of what loop passes run. commit b9102be980ba1585693f55fe1978bd9db6a56bca Author: TB Schardl Date: Thu Sep 3 02:39:41 2020 +0000 Bug fixes for rebase onto LLVM 10.0.1 commit d9e39b763c0898997cadea00bac5d56ab2adee24 Author: TB Schardl Date: Wed Aug 26 22:06:41 2020 -0400 [cilksan] Only dynamically interpose mremap symbol on Linux. In particular, mremap is not present on Darwin. 
commit 69f8347489e5e0be6d319244000d0130623fec5c Author: TB Schardl Date: Sun Aug 23 15:20:51 2020 +0000 [cilksan][cilkscale] Update to match OpenCilk runtime API changes. commit 826febb3785d84bb7c1bbdda7b8dbb2142d023cc Author: TB Schardl Date: Sun Aug 23 15:18:40 2020 +0000 [test/Cilk][test/Tapir] Update regression tests to match ABI changes. commit 42cee2f727bf15c86d7032eaa4e4313c871e66c5 Author: John F. Carr Date: Fri Jul 17 15:38:12 2020 -0400 ABI changes commit 17a4708ddb385d336cf316f9264ffb4efbc70078 Author: TB Schardl Date: Sun Aug 23 04:05:07 2020 +0000 [cilksan] Various performance improvements, including using mmap to allocate page-table shadow memory structure to conserve memory footprint commit f09167b54478dd9a96d54d5fd82169bc81ff13c3 Author: TB Schardl Date: Wed Aug 12 17:40:55 2020 +0000 [CilkSanitizer] Ensure that instrumentation-hoisting is only performed when the accessed addresses can be computed before entering the loop. commit 9c04d7974b6b38edba37ebaba73d5b38c05af6b5 Author: TB Schardl Date: Tue Aug 11 13:43:08 2020 +0000 [LoopSpawningTI] Fix handling of SSA values used in metadata during outlining. commit a5d8ff14375c34d452bb13da8971f20639b7b032 Author: TB Schardl Date: Mon Aug 17 13:48:52 2020 +0000 [cilksan] Fix address and size computation when freeing shadow memory due to a realloc. commit 90420be060a0623b0626fb7edb7cc2d1bdbe837c Author: TB Schardl Date: Sun Aug 16 18:03:14 2020 +0000 [CilkSanitizer] Fix to use new function names for MAAP checks. commit 5bea39d8005110376cbf47ea127f5f0c4acb9e33 Author: TB Schardl Date: Tue Aug 11 01:22:46 2020 +0000 [CilkSanitizer] Only sink coalesced instrumentation for finite loops, and make sure to use the right trip-count computation. commit 9598190ac41efdda49ee8dd1752dd8aaf6cd66e8 Author: TB Schardl Date: Tue Aug 11 00:56:19 2020 +0000 [cilksan] When a free occurs when no parallel execution is occurring, simply clear the freed shadow memory, rather than treat it as a write. 
commit aee89b180786a7a0b2589ef80c6e9462f0e93cb0 Author: TB Schardl Date: Sun Aug 9 17:16:19 2020 +0000 [CilkSanitizer] Incorporate changes for inserting coalesced instrumentation that is sunk to the loop exits. commit 58d714c8b25c3870eca56bf594a0844a3eb2dcc8 Author: TB Schardl Date: Sat Aug 8 15:18:10 2020 +0000 [CilkSanitizer] Add MAAP check for instrumentation sunk after a loop. Start to generalize sunk instrumentation to handle loops with multiple exits. commit cda9d7a1d9aec4a60724d836c798e050af998c34 Author: Grace Yin Date: Sat Aug 8 03:02:54 2020 -0400 cleanup commit d7eefb0acc6b0a82dbad807e5d7f100630f52891 Author: Grace Yin Date: Sat Aug 8 02:32:30 2020 -0400 sinking (messy) commit f91bf9a350ddd859e8782625932b94fbe468ad0f Author: Grace Yin Date: Fri Aug 7 04:27:32 2020 -0400 cleanup commit 5212757f27f8ab79e0b84de0f6a2010e09af41ef Author: Grace Yin Date: Fri Aug 7 04:19:15 2020 -0400 handle negative strides (messy) commit e437ce8a69a8e3048b228b98ba3905f1aba2ddab Author: TB Schardl Date: Sat Aug 8 13:53:18 2020 +0000 [CilkSanitizer] Ensure that loop hoisting is not performed when static race detection is disabled. commit 373c9f6ba0b88a3c749e6ad52970cf1b50596a76 Author: TB Schardl Date: Sat Aug 8 13:51:24 2020 +0000 [CilkSanitizer] Fix bug where allocas were not getting instrumented when loop hoisting was enabled. commit 371a9b28141e79bf68201c3c747e3c42cb2cd1a4 Author: TB Schardl Date: Sat Aug 8 13:46:46 2020 +0000 [CilkSanitizer][ComprehensiveStaticInstrumentation] Support instrumenting a program with CilkSanitizer by running the CilkSanitizer pass multiple times, where different runs insert different sets of instrumentation. commit 2888b29b15acaee2c404efd14af45af505de38a3 Author: TB Schardl Date: Sat Aug 8 13:41:55 2020 +0000 [TapirRaceDetect] Avoid computing runtime checks, since they're expensive to compute and we don't currently use them. 
commit 751b92bd4d77c71246fa05aa2d65a7d40169ad77 Author: TB Schardl Date: Sat Aug 8 13:40:36 2020 +0000 [CilkSanitizer] Use the ScalarEvolution analysis used by RaceInfo, rather than a separate invocation of ScalarEvolution. This change seems to work around some problems with Module passes that need multiple Function analyses. commit 72e69a6e98af5f4e4965268e67c83af2a294eae4 Author: TB Schardl Date: Sat Aug 8 13:31:15 2020 +0000 [CilkSanitizer] Remove DependenceAnalysis from the analyses that CilkSanitizer requires. CilkSanitizer no longer uses this analysis directly, but only indirectly through TapirRaceDetect. commit a5c55d7ba8b8a5017cbc0fd9a53db05217938bbf Author: TB Schardl Date: Fri Aug 7 20:36:18 2020 +0000 [CilkSanitizer] Ensure debug locations on __csan_func_entry and __csan_func_exit hooks. commit a8fee97448896e17c51c8ec8fdd50cadcb7eada8 Author: TB Schardl Date: Fri Aug 7 20:19:39 2020 +0000 [cilksan] Fix attributes for compilation using GCC. commit 385df84ab6f3d75318bd19e090ccc43dcdd258bf Author: TB Schardl Date: Thu Aug 6 16:51:44 2020 +0000 [CilkSanitizer] Disable MAAP checks by default. commit 8bb59eeb15512c5c42a1251a82c5e8a46bb25f68 Author: TB Schardl Date: Thu Aug 6 03:54:22 2020 +0000 [CilkSanitizer] Use option to disable MAAP check on loop-hoisted instrumentation. 
commit b6897c676be59cbc37a733dc32468278fa7b0a29 Author: Grace Yin Date: Wed Aug 5 01:42:34 2020 -0400 fixed error where two scevs of different sizes are multiplied together commit 2514d3795947b8434e59aa0e71bf617712a88b59 Author: Grace Yin Date: Tue Aug 4 03:02:15 2020 -0400 Fix bug to check for correct alternative race types commit c03d134cd24a53442b7740f452dfd5871aa16ef2 Author: Grace Yin Date: Tue Aug 4 02:52:33 2020 -0400 suppression for hoisted instrumentation commit 00ec2fa9671118f92c4d95819a87442997d9f025 Author: Grace Yin Date: Mon Aug 3 22:15:08 2020 -0400 stats for hoisted instrumentation commit fb485f92f6794c3dc691cf67426904258b6f44c4 Author: Grace Yin Date: Mon Aug 3 22:03:52 2020 -0400 comments commit ff6e1faf48619d84f3dbcb0f7e7aa8e121f7c1aa Author: TB Schardl Date: Wed Aug 5 17:45:32 2020 +0000 [LoopSpawningTI] Remove the old Cilksan instrumentation hooks when moving instrumentation out of a serial loop from multiple task exits. commit d7976d47537d22b9b37ddd8a849a5ec71ee4dd58 Author: TB Schardl Date: Wed Aug 5 17:43:33 2020 +0000 [CilkSanitizer] Add option to disable hoisting of instrumentation out of loops. commit 1125c80d002470d0d4c9ab740878daad702f4518 Author: TB Schardl Date: Wed Aug 5 17:35:35 2020 +0000 [CilkSanitizer] Reorganize the logic of setting up the CFG for CilkSanitizer instrumentation, and address some bugs including bugs with handling analyses. commit f340292d21079392adc1c00efce87775ca982699 Author: TB Schardl Date: Wed Aug 5 15:35:03 2020 +0000 [TapirTaskInfo] Only compute the taskframe-tree information once when multiple calls to find the taskframe tree occur on the same TaskInfo analysis. commit dcd3f2952636009591df71a5d181bb31eb4d3178 Author: TB Schardl Date: Wed Aug 5 15:33:02 2020 +0000 [TapirTaskInfo] Clear data structures storing taskframe-tree information when releasing memory. 
commit 531bf147ae921277dcf1c3891e8b9b545e46c303 Author: TB Schardl Date: Wed Aug 5 15:30:24 2020 +0000 [cilksan] Add strand-level collection of statistics. commit ae3e0ca611c0cf7f5bd640dedeb1c1b69a587a12 Author: TB Schardl Date: Wed Aug 5 15:28:08 2020 +0000 [cilksan] Fix minor bugs and add more assertions and more explicit debug output. commit 7898097b0ca73f767b4ec2f138d91e5764407b3a Author: TB Schardl Date: Tue Aug 4 12:25:44 2020 +0000 [CilkSanitizer] Add option to disable MAAP checks commit 8093ac0d91b7e4be5281c97c6990cad6508bf62a Author: TB Schardl Date: Tue Aug 4 00:13:42 2020 +0000 [CilkSanitizer] Fix insertion of after-loop hooks before task exits in Tapir loops with exceptional returns from the loop body. commit 27ce89d8449242ab76c99b9dc1eb3d5695eaa2fe Author: Grace Yin Date: Mon Aug 3 11:45:37 2020 -0400 cleanup commit 561d613ec41ae481c7a85867c700961a457c31a4 Author: Grace Yin Date: Mon Aug 3 11:21:50 2020 -0400 hoisted range can be an expr (messy) commit 586a8a839d4030b6c35ca0addb7761afab744934 Author: Grace Yin Date: Wed Jul 29 16:45:50 2020 -0400 hoisting (messy) commit b95acf68feefce11f4c4dc4a4b38af80e4b39c36 Author: TB Schardl Date: Wed May 13 15:16:07 2020 +0000 [CilkSanitizer] Rename suppression to MAAP (May Access Alias in Parallel), and add documentation about MAAPs. commit 3263f0be490075d4aa426e4e219f16f1a276bed8 Author: TB Schardl Date: Sun Aug 16 13:51:22 2020 +0000 [cilksan][CommonArgs] Rename internal method in OpenCilk runtime that is interposed for reducer race detection, in order to clearly denote that method as an internal runtime method. commit a4e388ae9c13e84f13b54d322249a5a005d1b519 Author: TB Schardl Date: Tue Aug 4 00:15:05 2020 +0000 [LoopUnswitch][TapirUtils] Minor fixes to code formatting. commit cd8be2eb0086ce79b876140d90c81808e86b81aa Author: TB Schardl Date: Tue Aug 4 00:13:42 2020 +0000 [CilkSanitizer] Fix insertion of after-loop hooks before task exits in Tapir loops with exceptional returns from the loop body. 
commit 949497b3c483250fde829f00cdd4a0b5a3eb4adf Author: TB Schardl Date: Mon Aug 3 18:28:51 2020 +0000 [LoopSpawningTI] After processing a Tapir loop, move Cilksan instrumentation for parallel control flow out of the serialized loop, in order to respect the loop's grainsize. commit 39fc5c35386266fb5c1fb0db76dcf458863a5bf7 Author: TB Schardl Date: Mon Aug 3 18:25:38 2020 +0000 [CilkSanitizer] Add command-line option to insert only instrumentation relevant for shadow-memory maintenance or series-parallel maintenance. commit 40e5e042992e8b83e6c4916a69042f3398e36fcf Author: TB Schardl Date: Mon Aug 3 18:16:37 2020 +0000 [CilkSanitizer] Remove unused command-line option commit 42d78683f9eeb0a196c9cc0d73b2e82bcbb6d55c Author: TB Schardl Date: Mon Aug 3 15:28:16 2020 +0000 [cilksan] Update shadow-memory structure to track 32-byte lines, rather than 8-byte lines commit 7a8ada7a0b1ba0d90196d1724381449437a46aa7 Author: TB Schardl Date: Mon Aug 3 15:24:33 2020 +0000 [cilksan] Compile Cilksan runtime with -fno-exceptions. commit fe9f706d63b94b1e07b2e9e17635a4ff976587ff Author: TB Schardl Date: Mon Aug 3 15:23:49 2020 +0000 [cilksan] Add separate debug output for operations that update shadow memory for the call stack of the program-under-test. commit 5e3ad375df79d0efc014e577cb8de11c609e9315 Author: TB Schardl Date: Mon Aug 3 15:21:40 2020 +0000 [cilksan] Add fine-grained stat collection on number of reads and writes instrumented of different sizes. commit 76cf1c6d77524b981d4cbd8e55206d50b3c5f4c8 Author: TB Schardl Date: Mon Aug 3 15:19:59 2020 +0000 [cilksan] Fix memory leak and assertion failure when handling a 0-byte memory access. 
commit 014e2a1617f32a1a03420a4fa828ee2caf844730 Author: TB Schardl Date: Mon Aug 3 15:15:01 2020 +0000 [cilksan] Update Cilksan to use OpenCilk runtime interface for forcing reductions commit 3b336bcc7d6484a261cd3bb5da58159139154ef4 Author: TB Schardl Date: Fri Jul 31 16:00:35 2020 +0000 [cilksan][CommonArgs] Use function interpositioning on Linux to disable cilksan instrumentation when reducing views of reducer hyperobjects, and update cilksan to use opencilk runtime interface for checking reducer races. commit 99609cb5045512177510f213a8e2fb633ac073dc Author: TB Schardl Date: Sun Jul 26 19:16:18 2020 +0000 [SimplifyCFG] Properly handle predecessors of trivial detached block when serializing commit 3f4a691e076e5f59ac889d585393666116a4add7 Author: TB Schardl Date: Fri Jul 24 14:32:10 2020 +0000 [LoopUnswitch] Fix PHI node maintenance when unswitching loops that contain Tapir tasks. This commit addresses issue #13 commit d8820af091c93bbe2483476c697bbdaa1983f6f2 Author: TB Schardl Date: Wed Jul 22 23:11:40 2020 +0000 [CSI] Remove invalid assertion on SizeTable entries commit fdff5338a4493c2a349e1d9322ae50a316877ed4 Author: TB Schardl Date: Wed Jul 22 23:09:46 2020 +0000 [CSI] Declare __csi_init and __csi_unit_init to be strong symbols, to ensure that the linker properly resolves those symbols in tools where only those functions, and no other hooks, are defined. 
commit 8cf8bb9432ae791cd1ccee477ae48b0a41acaeeb Author: TB Schardl Date: Sun Jul 19 17:40:50 2020 +0000 [cilksan] Expose Cilksan API for user code to mark regions that should be ignored for race detection commit d153f5be36dde15b6e923cd9ac0d6a5e737843e1 Author: TB Schardl Date: Thu Jul 16 20:04:00 2020 +0000 [TapirLoopInfo] Make more analysis remarks available when LoopSpawning fails to transform a Tapir loop commit 5d5b600867351274b65b9b81c33aa54c6060ebda Author: TB Schardl Date: Thu Jul 16 20:02:24 2020 +0000 [TaskSimplify] Incorporate CFG simplification at the end of the task simplification pass commit 20d05bac3948e68f93100ef0782856d97e762518 Author: TB Schardl Date: Thu Jul 16 19:59:49 2020 +0000 [ValueTracking] Fix bug in which Tapir instructions were viewed as safe to speculatively execute commit 20d9d70e4ae254d582ff476023be6d69c24ef4ed Author: TB Schardl Date: Sun Jul 12 18:00:59 2020 +0000 [TaskSimplify][SimplifyCFG] Properly simplify taskframes and exception-handling code when unassociated taskframes are present. -) Do not remove single resumes that follow the taskframe.resume of an unassociated taskframe. -) When optimizing taskframes, recognize that implicit synchronization occurs when control transfers to a landingpad. This implicit synchronization can imply that an unassociated taskframe in the continuation of a spawn may be necessary, even if it does not explicitly contain a sync. commit 111ac2237e3e25d8657f5af6d302dad5f58cb41b Author: TB Schardl Date: Sun Jul 12 17:57:00 2020 +0000 [CodeGen] When compiling Cilk code, enclose a try-catch in its own taskframe, to prevent implicit synchronization and Cilk-exception logic in a try block from affecting spawns outside of that try block. 
commit 3fce6a1494fb2a7ca93bdc71ec893d0732d6df42 Author: TB Schardl Date: Fri Jul 10 10:23:45 2020 -0400 [CMakeLists] Modify CPack configuration to point to the license file for OpenCilk commit bc4c1ce79bfd16daa71c51c818aee36ca998ce1d Author: TB Schardl Date: Fri Jul 10 09:42:59 2020 -0400 Add license for OpenCilk modifications to the LLVM project commit ad1754d8958950071386cdf36bb05d776369c664 Author: TB Schardl Date: Wed May 20 12:34:29 2020 +0000 Draft changes to LICENSE, CREDITS, and CODE_OWNERS files for initial OpenCilk beta release. commit 210893c14ea60cd7640e550d49546a5c7ee52458 Author: TB Schardl Date: Thu Jul 9 11:14:23 2020 -0400 [Darwin] Rework flags for linking the OpenCilk runtime on Darwin commit 804b9c2746b8027843cf4142f557d895b4e58bcf Author: TB Schardl Date: Wed Jul 8 21:57:30 2020 -0400 [CMakeLists] Update the bug report URL to point to the OpenCilk issue tracker. commit b5c93cb6ea47dd313c21e287bd2342f294c3007d Author: TB Schardl Date: Wed Jul 8 20:45:53 2020 -0400 [CMakeLists] Update CPack information to use OpenCilk instead of LLVM. commit c4fb36af819b146e93d46c79cb8c951bc6551003 Author: TB Schardl Date: Fri Jul 3 23:30:57 2020 -0400 [ToolChain] Better test to determine whether to use the --enable-new-dtags flag when linking the OpenCilk runtime commit 4e5f9202078e25d9e3a7d4467d6521e819301d87 Author: TB Schardl Date: Fri Jul 3 14:51:30 2020 -0400 [ToolChain] Simple fix to the default flags for linking the OpenCilk runtime system on Darwin commit f370797bb11d6c599f4c7c7d46e93f64fa609fc7 Author: TB Schardl Date: Thu Jul 2 16:12:03 2020 -0400 [Darwin] Fix link flags for Cilksan and Cilkscale on Darwin. 
commit 625fdef239729c8f98b387eef19b5213202f35a4 Author: TB Schardl Date: Thu Jul 2 16:10:36 2020 -0400 [CMake] Fix CMake for compiling Cilksan and Cilkscale for OSX commit bbcf52cf15bebf9e8da638c2ebc4fca17181033d Author: TB Schardl Date: Thu Jul 2 16:04:58 2020 -0400 [cilkscale] Fix compilation of serial cilkscale tool, when cilkscale is built with a non-Cilk compiler commit 5ba98b15296c8c023d764788e3415df682178cb4 Author: TB Schardl Date: Tue Jun 30 01:27:36 2020 +0000 [Cilk] Add semantic checks to disallow invalid _Cilk_spawns in binary expressions commit 6a913fb1d69c7d6bdc2ec681eecbff7fa67e846f Author: TB Schardl Date: Tue Jun 30 01:20:14 2020 +0000 [Cilk] Emit a diagnostic message, rather than fail an assert, when codegen fails to emit a spawn. Addresses issue #5 commit 1865ff0801fdd42e5acbdf376fc457602e447b61 Author: TB Schardl Date: Fri Jun 26 23:32:47 2020 +0000 [ToolChain] Fix link order of libraries, to ensure that symbols are properly resolved when statically linking the OpenCilk runtime. commit 5d70a1d6d2c460153e5319e2949481849dadc972 Author: Grace Yin Date: Fri Jun 26 17:22:12 2020 -0400 link against C or C++ Cilk personality fn commit c9496784f059b50355e45935a14aa94d6cada694 Author: John F. Carr Date: Thu Jun 25 16:06:05 2020 -0400 Build cilkscale on FreeBSD commit 7c6fcd88dd52c4a89b0b4085801d8059e8f98361 Author: TB Schardl Date: Fri Jun 26 14:50:40 2020 +0000 [CilkRABI] Properly update PHI nodes in successor blocks when inserting the branch conditioned on the result of __builtin_setjmp. commit 66201fa63aa4abebea8af00541aa27b9384d1603 Author: TB Schardl Date: Fri Jun 26 14:47:02 2020 +0000 [CilkRABI] Fix how spawners are marked with the Cilk personality function. commit d9dc096347a4fe745dd57b59498fd0a1d3344eae Author: TB Schardl Date: Fri Jun 26 14:43:06 2020 +0000 [CilkRABI] Allow __cilkrts_cleanup_fiber to return normally, in case there is no fiber cleanup to do. 
commit a3113ebb25e88a2cd46cbc0ca080b7ec04d32c35 Author: TB Schardl Date: Fri Jun 26 14:38:49 2020 +0000 [CSI] If a sync has a corresponding sync.unwind invoke, insert after_sync hooks at the destinations of that sync.unwind invoke. commit 22a049e1a116a7a083b02d974a588a06e8e18b1e Author: TB Schardl Date: Fri Jun 26 14:35:38 2020 +0000 [CSI] For spawned tasks with taskframes, insert detach_continue hooks for the unwind destination of a detach after the taskframe.resume. Basic blocks in such tasks between the unwind destination of a detach and the taskframe.resume are effectively dead, meaning that CSI hooks inserted into those blocks are not run. commit 185c2777ebd901542fdf153a0655fe40f6140768 Author: TB Schardl Date: Fri Jun 26 14:32:05 2020 +0000 [Cilkscale] At the unwind destination of a detach, update work and span variables similarly to after a sync. OpenCilk exception handling ensures that the function is synced when it reaches the unwind destination of a detach. commit 61c5706c1434336df5f50083a4915cc8bd0470e4 Author: TB Schardl Date: Fri Jun 26 14:29:51 2020 +0000 [CSI][Cilkscale] Add property to detach_continue CSI hook to identify unwind destinations of detaches. commit daef84163fd6573e6996e2b4cbebfbd0da4a1c17 Author: TB Schardl Date: Fri Jun 26 14:15:31 2020 +0000 [Cilkscale] Fix header to ensure a callable wsp_zero() function is available. commit b22958789aa3c5595a12579f9a0422a6be2dcc9a Author: Grace Yin Date: Fri Jun 12 14:22:31 2020 -0400 all spawners must use the Cilk personality function commit 3a4f271baccc268edf0152d720b2555c72bdf154 Author: TB Schardl Date: Tue May 5 03:58:03 2020 +0000 [CilkRABI] Add __cilkrts_cleanup_fiber runtime function, to handle any last-minute fixes to the stack frame upon entering a catch in a spawning function. 
commit a23c350612beb737f7f610888c206f5005303738 Author: Grace Yin Date: Tue Apr 28 20:50:50 2020 -0400 [CilkRABI] spawn helpers don't get cilk personality or cilkrts resume commit 52e3d5131e57e2b8e304dde99aaf6a7d83b9a004 Author: Grace Yin Date: Tue Apr 28 03:21:40 2020 -0400 [CilkRABI] fixed spawn helper landingpads commit a2aad8c3252d4d63c8cbea2f6f37433099f58a44 Author: Grace Yin Date: Mon Apr 27 03:00:58 2020 -0400 [CilkRABI] added calls to __cilkrts_check_exception_resume/raise in cilk_sync + spawn helper cleanup commit c6e6fac043afeff36433e2cf15a6248f03377ec2 Author: Grace Yin Date: Fri Apr 17 21:25:10 2020 -0400 [CilkRABI] pause_frame now takes exn as an arg, removed store_exn_sel commit 0949b38c167a1b30ccab1788e73dd3d9c6bb7fa7 Author: Grace Yin Date: Wed Apr 15 21:45:48 2020 -0400 [CilkRABI] Only spawners use Cilk personality fn commit 0244f55e89b06292ada5f9893856d89c958edc0c Author: Grace Yin Date: Fri Apr 3 15:39:28 2020 -0400 [CilkRABI] setjmp and pause_frame in spawn helper cleanup commit ed3b3c3db5462efca57df344cf56eb3e8deb1853 Author: Grace Yin Date: Wed Mar 25 00:24:34 2020 -0400 [CilkRABI] store exn and sel in the spawn helper cleanup commit 1b4b849ddab8e7ff2eda85af3900fff4df884eeb Author: Grace Yin Date: Sun Feb 9 21:54:46 2020 -0500 cilk personality commit 4e632350dbb8c28e562b840954500d6d0a7a5e97 Author: TB Schardl Date: Mon Jun 22 13:51:59 2020 +0000 [SemaExpr] Add semantic check that at most one spawn occurs among the arguments of a function call. commit aa70167e345c0c5282ee76a30457ed23f6e53491 Author: TB Schardl Date: Mon Jun 22 13:50:14 2020 +0000 [CGExpr][CGAtomic][CodeGenFunction] Allow a spawn to occur among the arguments of a function call. commit 9984fd14e34f64f549da27e782d2086c9344c3e9 Author: TB Schardl Date: Mon Jun 22 02:44:11 2020 +0000 [Driver] Add option to link static OpenCilk runtime library. 
commit ac5e423fd248ac8661cb0733c1a4146bbb0e0fcc Author: TB Schardl Date: Fri Jun 19 16:26:15 2020 +0000 [TapirUtils] Cleanup the names of basic blocks created when splitting blocks at taskframe boundaries. commit 07dad49675f37a1a24319ec0d1a266d7bf589ffb Author: TB Schardl Date: Fri Jun 19 16:24:50 2020 +0000 [test/Cilk] Fix bug in which test failed to check for the correct basic block in the CFG. commit 3f6449c04ed22cbbc1dd893f5cbde176263d74f7 Author: TB Schardl Date: Fri Jun 19 16:22:33 2020 +0000 [test/Tapir] Add test that removing a taskframe properly updates relevant PHI nodes. commit af4e073e181273d3a3e057a094c8a89f1c9fb4bd Author: TB Schardl Date: Fri Jun 19 16:20:51 2020 +0000 [TapirUtils] Fix bug to preserve dominator-tree analysis during removal of taskframe.resume instructions. commit e74d2808f81be5376b9d72ff347ffdfb1c3ed6b5 Author: TB Schardl Date: Fri Jun 19 16:17:39 2020 +0000 [test/Tapir] Modify existing test to check removal of unnecessary taskframe. commit 47f3f571c5fc5f7c9b0f950dab51396540bc75d8 Author: TB Schardl Date: Fri Jun 19 16:15:42 2020 +0000 [test/Cilk] Update Cilk tests to preserve taskframes inserted by Clang. commit 8e7b4277b2273b81c24911d1d3cdce9d00482556 Author: TB Schardl Date: Fri Jun 19 16:12:51 2020 +0000 [TaskSimplify] Enable task-simplify pass to remove unnecessary taskframes and to hoist code out of taskframes. commit 0bcf71e675032799e051c6f59e5e97b4ef0ce324 Author: TB Schardl Date: Fri Jun 19 16:10:17 2020 +0000 [TapirTaskInfo] Add method to report whether a taskframe spindle has any children taskframes. commit 8ed1d5bb70631fd5a5707e8388619ebaa5aa2107 Author: TB Schardl Date: Thu Jun 18 15:22:41 2020 +0000 [TapirCleanup][TapirUtils] Make TapirCleanup remove any taskframes left in the code before machine-code generation. 
commit 9d987c2003f224986d7e7a4433f5af855c6854b2 Author: TB Schardl Date: Thu Jun 18 15:19:14 2020 +0000 [JumpThreading] Prevent jump threading from creating control-flow paths that exit an unassociated taskframe, which would violate structural invariants of taskframes. commit 05907877628a62c50cfb145f995f6649f508c53a Author: TB Schardl Date: Thu Jun 18 15:15:52 2020 +0000 [InlineFunction][TapirUtils] When inlining a function that contains Tapir instructions, enclose the inlined code in an unassociated taskframe. This behavior ensures that Tapir instructions -- e.g., sync -- in the inlined function do not necessarily affect spawned tasks in the caller. commit e936095d339deee5ff31f313c014bcaa1c0e1002 Author: TB Schardl Date: Thu Jun 18 15:12:38 2020 +0000 [TapirToTarget][LoweringUtils] Outline unassociated taskframes during Tapir lowering. commit 3a86c75616db63c08bbf1d8c1387df9cb98276a0 Author: TB Schardl Date: Thu Jun 18 15:03:13 2020 +0000 [TapirUtils] Update Tapir utilities to handle unassociated taskframes, which are not used by specific spawned tasks. commit 7b1a90fa267dca9c7a09e12d8af493e7d54d8e36 Author: TB Schardl Date: Tue Jun 16 19:53:29 2020 +0000 [TapirTaskInfo] Analyze taskframes that are unassociated with spawned tasks and terminated by taskframe.end intrinsics. commit cfa0050104ccc55313ec2b8c64140cf167a93b47 Author: TB Schardl Date: Tue Jun 16 19:04:48 2020 +0000 [Intrinsics] Add taskframe.end Tapir intrinsic, to denote the normal (i.e., non-exceptional) end of a taskframe that is not associated with a spawned task. commit 425404433197f4130f9fdd4413c4574ae827cd3c Author: TB Schardl Date: Tue Jun 16 12:13:02 2020 +0000 [CMake] Clean up a comment, and remove a dead library-existence check. commit 44073812483fbc2421868266796a94f2cfc290ea Author: TB Schardl Date: Tue Jun 16 12:09:56 2020 +0000 [TapirLoopInfo] Consolidate code for emitting optimization remark analyses for Tapir loops. Clean up the remarks emitted to be easier for users to understand. 
commit 85e5d6cd90f1c5fa66f3ad5e208c371886e5dabb Author: TB Schardl Date: Mon Jun 15 14:32:31 2020 +0000 [CGAtomic] Add basic support to perform atomic operations, such as atomic stores, that might have a spawned operand. commit ee243cc746970e3edabe6d503c2f9ea8cd23364f Author: TB Schardl Date: Mon Jun 15 23:42:54 2020 +0000 [Cilkscale] Add output-stream operator for output-file streams. Slightly cleanup the stream output for wsp data. commit 8b14a5dbfb3a7f04f819af95c339a4a22445bb0b Author: TB Schardl Date: Mon Jun 15 23:41:00 2020 +0000 [Cilkscale] Add the prefix 'wsp_' to all Cilkscale probe methods, to avoid namespace collisions with program-under-test. Add wsp_zero() method to get a 0 wsp value for aggregation. commit 1e9af38e04a366396d135f39b9c29510a0fb8d86 Author: TB Schardl Date: Fri Jun 12 13:40:27 2020 +0000 [Cilkscale] Add benchmarking tool that implements the same API as Cilkscale. This benchmarking tool allows programmers to perform benchmarking on a program by simply recompiling the program. The benchmarking tool supports the following features: -) Any uses of Cilkscale's probe API are automatically repurposed to perform fine-grained benchmarking of the program under test. -) The benchmarking tool generates a CSV that corresponds to the CSV generated by the Cilkscale tool itself. -) If the parallel version of the Cilkscale tool and this benchmarking tool are used, or if all calls to print probe results occur serially, then the CSVs generated by both tools will correspond line for line. -) When collecting running-time results, the benchmarking tool uses the same underlying timer as Cilkscale. -) The benchmarking tool performs calls to measure overall running time at corresponding locations to those Cilkscale uses to measure overall work and span. 
commit 1c81ae6a56239a68fd7dc846bf35b20a140f92c2 Author: TB Schardl Date: Fri Jun 12 13:38:51 2020 +0000 [Cilkscale] Extend API to allow programmers to tag lines of CSV output when printing performance results; cleanup code; add documentation. commit cf09dfe0c8715c8faf8776964206057714012d30 Author: TB Schardl Date: Fri Jun 12 11:56:32 2020 +0000 [compiler-rt/CMake] Fix checks to see whether the compiler supports OpenCilk, rather than checking to see whether the compiler supports Intel Cilk Plus. commit 0cf7efddd75f075bdbf26a3c7888d879795557fd Author: TB Schardl Date: Fri Jun 12 11:53:52 2020 +0000 [CommonArgs] Automatically link the C++ standard library when linking the Cilkscale tool. commit c5da2415ff614f33677a1e2326a7dda315fa0b41 Author: TB Schardl Date: Fri Jun 12 03:27:20 2020 +0000 [Cilkscale] Rewrite Cilkscale tool with several enhancements and features: -) Rewritten in C++ to take advantage of std::chrono for portable timing and the more structured and featureful Cilk reducer library for C++. -) Add support for probing the computation to get the work and span of the computation dag from program start to the probe point. -) Improve the algorithm so that reading a probe takes O(1) time. -) Support addition and subtraction of probes, e.g., to measure the work and span of the computation between two probes. Thus, probes can be used to measure the work and span of a subcomputation, especially when the probes being compared share the same peer set. -) Support printing WSP values extracted from probes and computations on probes. -) Add support for parallel Cilkscale, including parallel calls to print results of probes. Print-outs are guaranteed to be produced in a deterministic order, consistent with the serial execution, even when print-out calls occur in parallel. -) Allow Cilkscale to be compiled using different timers -- rdtsc, std::chrono, or LLVM-based instruction counts -- based on compilation flags. 
-) Add support for computing burdened span simultaneously with span. -) Allow results to be printed in CSV format either to std::cout or a file whose name is specified by the CILKSCALE_OUT environment variable. commit ec9372d773c196eece4bca3d16aada18fed9ad3d Author: TB Schardl Date: Fri Jun 12 02:50:15 2020 +0000 [CSI] Modify instruction-count measurement to use LLVM's cost model as a deterministic surrogate measure for time. commit 341cc2ede26d735842a54ffa7facb1cd32e431c9 Author: TB Schardl Date: Fri Jun 12 02:42:51 2020 +0000 [compiler-rt] Add header file for Cilkscale probe interface. commit e47462706bc2f22a6600dc5b06089c5b145d1d47 Author: TB Schardl Date: Fri Jun 12 01:22:44 2020 +0000 [InitPreprocessor] Add macros to designate when a program is compiled with some Cilk tool. commit 6447f5bf59b7102c63ce505e24321df076950327 Author: TB Schardl Date: Fri Jun 12 01:15:44 2020 +0000 [Clang] Add support for compiling and linking two additional variants of the Cilkscale tool. The cilkscale-instructions variant measures work and span using the LLVM cost model as a surrogate performance metric to time that produces deterministic results for deterministic programs. The cilkscale-benchmark variant collects running times for parallel Cilk programs, but it supports the same API as the ordinary Cilkscale tool. Hence, programmers need not modify their program to perform benchmarking of their programs for the Cilkscale tool and visualization. commit c724e10c5e52e11575358ceac16b28ed084f9e8a Author: TB Schardl Date: Fri Jun 12 01:10:08 2020 +0000 [ToolChain] Modify the linker flags added for the OpenCilk runtime to make it easier to compile and link Cilk programs with a custom build of the runtime. commit 4d2be42ba61ac6eaf492b26025048a0fe84f0f8e Author: TB Schardl Date: Fri Jun 12 00:54:53 2020 +0000 [Headers] Remove Cilk headers from clang subproject. These headers have been moved into cheetah. 
commit 3e8744c54f45b1fb38d9fbb3fe01bb6a39f7fe7b Author: TB Schardl Date: Fri Jun 12 00:49:30 2020 +0000 [CMake] Add support for integrating cheetah compilation with the rest of OpenCilk. commit 792e87e115e3175e15bce70623bb3b291e9459ee Author: TB Schardl Date: Fri Jun 12 00:45:01 2020 +0000 [Analysis][SelectionDAG] Update handling of Tapir intrinsics that might be invoked, rather than called. commit cbbf8cdaba6d8474fdf36d9e61fa38e6825192ad Author: TB Schardl Date: Fri May 15 12:01:51 2020 +0000 [test/Cilk] Regression tests to check the insertion of implicit syncs (before cleanups) due to arbitrary return statements. commit 4e2eb8063d6674b4803b00a8b91b31376c54e3d6 Author: TB Schardl Date: Fri May 15 11:59:14 2020 +0000 [TapirUtils] When splitting taskframe.create blocks for fixing up external uses of variables defined in taskframes, also split critical detach-continuation edges. commit b928e27b98f9caafeddd3682508f9663a91afaaf Author: TB Schardl Date: Fri May 15 11:57:55 2020 +0000 [CilkSanitizer,CSI] Fix basic-block setup to generate a more canonical CFG in the presence of exception-handling code and Tapir instructions. commit ba2d93458f6656a42b8b2e3065189a1aedb043ab Author: TB Schardl Date: Fri May 15 11:52:51 2020 +0000 [TapirTaskInfo] For tasks with no separate taskframes, exclude the unwind destination of a detached-rethrow from the set of taskframe spindles. commit afbb55d40cf7f02f39f16ec42adb95af401928d4 Author: TB Schardl Date: Fri May 15 11:50:56 2020 +0000 [TapirTaskInfo] Fix an assertion to check that the spindle entry after a sync is either a sync or a phi spindle. commit 6e6f1340fb62a334ca95a5c6b912c9f219bf52f3 Author: TB Schardl Date: Fri May 15 11:44:48 2020 +0000 [CGStmt] Ensure that an arbitrary return statement in the middle of a Cilk function performs an implicit sync before cleanups. commit dd2470cb9dd9db0353f6175fae12b8841ecdad36 Author: TB Schardl Date: Wed May 13 00:28:52 2020 +0000 [CilkSanitizer] Clean up some debug output. 
commit abcec62055a87cbf105714c75508de8e7bb3e2dc Author: TB Schardl Date: Wed May 13 00:28:13 2020 +0000 [CilkSanitizer] Set noalias suppression information for allocas. commit 26a969a5170d54c11d394c16ef454971f7463217 Author: TB Schardl Date: Wed May 13 00:27:12 2020 +0000 [CilkSanitizer] Use suppression information for local races, and set suppression information based on local races. commit 29d7ffadfb06176200ba51ecabb64b6eb5b9a81c Author: TB Schardl Date: Mon May 11 19:12:19 2020 +0000 [CilkSanitizer,FunctionAttrs] Remove unnecessary derivation of custom norecurse attribute, and revert corresponding change to FunctionAttrs. commit 938641562c83c0f30513a7cd82c05cfcc937b282 Author: TB Schardl Date: Mon May 11 18:53:59 2020 +0000 [CilkSanitizer] Fix setting and propagation of noalias suppression information, i.e., to ensure that passing a noalias pointer to multiple arguments of the same call invalidates noalias. commit b7eb5e09f9875b0340b9ffeaa5eda61deaf0c3db Author: TB Schardl Date: Mon May 11 18:30:15 2020 +0000 [DependenceAnalysis] Analyze dependencies between call instructions conservatively when we do not know the size of the memory location accessed. commit 939bc386ea8edd55ec2e2ab058e407f311d44bbb Author: TB Schardl Date: Mon May 11 18:27:14 2020 +0000 [TapirRaceDetect] If the two endpoints of a race do not share any base objects - e.g., because they access different base objects that might alias - do not use any common-object loop for analysis. commit b90efc810a21e5c712ca54e444de5ddf13fa79e5 Author: TB Schardl Date: Mon May 11 18:25:26 2020 +0000 [TapirRaceDetect] Cleanup code to use unsigned integers for bit arithmetic. commit a001f800d5aff8d10c1e70272eefd0d6a2ac0947 Author: TB Schardl Date: Wed May 13 00:15:47 2020 +0000 [TapirUtils] Fix dominator-tree updating when serializing a detach with an unwind destination, e.g., as part of loop stripmining. 
commit 7f2896ac6b217afbfee38cccce66beaaf7c5e3b6 Author: TB Schardl Date: Fri May 8 14:09:06 2020 +0000 [Tapir] Fix bugs with lowering complex tasks to Cilk backends. commit 7413fabbe2de83bcacebdc8cd740448c9ef57282 Author: TB Schardl Date: Thu May 7 02:08:48 2020 +0000 [CilkRABI,Tapir2Target] Add the '-use-external-abi-functions' hidden flag, and use this flag to direct CilkRABI to use external implementations of Cilk ABI functions, rather than compiler-generated versions. commit d79c7a17304384bfdfc15c439f668f90b074b5c8 Author: TB Schardl Date: Tue May 5 02:17:43 2020 +0000 [CilkRABI] Restore an optimization on calls to leave_frame removed for debugging purposes. commit e466a8226421f550b4c11022b230327df7ee6123 Author: TB Schardl Date: Tue May 5 02:36:46 2020 +0000 [CilkABI,CilkRABI] Fix insertion of cilk_sync implementation when the cilk_sync may have to handle an exception. commit 21f6980694b397843e8c9c6a03dc858684c760ad Author: TB Schardl Date: Mon May 4 14:56:21 2020 +0000 [CilkRABI] Consolidate code for computing frame-version flag. commit 99a8c1d01d9799530260be4907668fe132754e9d Author: TB Schardl Date: Mon May 4 14:51:08 2020 +0000 [CilkRABI] Fix bug with inserting __cilkrts_enter_frame and __cilkrts_detach calls out of order when lowering tasks with no taskframe intrinsics. commit e93c91cf8f100c12adc52aa4d2fd4bcbfaa7eb9b Author: TB Schardl Date: Mon May 4 14:28:30 2020 +0000 [CilkABI,CilkRABI] Remove unnecessary comment. commit 1b3339cada7e307c15ef2ff53b2363ac0b5f32cb Author: TB Schardl Date: Mon May 4 14:23:51 2020 +0000 [ToolChain] Link distinct runtime libraries for the opencilk and cheetah backends. commit ddfd0d404f1d7a4d128cfd7426fe5de5697fc3cc Author: TB Schardl Date: Fri May 1 15:13:57 2020 +0000 [CGCleanup] Fix bug causing one cleanup to be dropped from the implicit-sync landingpad. 
commit d26601be863b3a8687ca219322f643de55c15499 Author: TB Schardl Date: Fri May 1 15:08:07 2020 +0000 [CodeGenFunction] Clean up merged code for inserting an implicit sync at the end of a function. commit 0298a252fd495c486420c39d6cba8ff4552f951c Author: TB Schardl Date: Wed May 6 01:11:24 2020 +0000 [CilkRABI] Don't insert calls to pop_frame and leave_frame before resumes in spawners. In this case, the Cilk personality function will take care of calling pop_frame and leave_frame. commit c7f643a80c00e3db725af4a68f5fdda533972a62 Author: TB Schardl Date: Thu Apr 30 14:18:45 2020 +0000 [TapirToTarget,CilkABI,CilkRABI] Refactor Tapir lowering process to provide more fine-grained control in target-specific code transformations. Refactor Cilk backends accordingly. commit 660f294780f8aa67d41afd8cbe7e83ac11da695b Author: TB Schardl Date: Wed May 6 01:07:23 2020 +0000 [CilkRABI] Mark __cilkrts_sync as nounwind. commit 9fa3625fd81a34810d0db3664aebae43ca4d97ef Author: TB Schardl Date: Wed May 6 01:06:36 2020 +0000 [TapirToTarget,CilkRABI] Prototype fix Tapir lowering of code that requires landingpads with Cilk routines. commit 38bbf66d7e63c28e00e20585ed5403056116b8b7 Author: Grace Yin Date: Tue Apr 28 03:21:40 2020 -0400 fixed spawn helper landingpads commit d36737941eb40ce0413a8dc10906ae263b20a941 Author: Grace Yin Date: Thu Mar 12 18:39:10 2020 -0400 fixed bug where phi nodes in an always_inlined spawned function compiled at -O0 might refer to uncloned, unreachable blocks. commit 7aaebbf054ad09051aef1eec7648ec34afcae010 Author: John F. Carr Date: Thu Apr 23 16:22:58 2020 -0400 Find the OpenCilk runtime in the LLVM runtime library directory commit 721b5c6ad88d58b6e31593728714bcb59e9fef63 Author: John F. Carr Date: Mon Apr 20 15:07:38 2020 -0400 Call AddTapirRuntimeLibArgs on FreeBSD commit af93a4426d8c6d744901284bf029f81bd9187952 Author: John F. 
Carr Date: Wed Apr 8 16:55:31 2020 -0400 Update hyperobject definition for OpenCilk ABI commit 3fc1457d0856feef6aae7ac0464775f98740379d Author: John F. Carr Date: Wed Apr 8 15:12:16 2020 -0400 -fopencilk implies Cilk ABI version 2 commit d3f87fd8e7ed5291ae46f57e0660f0a88ab2796c Author: John F. Carr Date: Wed Apr 8 12:57:02 2020 -0400 Add the -fopencilk flag commit da13cc3445fec754d2361cbb4a670c1b40c46366 Author: John F. Carr Date: Tue Apr 7 16:43:35 2020 -0400 Fix C++ library used by Cilk sanitizer commit b140340cf46c6829e5dd77a69aee79a87255da7f Author: John F. Carr Date: Tue Apr 7 09:56:48 2020 -0400 cilksan FreeBSD porting commit 9dfce92a364e76e3ba7ccee8751777fb6fe7ee1e Author: John F. Carr Date: Thu Apr 2 12:18:18 2020 -0400 ABI change commit b0fbea33c229a99e6dddcba0220eb26498b4526a Author: John F. Carr Date: Wed Mar 18 11:27:44 2020 -0400 OpenCilk ABI commit 3ebebdf1600369bfe18478920032e64d0264faa3 Author: John F. Carr Date: Sun Mar 15 15:54:47 2020 -0400 New ABI commit 7a79310ea4aefc27fbbc5fe4b5de1ab5adfa9e3e Author: John F. Carr Date: Mon Feb 17 12:39:08 2020 -0500 Relax some memory ordering constraints commit 904da7c3cb17666703c0c25b7c0ecff37b71c7fe Author: TB Schardl Date: Wed May 6 01:27:50 2020 +0000 Reduce compiler warnings. commit 6cabb6a079fb65bd6d590b2875c605f6c0c6363a Author: TB Schardl Date: Wed May 6 01:27:06 2020 +0000 Improve -ftapir= target checking commit 197f69a98a9ae03d4fd16780736e7759764865d7 Author: TB Schardl Date: Sat May 2 01:35:31 2020 +0000 [InlineFunction] Replace inlined resumes recursively based on nested tasks and taskframes. Ensure that this handling does not overwrite existing unwind destinations of tasks or taskframes. commit 4d8a5341c03052c5c7554846d353955ecf5a1d03 Author: TB Schardl Date: Sat May 2 01:32:19 2020 +0000 [LoweringUtils] Ensure that functions with task.frameaddress intrinsics are lowered. 
commit 113ba662d58e0aeeb440727edc85dca9bd271819 Author: TB Schardl Date: Sat May 2 01:31:12 2020 +0000 [LoopSpawningTI,LoweringUtils] Move static allocas in taskframe.create basic blocks. commit c3e81dc4ab91b8aff8e0ce127b5871144c966c1e Author: TB Schardl Date: Sat May 2 01:28:56 2020 +0000 [CilkABI,CilkRABI,OpenMPABI] Clean up code to reduce compilation warnings. commit 7fc85e8c745fb9e59e0464d64cbc8ae2de9da56b Author: TB Schardl Date: Sat May 2 01:27:04 2020 +0000 [CGCilk] Document insertion of detached.rethrow intrinsics for spawned tasks. commit e364060038a8960744098a36e4f0c5b41f05f83e Author: TB Schardl Date: Sat May 2 01:24:31 2020 +0000 [CilkSanitizer,CSI] Handle taskframe.resume and sync.unwind intrinsics when instrumenting a function. commit fbe112e4201b7b74986f85827a555bf4d49b662d Author: TB Schardl Date: Sat May 2 01:21:32 2020 +0000 [CilkABI,CilkRABI] Fix PHI nodes in unwind destination of sync.unwind after lowering to a Cilk runtime. commit bf742b824e25bb53248fb177f27988cc737f20eb Author: TB Schardl Date: Sat May 2 01:18:53 2020 +0000 [TapirToTarget,LoweringUtils,Outline,TapirUtils] Modify Tapir lowering to outline tasks based on the taskframe tree. commit 56acb65b06035ac5a6bd00394dc078af33f9163f Author: TB Schardl Date: Sat May 2 00:54:06 2020 +0000 [TapirUtils] Add utility methods to convert calls to invokes with proper cleanup landingpads for nested tasks. Use this method to convert calls to invokes in CSI pass. commit 64caef45457886816dd264164ba74e1fb049d859 Author: TB Schardl Date: Sat May 2 00:34:19 2020 +0000 [TapirTaskInfo] Add support for computing and querying the taskframe tree. commit 708fa1b6cf2fddec71e3e635f2aaeac4c245b54d Author: TB Schardl Date: Thu Apr 30 04:22:09 2020 +0000 [SimplifyCFG] Ignore taskframe intrinsics when identifying trivial tasks to remove. commit 16ee0b3444d30aefa9b847c0bcf9ef7f6167475a Author: TB Schardl Date: Fri Apr 24 14:13:25 2020 +0000 [InlineFunction] Fix bug to handle inlined resumes in nested taskframes. 
commit 9e5a93c8c29e23919d2b105695ccffb799aa5840 Author: TB Schardl Date: Fri Apr 24 14:12:25 2020 +0000 [InlineFunction] Treat taskframe.create intrinsics as defining entry blocks, where allocas and syncregion.start intrinsics in callees are placed in the caller during inlining. commit 12862e30a924c4c661aa20fc04ec3e2a86dc8292 Author: TB Schardl Date: Fri Apr 24 15:22:16 2020 +0000 [TapirUtils] Treat canonical taskframe.create blocks as entries of detached contexts. commit 66a75a7c07d88746d03ba86631721755d7119d26 Author: TB Schardl Date: Fri Apr 24 14:09:36 2020 +0000 [InlineFunction] Fix bug in discovering calls to convert to invokes when handling inlined tasks. commit ec34ed2faca0efd14cee02caacf7757d93a4704f Author: TB Schardl Date: Fri Apr 24 14:04:14 2020 +0000 [LoopStripMine,InlineFunction] Remove dead code. commit 0cc1b1cf3156dc18f87cc3a3687cdc665d138d28 Author: TB Schardl Date: Fri Apr 24 13:32:37 2020 +0000 [InstCombine] Fix bug with attempting to remove Tapir intrinsics that are still in use. commit 75cfdea31196dcc03d272e09e3e1fd37202c350f Author: TB Schardl Date: Fri Apr 24 13:14:36 2020 +0000 [TapirUtils] Add more flexible utilities for checking for Tapir intrinsics. commit 1388078001034c403e6b714af5e6aab966a5f805 Author: TB Schardl Date: Fri Apr 24 04:09:58 2020 +0000 [CilkABI,CilkRABI,TapirToTarget] Modify Cilk targets to perform more targeted inlining of target functions. commit 8d33a1112b032cdcf19a77098b26d08173ad81d6 Author: TB Schardl Date: Fri Apr 24 03:49:29 2020 +0000 [test/Tapir] Add simple regression tests of basic optimizations operating with sync.unwind intrinsics. commit 280de21a50acd4db309e0804e9dcc718d6031e8e Author: TB Schardl Date: Fri Apr 24 02:39:34 2020 +0000 [Intrinsics] Add documentation to some Tapir intrinsics. commit cadff46c2833788868d3975f6604347f27eb0420 Author: TB Schardl Date: Thu Apr 23 23:06:11 2020 +0000 [CilkABI,CilkRABI] Mark that various opaque Cilk runtime methods do not throw. 
commit b41757e3ca1f8e313dc0edf0a018f10408eb8081 Author: TB Schardl Date: Thu Apr 23 23:01:56 2020 +0000 [TargetTransformInfoImpl] Treat sync.unwind as having zero cost. commit 37d03920c36567240490657b0fd42cb6f4101f70 Author: TB Schardl Date: Thu Apr 23 23:01:04 2020 +0000 [AliasSetTracker,MemorySSA,TapirRaceDetect,ValueTracking,FunctionAttrs,CilkSanitizer,ComprehensiveStaticInstrumentation] Ignore Tapir intrinsics when performing various analyses. commit 9bf7e8a36320f722c84ab57a3bd26be3895f63fc Author: TB Schardl Date: Thu Apr 23 22:56:22 2020 +0000 [FunctionAttrs] Ignore sync.unwind intrinsics when determining if a function can throw. commit c0ac0d16f20b174eb02184129db4fe3a264ac3c1 Author: TB Schardl Date: Thu Apr 23 22:55:22 2020 +0000 [SimplifyCFGPass] Remove dead sync.unwind calls or invokes when removing syncs. commit fb3e34ea16c5d962ef01854e4ef5d6268b4e1b38 Author: TB Schardl Date: Thu Apr 23 22:54:16 2020 +0000 [InstCombine] Handle sync_unwind when removing calls. commit fd3e1410bac5b2375681753a28f0232ec1516217 Author: TB Schardl Date: Thu Apr 23 22:22:41 2020 +0000 [TaskSimplify] Remove dead sync.unwind calls or invokes when removing redundant syncs. commit 0aed30866af95245fc84cf5bce6255df3c6c9fec Author: TB Schardl Date: Thu Apr 23 22:21:45 2020 +0000 [SimplifyCFG] Remove dead sync.unwind calls or invokes when removing empty syncs. commit 1c33587dbae0d3d96d40bc251d6aedd78aea49f9 Author: TB Schardl Date: Thu Apr 23 22:11:47 2020 +0000 [TailRecursionElimination] Effectively ignore calls to sync.unwind when performing tail-recursion elimination. commit 7d0ac0b483b4d67ad0d29e5a9f4b1dc72a6ed75e Author: TB Schardl Date: Thu Apr 23 22:17:12 2020 +0000 [TapirUtils] Add utilities for checking for and removing sync.unwind calls and invokes. commit 9da0ff92a1fc5c5d4c60356d0fd363f8c87a4fe4 Author: TB Schardl Date: Thu Apr 23 22:10:39 2020 +0000 [TailRecursionElimination] Insert calls to sync.unwind after syncs inserted by tail-recursion elimination, if necessary. 
commit ad5748f161b7dfbe4b02ae982b6e23906028e733 Author: TB Schardl Date: Thu Apr 23 22:06:49 2020 +0000 [LoopStripMine] Insert a sync.unwind after a sync inserted by loop stripmining, if necessary. commit dc4a24c67bb336353f61a7373de58f8ce075a147 Author: TB Schardl Date: Thu Apr 23 22:04:07 2020 +0000 [LoopSpawningTI] Insert a sync.unwind after each sync inserted into the outlined helper, if necessary. commit 419d45d4952914a6fae1a3391318230dab866d34 Author: TB Schardl Date: Thu Apr 23 22:00:20 2020 +0000 [BasicBlock] Add helper method to skip sync.unwind intrinsics at the start of a basic block. commit 2715f6fa5ba29cc0dac11dd3d98be3ff9ee7fa28 Author: TB Schardl Date: Thu Apr 23 21:58:20 2020 +0000 [CilkABI,CilkRABI] Modify lowering of syncs in Cilk ABI variants to use the landingpad of a sync.unwind immediately following the sync. commit 1d1811bbe7a02a4b4b4c47a7371c764a6ebec979 Author: TB Schardl Date: Thu Apr 23 21:44:30 2020 +0000 [SelectionDAG] Add default codegen behavior for sync.unwind, namely, that sync.unwind does nothing. commit 29fd9bb80be65639c11f941e94557baf06b27795 Author: TB Schardl Date: Thu Apr 23 21:42:39 2020 +0000 [Verifier] Add sync.unwind to the set of intrinsics that can be invoked. commit d8139095f777d0a8fd2bec069db2fcc341c361e9 Author: TB Schardl Date: Thu Apr 23 21:37:39 2020 +0000 [CGCilk] Insert sync.unwind after sync instructions. commit b47f0f42b9a2e3acf7583c700b7830023f5b3491 Author: TB Schardl Date: Thu Apr 23 21:26:07 2020 +0000 [Intrinsics] Add the sync.unwind intrinsic to maintain an unwind destination for a sync. commit 3e0f6281daa7a5c621b27addb757265fcdfee64e Author: TB Schardl Date: Tue Apr 14 18:15:50 2020 +0000 [github] Add GitHub actions for running regression tests on push, based on GitHub actions in llvm-project release/10.x. commit a6d33a62d5d944837c7376bbd934b959af7ed147 Author: TB Schardl Date: Sat Apr 18 21:20:25 2020 +0000 [test/Tapir] Update test of lowering Tapir tasks that contain exception-handling code. 
commit 1b335ef2c3fa6e22691bf364be242d87c3d9f721 Author: TB Schardl Date: Sat Apr 18 21:19:30 2020 +0000 [LoopSpawningTI,Outline,LoweringUtils] Modify Tapir outlining to handle the placeholder unreachable destinations of detached.rethrow and taskframe.resume instructions. commit 6297bc924d50022ce8703285f2d974bf0d8a62a0 Author: TB Schardl Date: Sat Apr 18 21:23:35 2020 +0000 [Outline] Modify outlining to emit novel exit and unwind blocks at the end of the generated helper function. commit 20b23fa79db70bc6a285557f78cb7f9a9206c924 Author: TB Schardl Date: Sat Apr 18 21:22:18 2020 +0000 [LoopSpawningTI] Stop loop-spawning from necessarily inserting syncs before resume instructions in outlined functions. commit c2b89a9f3199f3c2ae6d9be5a7f112bd88695d92 Author: TB Schardl Date: Sat Apr 18 21:14:05 2020 +0000 [TapirUtils] Fix detach serialization to remove taskframe.resume intrinsics. commit 6f63db59c89d4e7c1b9335cc352b845fe16f7895 Author: TB Schardl Date: Sat Apr 18 21:08:49 2020 +0000 [TapirToTarget,TapirUtils] Ensure that taskframes are canonicalized before lowering Tapir tasks. commit 870e751c160b0e98e95f292881ca462732fd9974 Author: TB Schardl Date: Sat Apr 18 20:53:35 2020 +0000 [LoweringUtils,TapirUtils,test/Tapir] Clean up some code and comments. commit 93593daa6ab480d2f43f436eb3cbee169f63f5a3 Author: TB Schardl Date: Fri Apr 17 15:46:43 2020 +0000 [TapirTaskInfo] Ensure that taskframe spindles are created only when the taskframe.create intrinsic is in canonical form. Fix sense of helper predicate for checking for the canonical form of a taskframe.create. commit 1a3cc55a08f32ec8349e07d93297d8f64d7fb294 Author: TB Schardl Date: Fri Apr 17 15:43:44 2020 +0000 [CGCilk] Fix bug when emitting a spawned task requires subsequent emission of a resume block in the parent. Cleanup and comment code related to Cilk codegen. 
commit 59dbd21f8823d996eb9d560b1cc5a9d7a4d0be68 Author: TB Schardl Date: Tue Apr 14 13:21:50 2020 +0000 [InlineCost] Adjust function-inlining cost analysis to treat task-returns and syncs as free. commit 6763fe5428aab54ac68da59c851fed66c03e5687 Author: TB Schardl Date: Tue Apr 14 13:18:47 2020 +0000 [TargetTransformInfoImpl] Update cost model to treat standard taskframe intrinsics as free. commit 1c15227d10a93ddb7201d46b9edf37d2a849569a Author: TB Schardl Date: Tue Apr 14 13:15:19 2020 +0000 [InlineFunction, SimplifyCFG] Fix function inlining to restrict propagation of landingpad clauses between inlined tasks and callers. commit b2188ea9db8d2658007ab74de2e84a7ddcebcfe4 Author: TB Schardl Date: Tue Apr 14 12:51:38 2020 +0000 [TapirTaskInfo] Create taskframe spindles only when taskframe.create intrinsic is in canonical form. commit 79dc104029951cd7fe76eac329098485e5289792 Author: TB Schardl Date: Tue Apr 14 12:38:31 2020 +0000 [CodeGen] Emit implicit syncs before implicit destructors and other cleanups. commit 1aaa4f50ba5c3bef140c44c8c89e6517514b4604 Author: TB Schardl Date: Tue Apr 14 12:30:57 2020 +0000 [CGCilk] Clean up emission of basic blocks for _Cilk_for loops, to keep emitted basic blocks near each other and in a logical order. commit 3a3a6867880ed6e1627d99743170d94b1a6f29c3 Author: TB Schardl Date: Tue Apr 14 12:33:19 2020 +0000 [test/Cilk] Test the emission of builtins with Cilk constructs. commit d4bf6f338270eb6a11c3840d4b8821ff67d04a41 Author: TB Schardl Date: Wed Apr 8 17:26:49 2020 +0000 [CGCilk] Rather than fail an assertion, emit a warning if clang fails to emit a spawn. For example, clang may fail to spawn certain builtin functions, such as __builtin_assume. commit fe34c6c31ee472fbdce1c322b5058dc1b3efbdb5 Author: TB Schardl Date: Wed Apr 8 17:15:03 2020 +0000 [CGBuiltin] Add support for spawning more builtin functions. 
commit d1b01b6ea537e25e3bf01973265fb1444f0c9487 Author: TB Schardl Date: Wed Apr 8 16:56:21 2020 +0000 [Driver] Clean up comment referring to an old flag. commit 5e4335b607c06edb02c1d9a9443b5e43c8599bbb Author: TB Schardl Date: Sat Apr 4 23:58:37 2020 +0000 [test/Tapir] Fix test broken by more conservative application of DoesNotThrow property to outlined helper functions. commit 59aa39147c31c2fb15b826b86d41de2b4b16c00a Author: TB Schardl Date: Sat Apr 4 23:26:40 2020 +0000 [CGException] When compiling Cilk, insert a separate sync region with an implicit sync for a try block. commit 54eb6ed79d0c5704a4d44320cb7fd04d5a12f0f4 Author: TB Schardl Date: Sat Apr 4 23:25:29 2020 +0000 [CGCilk] When inserting landingpads for nested tasks, do not mark landingpads as catches when the catch is in a parent or other ancestor task. commit 491b98a533363c37a19ac39746a2f291458b7c94 Author: TB Schardl Date: Sat Apr 4 23:22:37 2020 +0000 [CGCilk] Simplify the code for creating a sync region with an implicit sync. commit eb5ab85ed6432c925d95815357cae4c6cf05735d Author: TB Schardl Date: Fri Mar 27 01:39:28 2020 +0000 [SelectionDAGBuilder] Discard any taskframe intrinsics left in the IR during machine-codegen. commit b7270362238ae070730f983bf878a8f7ad3813d9 Author: TB Schardl Date: Wed Mar 25 18:52:20 2020 +0000 [LoweringUtils,TapirToTarget,LoopSpawningTI] Apply the DoesNotThrow property to outlined helper functions more conservatively, since Tapir no longer guarantees that, if a detach spawns a task that can throw, then that detach will have an unwind destination. commit 8263777614da7b06c6bd03cf34c48379f1656469 Author: TB Schardl Date: Wed Mar 25 18:43:20 2020 +0000 [TapirToTarget] Fix typo in parenthesization produced during code cleanup. commit 7013f9b5d42837b073d4737f1ee66c7cb52ce910 Author: TB Schardl Date: Tue Mar 24 15:26:15 2020 +0000 [SROA,Mem2Reg] Add basic blocks containing used taskframe.create intrinsics to the entry blocks checked for allocas to promote. 
commit b43632320ebe5381faa50738083b139e9780f023 Author: TB Schardl Date: Tue Mar 24 15:20:38 2020 +0000 [FunctionAttrs] Treat taskframe intrinsics like lifetime markers. commit 8a16b83bd95d4f3ac4432fa1f77745227f67e70d Author: TB Schardl Date: Tue Mar 24 15:18:44 2020 +0000 [PartialInlining] Treat taskframe intrinsics like lifetime markers. commit 0c586002bc5ea7441c37e3a0e35705ce0a2f359d Author: TB Schardl Date: Tue Mar 24 15:16:00 2020 +0000 [SimplifyCFG,Local] Update SimplifyCFG to handle taskframe intrinsics and unwind edges from detach instructions. commit 8d8609ed21b254bcfaecd753021b94e04d2b2375 Author: TB Schardl Date: Tue Mar 24 15:11:41 2020 +0000 [InstCombineCalls] Enable InstCombine to remove useless uses of taskframe intrinsics. commit 787b1de4d48ee0c849dce32ccd959a832e432f75 Author: TB Schardl Date: Tue Mar 24 15:10:01 2020 +0000 [Verifier] Ensure the verifier allows taskframe.resume intrinsics to be invoked. commit 6aa40378fef0cf77f729a0103edc149523f98821 Author: TB Schardl Date: Tue Mar 24 15:09:00 2020 +0000 [Verifier] Remove dead code. commit 773110a4c10ff62af35a11de1e105c008a60dd79 Author: TB Schardl Date: Tue Mar 24 15:06:59 2020 +0000 [SelectionDAGBuilder] Update codegen to discard taskframe.load.guard intrinsics. commit 301e3ed1d2d95fd7c84bd717682008c4f1a62864 Author: TB Schardl Date: Tue Mar 24 15:05:56 2020 +0000 [TapirTaskInfo] Clean up printed output of task analysis to improve readability. commit ba134e49af6ce2ba8f7b7b6e495691fda839f5a2 Author: TB Schardl Date: Tue Mar 24 15:02:20 2020 +0000 [TapirTaskInfo] Minor code reformatting. commit ef3b4f536fa958cd2288e2c6020bf1184e8448dc Author: TB Schardl Date: Tue Mar 24 14:57:12 2020 +0000 [InlineFunction] Update inliner to insert detached.rethrow and taskframe.resume cleanup landingpads to correctly model exception-handling exit paths from spawned tasks. 
commit 9825904a65fe68d40b0a3ea73478654f0d7ee493 Author: TB Schardl Date: Tue Mar 24 15:00:49 2020 +0000 [LoopSpawningTI,Outline] Ensure that Tapir lowering inserts cleanup landingpads, rather than catch landingpads in outlined functions. commit 9c01dfae7c273e3e4861e3fe8db0a8e943229856 Author: TB Schardl Date: Tue Mar 24 14:50:11 2020 +0000 [TapirToTarget] Modify Tapir lowering to lower taskframes along with the tasks that use them, and, for Cilk runtime backends, to use the semantics of detach and taskframes to insert Cilk runtime calls. commit 015e66416ffede0b374b2e0657a1ff735aeb532b Author: TB Schardl Date: Tue Mar 24 13:17:25 2020 +0000 [TaskCanonicalize] Add pass to canonicalize taskframes, specifically, to split basic blocks at taskframe.create intrinsics. commit c911c0083305c478a5f78a3521c3dd4632449e5c Author: TB Schardl Date: Tue Mar 24 13:02:27 2020 +0000 [TapirTaskInfo] Add support to analyze taskframes. Specifically, allow taskframe markers to create spindle boundaries, and associate taskframe spindles with the task that uses the taskframe. commit 1cc7f2640ca5be5403d76118801a882685b8b86f Author: TB Schardl Date: Tue Mar 24 12:28:17 2020 +0000 [CodeGen] Modify Cilk codegen and associated unit and regression tests as follows: -) Insert taskframe intrinsics to encapsulate the evaluation of arguments to a spawned function. This encapsulation is needed in particular to capture the invocation of copy constructors for spawned function calls. These copy-constructor invocations must run in the frame of the spawned function but before the continuation of the spawn is allowed to execute in parallel. -) Treat the invocation of detached.rethrow and taskframe.resume intrinsics on exception-handling exits from tasks as cleanups, and in particular, as lifetime markers. This change allows the stack unwinder for handling an exception to properly recognize whether or not a task body can catch an exception. -) Cleanup the Cilk codegen code. 
commit 31d7c2b46c83574235f6d1348f6c4a8c78ccc520 Author: TB Schardl Date: Tue Mar 24 04:01:34 2020 +0000 [Intrinsics] Add intrinsics for taskframes associated with Tapir tasks. commit 65616b701085b4d10ecbd972dae299d21fdc000a Author: TB Schardl Date: Sat Feb 29 14:53:46 2020 +0000 [CilkRTSCilkFor] Add statistic to count loops transformed to use a __cilkrts_cilk_for call. commit b1303fb6320601ad1916838f4db654f637631909 Author: TB Schardl Date: Sat Feb 29 14:52:15 2020 +0000 [LoopUnswitch] Fix bug where loop unswitch would fail to clone spawned landingpads when unswitching a Tapir loop, thereby producing invalid IR. commit f599982904119e157ac3d2924f639ce719a98a32 Author: TB Schardl Date: Sat Feb 29 14:47:58 2020 +0000 [MachineSink] Clean up bug fix for machine sinking past a EH_SjLj, and add a regression test for the fix. commit e673f054e84c8f287f464f0e8f85bc517a222add Author: TB Schardl Date: Tue Feb 11 05:05:46 2020 +0000 [SemaInit] Fix bug where _Cilk_spawn is ignored on the initialization of a template-type variable. commit 3cabd9f602ca9abca28581657fb5ae71f58afbee Author: TB Schardl Date: Fri Jan 17 13:48:35 2020 +0000 [CilkRTSCilkFor] Move code for lowering Tapir loops to calls to __cilkrts_cilk_for into a separate file, and make it accessible from multiple Tapir targets, specifically, CilkABI and CilkRABI. commit 60d471c7a55ff4c7392ceef8dde02c5898ad17d8 Author: TB Schardl Date: Fri Jan 17 01:20:04 2020 +0000 [CudaABI][QthreadsABI][SerialABI] Ports of LANL Kitsune back ends to new Tapir lowering infrastructure. commit f50df88e867fa11314ee667395bcf0e87d4bcb83 Author: TB Schardl Date: Fri Jan 17 01:21:00 2020 +0000 [LoweringUtils] Bug fix to handle outlined helper functions that do not return void. commit ea014e9e61f8bb02aae4894e3ae3aec248b02f6f Author: TB Schardl Date: Tue Jan 14 19:45:57 2020 +0000 [CilkABI][LoopSpawningTI] Add region timers to gather performance profiles for more parts of Tapir lowering. 
commit d64bd8a829308bd54c23dcdf7190ebcc798dbb46 Author: TB Schardl Date: Tue Jan 14 13:50:05 2020 +0000 [TapirToTarget] Adjust the debugging behavior of Tapir lowering to verify the whole module and dump the module if verification fails. commit 9a20f5dcee8066d8819e7ae1cae6d6a71812321d Author: TB Schardl Date: Tue Jan 14 13:48:57 2020 +0000 [LoweringUtils][Outline][TapirToTarget] Add region timers to profile the core portion of final Tapir lowering. commit 1249bf6bc653b90ba307b632c1681310bde447a9 Author: TB Schardl Date: Tue Jan 14 13:45:37 2020 +0000 [LoopStripMine][SerializeSmallTasks] Allow LoopStripMine and SerializeSmallTasks to handle loops with a Tapir-loop structure, regardless of their loop-spawning-related metadata. commit 8bbca8d2262fe89ce84a5d6c516af4b0c1f8a7cb Author: TB Schardl Date: Tue Jan 14 13:43:46 2020 +0000 [WarnMissedTransforms] Add warning for missed loop-stripmining transformation. commit 34365e455fc164555fd876818877eaeb6109ae10 Author: TB Schardl Date: Tue Jan 14 13:31:50 2020 +0000 [TapirTaskInfo] Add more useful output when debugging is enabled. commit dd3e5d5551331285e9beb4b1785003fd56a474e9 Author: TB Schardl Date: Tue Jan 14 13:30:49 2020 +0000 [TapirTaskInfo] Fix bug with creating and identifying the exception-handling continuation of a detach. An EH continuation can be hard to define in corner cases, such as when TaskInfo is evaluated with detach instructions that have unwind destinations even though the task cannot throw. These cases do not appear to impact the effectiveness of the analysis, but the analysis should behave reasonably in these cases nonetheless. commit ba18a916d42045cc9c4da93f9da2c9de17b6bfcd Author: TB Schardl Date: Tue Jan 14 13:41:44 2020 +0000 [SimplifyCFG] Treat detached-rethrows like reattaches when simplifying the CFG. Clean up code. 
commit f4c41ea5932910b723fe1d92ccedb5456820ec25 Author: TB Schardl Date: Tue Jan 14 13:02:43 2020 +0000 [Local] Augment removeUnreachableBlocks to find and remove detach-unwinds for Tapir tasks that do not throw. commit 7162ff8aa2fd91307787757a68d184bfa345910f Author: TB Schardl Date: Tue Jan 14 13:25:07 2020 +0000 [TapirRaceDetect] Adjust how Tapir race detection skips debug instructions when scanning for instructions that might race. commit decb1cf4233111abe091a1502affe062891f8ac3 Author: TB Schardl Date: Tue Jan 14 13:35:29 2020 +0000 [CilkSanitizer] Cleanup code and remove dead code. commit 9e5c9b79e22608afe1204158ae9aaee5ec9fd4af Author: TB Schardl Date: Tue Jan 14 13:34:51 2020 +0000 [CSI][CilkSanitizer] Fix bugs with CSI ID calculation, particularly for inserting CSI ID calculation for unwind destinations of detaches. commit da1788ccd35c913c378dfdec7cfd78f6e542ee2c Author: TB Schardl Date: Tue Jan 14 13:19:34 2020 +0000 [CilkSanitizer] Fix bug to insert instrumentation for a memory intrinsic just once when the intrinsic might race with multiple other instructions. commit b20f17cba09821989d60632c54695f555a557823 Author: TB Schardl Date: Tue Jan 14 13:23:47 2020 +0000 [TapirRaceDetect] Increase default number of uses Tapir race detection searches for a capture. commit a50283e79486a5ad61a1c58f47523655b4c78c62 Author: TB Schardl Date: Tue Jan 14 13:15:25 2020 +0000 [TargetTransformInfo][ValueTracking][CSI] Update various lists of intrinsics that do not lower to real instructions. commit f308e7d22f6b4925d6d0d80bebcff3ebdda4927f Author: TB Schardl Date: Tue Jan 14 13:10:48 2020 +0000 [CSI][CilkSanitizer] Add support for caching results of pointer-capture, detached-use, and underlying-object analyses. Caching does not appear to necessarily improve the performance of these analyses in practice, but further performance testing is needed to determine when caching is valuable. These changes also cleanup some of the code for these analyses. 
commit cb8328fda5e3ee2e88312b57889c70a7ca5c8e19 Author: TB Schardl Date: Tue Jan 14 04:21:27 2020 +0000 [CSI] Cleanup internal routines to pass relevant arguments. commit 01a63d8a6874f1f1efb1d0f2614fb8d6a5b47d4e Author: TB Schardl Date: Tue Jan 14 12:56:02 2020 +0000 [CilkSanitizer] Infer a version of the norecurse attribute, which does not rely on having an exact definition of the function. commit 37f69894283e6d64333eddf546ca429e5399a24a Author: TB Schardl Date: Tue Jan 14 04:46:34 2020 +0000 [FunctionAttrs] Expose AttributeInferer interface to allow external passes to perform function-attribute inference. commit 49048fabe1a2a9057d88a30d927a798eb44caa17 Author: TB Schardl Date: Tue Jan 14 04:48:59 2020 +0000 [FunctionAttrs] Add list of intrinsics to ignore when determining NoRecurse attribute. commit a298e9a9632b37f870eb5b92adfbff4194776d3d Author: TB Schardl Date: Tue Jan 14 04:43:56 2020 +0000 [AliasAnalysis] Clean up and modernize subroutines for handling Tapir instructions. commit accbd259d6bc9d6683c1d5b99732273d802773f7 Author: TB Schardl Date: Mon Jan 13 21:26:04 2020 +0000 [TapirCleanup] Add Tapir cleanup pass to CodeGen, to handle cases where Tapir instructions are not lowered in advance. commit 52b2a415c1c9d2ecd7978d11b52febaa96399a76 Author: TB Schardl Date: Mon Jan 13 20:34:49 2020 +0000 [TapirRaceDetect] Add invalidation handling. commit 1b5dd6b0f700eb58f88699e59bc02c9d46981524 Author: TB Schardl Date: Mon Jan 13 20:33:06 2020 +0000 [FunctionAttrs] Cache results of calls to GetUnderlyingObjects when determining argmemonly and related attributes. commit 09139279f57f33871a0167e1b96017a8d0c0bdbd Author: TB Schardl Date: Mon Jan 13 20:27:43 2020 +0000 [Tapir] Update copyright on Tapir-related files. commit ae38bf340135cd45dc9a48bad324600e8ba55fd3 Author: TB Schardl Date: Mon Jan 13 17:03:26 2020 +0000 [Tapir] Clean up old, unmaintained code. 
commit 1a94ed7074ac069ebe72e77681f7a081329d3527 Author: TB Schardl Date: Sun Jan 12 13:52:42 2020 +0000 Bug fixes for LLVM 9 rebase commit e4474c098404d92f8d18501bbb23a8df02518582 Author: TB Schardl Date: Wed Nov 27 13:04:30 2019 +0000 Revert "[Sanitizers] Add permission checks to sanitizer tests, to handle runs in seccomp (e.g., in Docker containers)." This reverts commit 0859adda004118a2ad21399b52bffc38f927aec6. commit 179940dfe95de9b9b50d576c441bc5d8c70ae381 Author: TB Schardl Date: Tue Oct 29 22:16:47 2019 +0000 [Cilksan] Some additional code cleanup, to support compilation on different systems. commit 3753c4d822ca8021c2e89c1b514fb67d382dccdf Author: TB Schardl Date: Tue Oct 29 21:30:37 2019 +0000 [CMake] Enable compilation of Cilksan and Cilkscale on Darwin. commit 3ff5c2e326c2e2cfd7630c1aa96fc9d440f54e29 Author: TB Schardl Date: Tue Oct 29 21:30:13 2019 +0000 [Cilksan] Adding feature tests to enable compilation on different systems, e.g., Darwin. commit de64aabc3b428d3e203824876504c100ba43154a Author: TB Schardl Date: Tue Oct 29 21:28:40 2019 +0000 [test/CSI] Updated flags to relax dependence on a particular filename for the CSI runtime. commit 737c4ac8365e5a0d022c29628366d797366a4699 Author: TB Schardl Date: Tue Oct 29 21:27:48 2019 +0000 [CSI] Updated types in CSI hooks to be more consistent. commit 76a3f1e311a51741976cac4010650b638c6681e7 Author: TB Schardl Date: Wed Oct 16 14:02:16 2019 +0000 [Cilksan] Safer setting of constants based on bit shifts. commit ab2b363b9b628aabae01f2b97638f1059bfdc7f8 Author: TB Schardl Date: Wed Oct 16 14:01:18 2019 +0000 [Cilksan] Minor update to formatting of the tree diagram of the call stacks involved in a race. commit ef32799c4555d7c269df1ea06caa3c6b865a8e01 Author: TB Schardl Date: Tue Oct 15 13:03:11 2019 +0000 [Cilksan] Modified cilksan_assert to raise a signal, in order to facilitate debugging of assertion failures. 
commit 89021132ad327cad5e914e45f84280d100c8bb5c Author: TB Schardl Date: Tue Oct 15 12:56:06 2019 +0000 [Cilksan] Adding option to trap the program when a race is detected, to allow the user to examine the state of the program at that point. commit d30ef90d48eb779791ac8e80e72c6a12b4cb33b6 Author: TB Schardl Date: Tue Oct 15 03:17:25 2019 +0000 [Cilksan] Print source information for stack allocations, when information is available. commit 59af4fc3c52091bcab0e233f68163fda311b7937 Author: TB Schardl Date: Mon Oct 14 18:10:53 2019 +0000 [Cilksan] Cleanup and coloring for race reports. Added functionality to print the function responsible for a heap-memory allocation. commit 42f48f9001b91e90433aff415248bc5535710a02 Author: TB Schardl Date: Sun Oct 13 12:42:11 2019 +0000 [Cilksan] Fix up race detection and reporting for frees and reallocs. commit b5490dc86d8f413e2d1f6bc1180a1ef9f36db9ce Author: TB Schardl Date: Fri Oct 4 03:24:32 2019 +0000 [Cilksan] Add sync-region information, and maintain multiple P-bags for different sync regions in a function or task. commit 353a252141ff7380653d72a0609d704963d56a92 Author: TB Schardl Date: Sat Sep 7 17:54:33 2019 +0000 [Cilksan] Adding const attribute to pointer arguments. commit 0d1cd10bc1722949b7431f4bcba52ae1821b2169 Author: TB Schardl Date: Sat Sep 7 02:41:17 2019 +0000 [Cilksan] Simplified and optimized interface with the shadow memory. commit ccd151063c169f4f35b2c91cf1125ba643dcf9f6 Author: TB Schardl Date: Sat Sep 7 02:06:15 2019 +0000 [Cilksan] Fast tests for DisjointSet_t find_set routine. commit 5eaf06eb33629208193e4d79ae1739e7ea44007c Author: TB Schardl Date: Fri Sep 6 12:11:53 2019 +0000 [Cilksan] Minor change to the code for recording addresses of instructions. commit 9fe6d72358c829915a3e375ddafacfec1b83a06a Author: TB Schardl Date: Fri Sep 6 12:10:29 2019 +0000 [Cilksan] Cleaning up the code for managing internal stacks around parallel loops. 
commit 7d5beca007dd5da48d58e4424a1b1bcb92cf162f Author: TB Schardl Date: Fri Sep 6 12:08:32 2019 +0000 [Cilksan] Bug fixes to the management of internal data structures. commit c349f37005f97b5db4ee5f2ac355c5828c9e3d3b Author: TB Schardl Date: Fri Sep 6 11:53:20 2019 +0000 [Cilksan] Fixed bug where stack_ptr value for an SBag would not be populated after the last iteration of a parallel loop. commit e9c6d54805689d88cb78dffb232f727cf1897284 Author: TB Schardl Date: Wed Sep 4 23:04:28 2019 +0000 [Cilksan] Modified internal data structure of shadow-memory allocator to avoid some implementations invoking destructors on the data payload in a Slab. commit ad01761a74069ba73dbeb21e8b6a771b5af171b6 Author: TB Schardl Date: Wed Sep 4 23:03:10 2019 +0000 [Cilksan] Code cleanup, and converting some const's to static constexpr's. commit 31ebf6b0c199999932a6e16336352e38150cc752 Author: TB Schardl Date: Wed Sep 4 23:00:21 2019 +0000 [Cilksan] Using loop instrumentation to optimize SP-bags algorithm for parallel loops. commit 56b64bfbf2795b4da489d519c987565883f1476f Author: TB Schardl Date: Wed Sep 4 22:47:46 2019 +0000 [Cilksan] Added a simple fixed-size allocator for disjoint sets. commit 9981c528a2ab88fc0ede50551070fe2b7dfa44bb Author: TB Schardl Date: Tue Sep 3 12:15:47 2019 +0000 [Cilksan] Update shadow-memory allocator to move pages with newly-freed objects to the front of their free lists, in order to further optimize stack patterns of memory allocation. commit 6fd1a3922fe03eac02b5e29a2b3381053e70d113 Author: TB Schardl Date: Tue Sep 3 12:12:24 2019 +0000 [Cilksan] Standardized assertions to use cilksan_assert. Added properties to Tapir task hooks. Code cleanup. commit 46d43185385bcb94db27c3052edaf9110e7c5e20 Author: TB Schardl Date: Tue Sep 3 12:02:35 2019 +0000 [CSI] Add properties to task entries and exits. commit e7d8755db526e21ed3de39aa04738ed9a39e8321 Author: TB Schardl Date: Tue Sep 3 11:56:13 2019 +0000 [CSI] Add loop instrumentation. 
commit 418bb3c25d8d90832eb5136db0f625bc89f885af Author: TB Schardl Date: Tue Sep 3 11:51:26 2019 +0000 [CSI] Added constant attribute on FED table accessors. Added aligned_alloc to list of allocation functions. Code cleanup. commit 3a16362658ea44c1209480486dbe75c102315b46 Author: TB Schardl Date: Mon Sep 2 02:27:13 2019 +0000 [Cilksan] Adding header file for offsetof macro. commit 4827f7de7acfb6a3d560d681863fd49a98c03381 Author: TB Schardl Date: Sun Sep 1 21:18:32 2019 +0000 [Cilksan] Code cleanup and fixes to support building Cilksan runtime without debug. commit 9073aaccec2c95493b9df76ce2bc48f3440c4553 Author: TB Schardl Date: Sun Sep 1 21:11:21 2019 +0000 [Cilksan] Adjusting race reports to be more compact and identify the racing instructions earlier. commit 7985f6e5d255f6d48e5ca92f41ddca227656aa36 Author: TB Schardl Date: Sun Sep 1 21:08:21 2019 +0000 [Cilksan] New simple shadow memory implementation. This shadow memory uses a simple 2-level paging scheme, and keeps track of accesses at an 8-byte default granularity that is dynamically refined when smaller accesses occur. No compression is performed, and the dependence on Snappy has been removed. The commit includes a custom fixed-size memory allocator to speed up allocating shadow-memory objects. Some fields in MemoryAccess_t and SPBagInterface, have been rearranged to make specific shadow-memory data structures more compact. Fixed bug with allocation and freeing of bags associated with disjoint sets. commit dfb440a7db110a501d9abf2a71bea2241afe7b28 Author: TB Schardl Date: Sun Sep 1 20:54:10 2019 +0000 [Cilksan] Remove creation of unnecessary shadow frame when entering a task. commit 1ab9c7a068de9edac529d2cc4ac89d815acf65c3 Author: TB Schardl Date: Sun Sep 1 20:50:41 2019 +0000 [Cilksan] Adding basic statistics on number of reads and writes checked. 
commit 529fcaa5790966326d4a6f79dca184045e645f14 Author: TB Schardl Date: Sun Sep 1 20:47:34 2019 +0000 [Cilksan] Make use of both frame pointer and stack pointer for a call to update the shadow stack. commit 8f5cec4e223a24a16b2b622e7d708ae670aa350e Author: TB Schardl Date: Sun Sep 1 20:43:59 2019 +0000 [Cilksan] Add support for suppressing race detection due to dynamically propagated analysis information and dynamic detection that nothing can be executing in parallel. commit 0e9c46bfefe14292fb925c603796f3d2cd9fc161 Author: TB Schardl Date: Thu Jun 6 13:09:53 2019 +0000 [Sanitizers] Add permission checks to sanitizer tests, to handle runs in seccomp (e.g., in Docker containers). commit 6d1a820ef5270661e35927d347a32e11a313eb2d Author: TB Schardl Date: Wed Jun 5 20:31:02 2019 +0000 [CSI] Adding a newline to remove a compiler warning. commit 32d6381c557bddfa6d8e0552195cd15ccd4ffdd2 Author: TB Schardl Date: Fri Apr 19 07:49:06 2019 -0400 [Cilkscale] Fixing linking flags to build parallel Cilkscale on Ubuntu. commit e1dd66dcf7c1cc801b5cd97a6acc1136cea4c9af Author: TB Schardl Date: Thu Apr 18 21:49:20 2019 -0400 [Cilkscale] Updated cmake configuration to enable compiling the parallel cilkscale tool. commit a407ab50423a046e91f6ca24af9b9a69ba1faa1c Author: TB Schardl Date: Thu Apr 18 21:47:29 2019 -0400 [Cilkscale] Bug fixes to the serial and parallel versions. commit f28424fd13a2fcfbd653057762cb66d63fb4693c Author: TB Schardl Date: Fri Apr 12 07:47:38 2019 -0400 [Cilkscale] Add support to generate dynamic library for Cilkscale tool. commit 4d9e90a270f69f6fb8015bca186e352cfefd9c82 Author: TB Schardl Date: Wed Mar 6 19:45:22 2019 +0000 [Cilkscale] Removing empty hooks from Cilkscale tool. Cilkscale now expects the CSI pass to run with the appropriate options to instrument Tapir instructions only. commit b3bc089eedc474366fe735e052ea9bb97b266957 Author: TB Schardl Date: Sat Mar 16 23:19:37 2019 -0400 [Cilksan] Bug fixes for using Cilksan on JIT-compiled programs. 
commit c3c8008bb4499bbc3a977e0eb87892ea470a8045 Author: TB Schardl Date: Sat Mar 16 23:19:37 2019 -0400 [Cilksan] Bug fixes for using Cilksan on JIT-compiled programs. commit e3e8db147af9d7f3a001696ee3efb7bbef85c23a Author: Daniele Vettorel Date: Fri Mar 15 19:55:28 2019 -0400 New FED table data structure for the runtimes. Still need to change the object table for Cilksan. commit a5fbb30ddae8664f13c52ddb5a8af82e8eaf435c Author: TB Schardl Date: Fri Mar 15 17:05:02 2019 -0400 [Cilksan] Draft modification to track the stack pointer without referencing proc_maps. commit 5a3fd6d5832b72525fe29698e70f7ecb6e726ba0 Author: TB Schardl Date: Sat Mar 2 04:37:34 2019 +0000 [CSI] Add aligned `new` and `delete` functions to CSI. commit 1749b56fcef7b594558a65ba7ad3602bd898298a Author: TB Schardl Date: Thu Jan 31 22:56:49 2019 +0000 [CSI] Fix null-tool to incorporate latest changes to CSI API. commit 8a7141b3e650efaae952d394ff489f3ed4cd21f6 Author: TB Schardl Date: Thu Jan 31 03:29:16 2019 +0000 [Cilkscale] Fix to incorporate latest changes to CSI API. commit 68cfb672bb4cfbffdc9a23f1b25918562afd7258 Author: Daniele Vettorel Date: Tue Jan 29 17:02:53 2019 -0500 Modified CSI interface (to keep track of runtime spawns) commit 64eb1b8a47b1b7060085dfda16182e7f034c4289 Author: TB Schardl Date: Thu Jan 24 18:29:52 2019 +0000 [CSI] Add CSI hooks for allocation functions -- i.e., variants of malloc and new -- and their corresponding deallocation functions -- i.e., variants of free and delete. commit 1005d1548dfc58a99a711ee160ef495e644e0f67 Author: TB Schardl Date: Sun Feb 24 02:23:11 2019 +0000 [CSI] Updated CSI API. commit d310815d8f10c8405c8ce1e4131052f3ddfb79d7 Author: TB Schardl Date: Wed Dec 19 11:15:04 2018 -0500 [CSI] Modified test to match new behavior of CSI: global constructors are instrumented. commit c5a6cfd18e0db7323553920eef8a6b319c1a4724 Author: TB Schardl Date: Mon Nov 26 12:33:05 2018 +0000 [Cilksan] Code cleanup to address warnings from compilation using Clang. 
commit c572ab8c89e7d8d21e32d08f993dd076adf6726c Author: TB Schardl Date: Sat Nov 3 23:22:55 2018 -0400 Bug fixes for rebase onto release_70 commit c04e55d73c98865465db445935ee7f415d4952c7 Author: TB Schardl Date: Thu Nov 1 09:00:23 2018 -0400 [Cilksan] Replaced ptrdiff_t with std::ptrdiff_t to address compilation with older compilers. commit 600a72a725108ea28deebe6460aac8acbe6dbb8a Author: TB Schardl Date: Thu Nov 1 03:42:27 2018 +0000 [Cilkscale] Moved Cilkscale timer into a separate header file, and added functionality to make it easy to switch timers between clock_gettime, rdtsc, and instruction count. commit 5882afaff53baa19d0a368750696fa37da5566d7 Author: TB Schardl Date: Thu Nov 1 03:40:21 2018 +0000 [Cilkscale] Updated to handle CSI instrumentation of allocas. commit 4f1a5ee28d35706a1ca2cc972a25c0166380addc Author: TB Schardl Date: Thu Nov 1 02:47:43 2018 +0000 [Cilksan] Fixing several warnings about signed and unsigned types. commit 0593818edab89ec2de656650007a1293a15bf136 Author: TB Schardl Date: Thu Nov 1 02:47:13 2018 +0000 [Cilksan] Bug fix to logic for checking and updating the shadow memory when memory is accessed. commit 3bffad81e0c09653ec87c198c25640612134bd7b Author: TB Schardl Date: Thu Nov 1 02:45:46 2018 +0000 [Cilksan] Some code modernization. commit 0b8c7f8f446d46992cfee8796c8c11b280cbcb09 Author: TB Schardl Date: Thu Nov 1 02:37:37 2018 +0000 [Cilksan] Add support for statically instrumented allocation functions and frees. commit 13c3513ef935b69cbdc0a215e774db3684412eca Author: TB Schardl Date: Thu Nov 1 02:31:21 2018 +0000 [Cilksan] Restructuring the code to consolidate the top-level tool within a C++ global object, which is automatically constructed at program start and destructed at program termination. commit 943dce2d7875c0ef7389bbb6d4a713082dfd7f58 Author: TB Schardl Date: Wed Oct 31 23:39:27 2018 +0000 Revert "[CSI] Adding a property to func_entry and func_exit hooks to identify the 'main' function." 
This reverts commit a86021de19bc2c89230473d47877332caa5c7463. commit 9569e9aaeeeff9a063f0ceb4bbdcf0992a1b484c Author: TB Schardl Date: Fri Oct 19 21:21:42 2018 +0000 [cilksan] Modifying cilksan build to create static library for cilksan. commit b15bf9f42a9988354f3809b7db182a96075b618d Author: TB Schardl Date: Fri Oct 19 21:19:36 2018 +0000 [CSI] Making alloca and sync hooks better match the other CSI hooks in API design. commit f8a8fbb5ffe6fe5bd656a0614cbbce79c99445db Author: TB Schardl Date: Fri Oct 19 21:18:00 2018 +0000 [CSI] Adding a property to func_entry and func_exit hooks to identify the 'main' function. commit 4f160db5984c98796ffdc067b7934b91a409e2ba Author: Phil Sun Date: Fri May 11 01:54:09 2018 +0000 Fix alloca hook to check if enabled commit 7a705e1d04d24580a12a42e285aef29637897635 Author: Phil Sun Date: Wed May 9 23:28:05 2018 +0000 Add alloca_pc that maps alloca csi_id -> memory addresses commit a6e199176aab33f3399dda10408c3bd7dfdbea68 Author: Phil Sun Date: Wed May 9 22:10:00 2018 +0000 Pass proper csi_id's to record in shadow dict commit ad7c8fe7c33cb011111cd45c190d51824d67cd23 Author: Phil Sun Date: Thu May 3 06:40:29 2018 +0000 Record stack allocations commit 49e0ce2d754a1c7d0c5e5960872e436adaf2f5e9 Author: Phil Sun Date: Thu Apr 19 01:10:32 2018 +0000 More info in output commit 214a871fdc2b0b4c6220c7a7ade648257b6a2ab6 Author: Phil Sun Date: Thu Apr 5 15:23:17 2018 +0000 Check addresses are valid, and consider that acc_loc starts at 0 commit 6c8b480a705886f0818bdce47bb2fa79a3c7232c Author: Phil Sun Date: Wed Apr 4 21:48:03 2018 +0000 Re-do changes from cilktools-tapir branch to here commit 266600524cdc0b265ce7a24bb15e7839703640a5 Author: TB Schardl Date: Tue Aug 28 12:06:30 2018 +0000 [CSI] Fix build of CSI runtime on older systems. commit 170dca0c63e9601a949d11a46aca9dfd21f5b0ca Author: Tao B. Schardl Date: Tue May 22 10:46:51 2018 -0400 [Cilksan] Add interpositioning for calloc, realloc, mmap, and mmap-related functions. 
commit 29d5ea996690dff311d8785157670e650dd8e684 Author: Tao B. Schardl Date: Tue May 22 10:46:19 2018 -0400 Merge pull request #1 from SuperTech/cilksan-fixes Cilksan record_mem_helper fix commit aa094f8f17c30a5e7e337a63566dd57bd239545b Author: Phil Sun Date: Tue May 29 04:20:14 2018 +0000 Remove previously missed stack_check in compressed_shadow_mem commit b74e170d2e032480c3fd6776acafcc7d441d9fb4 Author: Phil Sun Date: Tue May 22 00:30:53 2018 +0000 Don't do stack check when finding races; clearing shadow memory is enough commit 195f8b3a8791178985c03665ff64dc2d8b8822f2 Author: Phil Sun Date: Tue May 22 00:30:23 2018 +0000 Fix off-by-one in clearing shadow memory commit 65468bfa5091f51c8b34d4a3b20c310822e7fdf4 Author: Masakazu Bando Date: Mon Apr 2 23:04:04 2018 +0000 updated for alloca hook support. commit f22a9ec465c709bbfcd262ee4b2c793219001d09 Author: TB Schardl Date: Thu Jun 28 01:41:04 2018 +0000 [CSI] Rearranging CSI files to ensure that cmake installs csi.h. commit da9a733f5731cfe49294b4df5af36b9d0af4d8fc Author: TB Schardl Date: Wed Mar 21 21:19:30 2018 +0000 [CSI] Added test that global constructors are not instrumented. commit 22735dd33535061899783a6459b0535440a00a73 Author: TB Schardl Date: Thu Feb 22 18:42:46 2018 +0000 [CSI] Adding property to function exits to specify whether the exit rethrows an exception. commit c36fa518f237442d80f71e87ef8678a6ae61e148 Author: TB Schardl Date: Wed Jan 31 14:45:50 2018 +0000 [CMake] Use CMake configuration to properly handle appending the correct -std= flag. commit 5afbe20cb9f0f076c09ce7399e78a0314d841bf8 Author: TB Schardl Date: Mon Jan 15 15:19:28 2018 +0000 [CMake] Fixed typo in CMakeLists. commit 082465ec5597ab61f51678ad8dbccf65be897816 Author: TB Schardl Date: Sat Jan 13 05:40:12 2018 +0000 Bug fixes for initial rebase onto version 6. 
commit a30deb935dbc1ca1c7097e2777486ac8433a5232 Author: TB Schardl Date: Thu Jan 11 13:56:38 2018 +0000 Squashed commit of the following: commit 628144e51fb3be74ff55ec22722998b1dbd2591e Author: TB Schardl Date: Wed Jan 10 14:24:19 2018 +0000 [CMake] Ensure that cmake errors out if snappy is not installed and Cilksan will be built. commit 2921a527d2cc05d20cd244ac8b8cf5cd81b6186d Author: TB Schardl Date: Wed Jan 10 05:03:17 2018 +0000 [Cilkscale] Fix to CMake variables. commit 8b9408d7d52c5c305b72827b6fa714b5d0622aa2 Author: TB Schardl Date: Tue Dec 5 23:54:48 2017 +0000 [Cilkscale] Adding Cilkscale scalability analysis tool. commit 40cbe7198d1285407ca07bb5f83385485fdf2876 Merge: ba910439c a14c13358 Author: TB Schardl Date: Fri Dec 1 23:18:19 2017 +0000 Merge branch 'master' of github.com:wsmoses/Tapir-compiler-rt into branch 'master' of github.mit.edu:SuperTech/compiler-rt-tapir-csi commit a14c1335830d0082c0652d6477794674b0dfca6b Author: TB Schardl Date: Fri Nov 24 03:13:25 2017 +0000 [CSI] Updating CSI regression tests. commit 19c6be1aa67b71cbaf92686d168ea9486f698e10 Author: TB Schardl Date: Mon Nov 20 17:49:06 2017 +0000 [Cilksan] Fixing build problem. commit 4e2a6297c361993e821d70663ab7a4991cc510b5 Author: TB Schardl Date: Mon Nov 20 16:11:50 2017 +0000 [Cilksan] Initial commit of Cilksan for Tapir. commit ba910439cd0a553ef848e4b45ff4271bb5f38fdd Author: TB Schardl Date: Sun Oct 22 15:46:43 2017 +0000 [CSI] Adding prototype CSI hooks for Tapir constructs. Adding forensic table to keep track of LLVM IR instruction counts for each basic block. commit 86f0f134a849f32e79ffbf6ce836de8cffd8133d Author: TB Schardl Date: Sun Oct 22 15:45:43 2017 +0000 [TSan] Reformatting to appease the linter. commit bdb75fb5923c10ee4865b727f465096d3ca0ab7a Author: William S. Moses Date: Thu Oct 5 16:16:09 2017 -0400 Cleanup commit a1812189a90aa790f651adffba145114cb1aecd3 Author: TB Schardl Date: Wed Sep 27 02:39:53 2017 +0000 [CSI] Adding Tapir instrumentation support. 
commit e5fc55f275ed41f75f257d3b4226639f6a7526c5 Merge: 356cce18a e8e668dba Author: TB Schardl Date: Mon Aug 28 14:55:42 2017 +0000 Merge branch 'release_50' of github.mit.edu:SuperTech/compiler-rt-tapir-csi into release_50 commit 356cce18a467d5830e010525711d60077d0c3cc0 Author: TB Schardl Date: Fri Aug 25 22:59:01 2017 +0000 [CSI] Updated CSI tests to properly handle CSI properties. commit 01bb821fdfd1570391d1ee4d72bc497faa3e4297 Author: TB Schardl Date: Fri Aug 25 22:57:56 2017 +0000 [CSI] Fixed ordering of FED tables to match ordering implemented by instrumentation pass. commit 9065436fab38c5afc2213da08ca29c3228157b9a Author: TB Schardl Date: Fri Aug 25 22:57:02 2017 +0000 [CSI] Fixed ordering of instrumentation counts and FED tables to match ordering implemented by instrumentation pass. Added basic properties to functions and basic blocks. Added column numbers to source locations. commit d5e1d1450ab5e66208cba59291bb0932707c4cdf Author: TB Schardl Date: Tue Aug 8 23:05:51 2017 +0000 Squashed commit of the following: commit 71c22bcedac24836cff8507dc0e41bcd2878794b Merge: 6b95f9648 66ccf0079 Author: TB Schardl Date: Fri Jun 2 15:44:43 2017 +0000 Merge branch 'master' of http://llvm.org/git/compiler-rt commit 6b95f9648b076e351ad057d3193ad0923e0ebc40 Merge: 60dc811b9 97fc005f6 Author: TB Schardl Date: Wed May 31 01:45:14 2017 +0000 Merge branch 'master' of http://llvm.org/git/compiler-rt commit 60dc811b9e63072928fd3af2fca89f3fbbc5b874 Merge: a4d010dcc 718173908 Author: TB Schardl Date: Fri May 26 12:16:33 2017 +0000 Merge branch 'master' of http://llvm.org/git/compiler-rt commit a4d010dccb8c9cd19accfefb0c739bd771472ef6 Merge: d16e4026a 451043533 Author: TB Schardl Date: Mon Apr 24 15:49:06 2017 +0000 Merge branch 'master' of github.com:CSI-LLVM/compiler-rt commit 4510435336499c05af77c807798eec4ba1a6ec65 Author: Tyler Denniston Date: Fri Apr 14 10:38:07 2017 -0500 [CSI-TSan] Fix 'undefined symbol' errors when using CSI-TSan on shared libraries. 
commit 4cb88157f43e78a49ace4340788a1ee4834fb777 Author: TB Schardl Date: Wed Apr 12 02:04:51 2017 +0000 [CSI-null] Encourage inlining of null CSI hooks. commit e1076ccbeb1ddc856d23397f80b91a2735a9f00e Author: TB Schardl Date: Tue Apr 11 15:11:48 2017 +0000 [CSI] Differentiating between property types for different categories of IR objects. Added several new properties. commit d16e4026ab6407041e042dda07d4538a597545fe Merge: 796f898b4 4154f1e99 Author: TB Schardl Date: Mon Apr 3 21:19:26 2017 +0000 [CSI] Merging CSI into compiler-rt checkout compatible with Tapir/LLVM. commit 4154f1e9973d456bc34a3d25dd9dd9d7f0d39384 Author: TB Schardl Date: Sat Dec 31 17:37:50 2016 +0000 [CSI] Add function names to FED tables. commit f277290b342a8922328d75b8e36c2ecdf5c086fb Author: Angelina Lee Date: Sun Dec 4 19:31:39 2016 -0600 Comment out unused __csi_disable_instrumentation flag commit 8b99936641aff04c11dcb647df63a19a65da54eb Author: Tyler Denniston Date: Fri Sep 2 16:10:34 2016 -0400 Disable STL test for now. commit 2c11ec55eb0c0650166fd7d54de563b743b26c17 Author: Tyler Denniston Date: Thu Sep 1 17:07:52 2016 -0400 Use new property struct bitfield. commit 662843a13025616c8dc3170490489e8b387f9bcb Author: Tyler Denniston Date: Mon Aug 15 15:00:42 2016 -0400 Disable instrumentation moved into compiler. commit 1165fb7d1ada6f19258bceb901527048575c56bb Author: Tyler Denniston Date: Mon Aug 15 12:53:50 2016 -0400 Initial implementation of __csi_disable_instrumentation commit 918b750beb64b2d2ee32e6966e7744dc92e6cb9c Author: Tyler Denniston Date: Mon Aug 15 12:31:15 2016 -0400 Add C++ test using STL. commit d78554b1a620a97ed628df4b788093b7e9613e04 Author: Tyler Denniston Date: Mon Aug 15 10:36:07 2016 -0400 Add no-op property parameter to all hooks. 
commit 7cad8c58c916ee7144c55472705e342da94e9f88 Merge: 3a57261c3 e42331093 Author: Tyler Denniston Date: Wed Jul 13 13:12:28 2016 -0400 Merge remote-tracking branch 'llvm-origin/master' commit 3a57261c3bb5ca6ffddbf8526956bae847d92c4f Merge: 1b980b0ea 28f8c79bb Author: Tyler Denniston Date: Mon Jun 27 09:13:08 2016 -0400 Merge remote-tracking branch 'llvm-origin/master' commit 1b980b0eaa768a08006bd37bc0bb50e015c57bee Author: Tyler Denniston Date: Thu Jun 16 14:49:01 2016 -0400 Add FED test commit 4eb78d43c7a76c3fedeeb20a9b0a9b39586d47d5 Author: Tyler Denniston Date: Thu Jun 16 11:57:00 2016 -0400 Add test for read-before-write property commit 7d819a455658e42a294502e9a17e9959e1f08844 Author: Tyler Denniston Date: Thu Jun 16 11:47:49 2016 -0400 Fix test output names to allow them to run in parallel commit e310651d04e45435aa69019c6cfafccbb04e72d2 Author: Tyler Denniston Date: Thu Jun 16 11:20:40 2016 -0400 Add unknown callsite test commit 5b17bcb73e1a4b75784213b426ce746efe6b9df2 Author: Tyler Denniston Date: Thu Jun 16 11:13:42 2016 -0400 Load property commit 6162841412121611977be08be4759696b4f9fe3a Author: Tyler Denniston Date: Thu Jun 16 11:05:48 2016 -0400 Add unknown ID macro commit 8a768b85a167094e95d7461ed8d47607b7f5bc84 Author: Tyler Denniston Date: Wed Jun 15 17:43:33 2016 -0400 Remove old changes not matching upstream commit 793e6853ad80bf9d9ed64d4ebf3ac6b07afa41e0 Author: Tyler Denniston Date: Wed Jun 15 17:18:48 2016 -0400 Add shared object CSI test (passing) commit 40ae69e55bc692b38081c3c7311d0172c42c1ea0 Author: Tyler Denniston Date: Wed Jun 15 17:18:28 2016 -0400 Fix CSIRT visibility bug commit 214cff3b635bfc37b3e929a111e247328a247930 Author: Tyler Denniston Date: Wed Jun 15 16:55:45 2016 -0400 Add multiple units test (passing) commit 687cb3fa237cc3da845c7a0e1982a2e251ce69e9 Author: Tyler Denniston Date: Wed Jun 15 16:55:24 2016 -0400 Fix runtime bug with empty FED tables commit 5a8adad2eff56d5a980759dc9d7220cdab0b60be Author: Tyler Denniston Date: Wed 
Jun 15 16:39:13 2016 -0400 First CSI test passing. commit 456af2165c4b4ab94dd6593b72edff786755427e Author: Tyler Denniston Date: Wed Jun 15 16:16:15 2016 -0400 'make check-csi' initially working. The test doesn't check anything yet, but the infrastructure is there. commit d3fba62f917a8d9086926a35794f7faa4a9b9019 Author: Tyler Denniston Date: Wed Jun 15 11:39:06 2016 -0400 Removing old tests. They will come back in a different form. commit 4a10e34eaec8e5e3be8a283dc6c9fa0ba42f2eea Author: Tyler Denniston Date: Wed Jun 15 11:34:50 2016 -0400 Removing toolkit until I figure out a better place for it. commit 42c9b2f043f404edd9fb3a633c57dced85a56443 Author: Tyler Denniston Date: Wed Jun 15 11:33:10 2016 -0400 Remove csirt.c and csi.h from tests. This breaks building tools for now. commit 6597b37233cd9c71dbf987884284298f306e91b3 Author: Tyler Denniston Date: Wed Jun 15 11:27:06 2016 -0400 Start splitting CSI up by lib and test lib/csi will contain the runtime and (for now) csi.h test/csi will only contain Lit tests commit da52217503266046b0cb22c2415f7581457c9f7f Author: Tyler Denniston Date: Wed Jun 15 11:25:21 2016 -0400 Remove old tsan+csi files commit 7e6dbf074de41b61f7b9e6515932c681c6e82138 Author: Tyler Denniston Date: Tue Jun 14 16:10:39 2016 -0400 FED tables are now copied. commit df3cd69526204337c81705a161bb66f14d3d60dc Author: Tyler Denniston Date: Tue Jun 14 15:56:03 2016 -0400 API cleanups commit 7bcd7ee719ffbb70522876c175698f1d3f882b13 Author: Tyler Denniston Date: Tue Jun 14 13:51:47 2016 -0400 Add back callsite -> func id mapping. Mistakenly removed in f994e3d7 commit c89ab1ad9b2a931da9100575064e30cee29d43c6 Author: Angelina Lee Date: Tue Jun 14 11:37:03 2016 -0500 Adding a simple memory tracer tool commit f994e3d7c0c8c5d3c30e2c63eb82917eb56bf3e3 Author: Tyler Denniston Date: Tue Jun 14 11:45:22 2016 -0400 Remove relation tables. 
commit b1ee9026096a524e753678fc1b0c356a268cb3b8 Merge: bd0531e56 a01fb8f7e Author: Tyler Denniston Date: Tue Jun 14 10:55:02 2016 -0400 Merge remote-tracking branch 'upstream/master' commit a01fb8f7e95879add1e4a686cfcb053e13becc92 Merge: a159bc23b df01596dc Author: Tyler Denniston Date: Tue Jun 14 10:40:12 2016 -0400 Merge remote-tracking branch 'upstream/master' Conflicts: lib/tsan/rtl/Makefile test/tsan/mutexset6.cc test/tsan/test_output.sh commit bd0531e569a50ca8fb7ede07ea9e71612983b996 Author: Tyler Denniston Date: Mon Jun 13 10:52:15 2016 -0400 Demo tool prints in colors. commit a9699746f5d289471f10951e288e754682b6109d Author: Tyler Denniston Date: Mon Jun 13 10:25:30 2016 -0400 Cleanups to demo tool commit a2c22a1958a94166442779799261cd65c59c87e7 Author: Tyler Denniston Date: Wed Jun 8 12:56:04 2016 -0400 Clean up Makefile commit dad28ca144e48fbbdcc9e29b218f5dd3fc047fc7 Author: Tyler Denniston Date: Wed Jun 8 11:13:09 2016 -0400 Update demo tool to not use STL structures commit 845303fa04b0778520a30fe78a17e7e207b572f8 Author: Tyler Denniston Date: Wed Jun 8 10:31:37 2016 -0400 Update demo tool to maintain shadow stack commit 75e3027d522851a77842b6a7fe98af750fb9930d Author: Tyler Denniston Date: Tue Jun 7 14:35:40 2016 -0400 Update dyn test and add demo tool commit 31a2b4a9789546ae874cf1f24093ce749bcafbb8 Author: Tyler Denniston Date: Mon Jun 6 11:00:30 2016 -0400 Update print tool commit 0b6f8bcd2ea7098757d0830f32898b3b9a189a4b Merge: 0c2d334e3 c7e2e4f26 Author: Tyler Denniston Date: Mon Jun 6 10:44:42 2016 -0400 Merge branch 'master' of github.com:CSI-LLVM/compiler-rt commit c7e2e4f260ef1416fff8f006df13037426d890c2 Author: Damon Doucet Date: Thu Jun 2 02:53:29 2016 +0000 Combine FED tables into a list of structures in __csirt_unit_init commit 4e304f17c688feda096e31b6a8c37347277c0650 Author: Damon Doucet Date: Thu Jun 2 01:48:55 2016 +0000 Add const qualifiers to FED entries and return pointers in accessors commit e1e1a25b7b280832759b98e01fecb6744b47e99e 
Author: Damon Doucet Date: Thu Jun 2 01:32:16 2016 +0000 Change __csi_before/after_callsite to __csi_before/after_call commit c43a5f10d42e212dabe225fa8742dc7d015a83fb Author: Damon Doucet Date: Thu Jun 2 01:26:59 2016 +0000 Reorder hooks in csi.h to match API doc commit 7cc94d8b7f6ad1254e35cde407d1e568c3574d61 Author: Damon Doucet Date: Wed Jun 1 21:07:59 2016 +0000 Add const to params in __csi_before/after_call in csi.h commit 9f9c853a997ca56c8ee2fd181bf386b12a436ec9 Author: Damon Doucet Date: Wed Jun 1 21:07:13 2016 +0000 Rename __csirt_callsite_target_unknown to have "is" commit 1aabc135bed7f796f08362c07a3098d41d59a909 Author: Damon Doucet Date: Wed Jun 1 21:04:14 2016 +0000 Fix types commit 2301b1b8e4afdc1342c8e4111f53409df1248149 Author: Damon Doucet Date: Wed Jun 1 20:32:37 2016 +0000 Move accessors from csirt.h to csi.h commit 0c2d334e3c64e7afc2fd05ea3343db21bd2d28b5 Merge: 0eaf686da 4f6cea22b Author: Tyler Denniston Date: Wed Jun 1 14:06:06 2016 -0400 Merge branch 'master' of github.com:CSI-LLVM/compiler-rt commit 0eaf686da2a8c46de3d2c151c1b8e3dcc96ca02e Author: Tyler Denniston Date: Wed Jun 1 14:05:52 2016 -0400 Update comment commit 4f6cea22bcc1a4956e9686e7b7e166c4fd3e7c3e Author: Damon Doucet Date: Wed Jun 1 17:08:03 2016 +0000 Add __csi_after_callsite to csi.h, null_tool, and fed_test commit 43d355d94becf464033c0f58127a914f21fc4461 Author: Damon Doucet Date: Wed Jun 1 16:38:54 2016 +0000 Update csi.h, null_tool, and fed_test to match API better commit ddfb53f805e2beb4cfb0b9db569d6f1af98dd0b6 Author: Damon Doucet Date: Tue May 31 18:59:19 2016 +0000 Change runtime library to better match API commit f0b0bfd3c68dc27829a889131ce4550e9275476d Author: Tyler Denniston Date: Wed Jun 1 10:45:15 2016 -0400 Add new callback to csirt commit e5499f8e7734e68823cbc610dd85d36d75936dbd Author: Tyler Denniston Date: Wed Jun 1 10:29:51 2016 -0400 Update print tool commit 0451b37b20c581fc61ae1eb48b8ebf8fa214901e Author: Tyler Denniston Date: Wed Jun 1 09:53:04 2016 -0400 
Update dyn test commit 5ccb4b02bed6655ef7e9e8e37d070d0804ae7701 Author: Tyler Denniston Date: Wed Jun 1 09:29:14 2016 -0400 Print tool now prints FED information as well commit 87fafee58d5c3fc87d198012cfc310a93a2a8653 Author: Tyler Denniston Date: Wed Jun 1 09:20:33 2016 -0400 Update print tool commit 8d3e6d166e54050499149e0f6a57cef86ea650d8 Author: Tyler Denniston Date: Wed Jun 1 09:05:13 2016 -0400 More comments commit 72161e32eef0f3e4c0453c8b8bd256363a1d43ba Author: Tyler Denniston Date: Wed Jun 1 08:58:39 2016 -0400 More comments commit 5e28271641523cb294a4791abcee5b5b99213bb2 Author: Tyler Denniston Date: Wed Jun 1 08:50:14 2016 -0400 Add some comments commit 779e9e1c690d2f23dbf62ce03cb82fd90a611d14 Author: Tyler Denniston Date: Fri May 27 11:26:37 2016 -0400 Use relation tables in API impl. commit fb2441950bc409d6782503782982360ab67c868d Author: Tyler Denniston Date: Tue May 24 09:59:19 2016 -0400 Add skeleton of rel tables implementation commit f7957f485b99b2186ceef01be8d98a1ee1503ef9 Author: Tyler Denniston Date: Mon May 23 10:45:38 2016 -0400 Add instrumentation_counts_t struct for unit init commit e4ad154725490c6bbf52852e1d4725211f1d8c37 Author: Tyler Denniston Date: Tue May 17 15:14:48 2016 -0400 Remove global FED commit 61965e13db755bd2845819e2b4bc67b390e3c6f6 Author: Tyler Denniston Date: Tue May 17 15:11:10 2016 -0400 Add load/store feds commit 6a9c153cb06a8fb670781572900e02a1a8208668 Author: Tyler Denniston Date: Tue May 17 15:01:38 2016 -0400 Add func exit FED commit 243e9be01cbe17aa56720484be39989889490ffd Author: Tyler Denniston Date: Tue May 17 14:14:49 2016 -0400 Add callsite FED commit b0a8520c36496b5149ba85c7769a854308e62892 Author: Tyler Denniston Date: Tue May 17 13:38:14 2016 -0400 Add BB FED table commit ee343d8098be87e920b327406cfdfcb504c5756c Author: Tyler Denniston Date: Tue May 17 10:50:48 2016 -0400 Add separate FED (and therefore ID space) for functions. 
commit 49c3e2a397a7ffee6c9219773c52bc23090a85d1 Author: Tyler Denniston Date: Tue May 17 10:40:33 2016 -0400 Prepare for multiple FED types. commit 8d4606d411f426d75de599afe04133781cabecd9 Author: Tyler Denniston Date: Tue May 17 10:30:26 2016 -0400 More refactoring commit b35e133bb60bda3dba11d30a65ae941aa04d4021 Author: Tyler Denniston Date: Tue May 17 10:28:40 2016 -0400 Small refactor commit df70be5ce0aeafc1064385a535700a1927f32780 Author: Tyler Denniston Date: Tue May 17 10:15:43 2016 -0400 Rework csirt fed table collection structure commit f0721fbd26ceb974240e9db736edc01d05d8dae8 Author: Tyler Denniston Date: Tue May 17 09:57:01 2016 -0400 Refactor csirt.c commit 3fe6b885f76350770c4ec228008551085710dd5b Author: Tyler Denniston Date: Tue May 17 09:43:51 2016 -0400 Small csirt.c cleanups commit 50cb199561239f3a8b56f62885a8c259966a4d17 Merge: 72a21d227 c94c7154c Author: Bradley C. Kuszmaul Date: Tue May 10 17:47:52 2016 +0000 Merge branch 'printtool' commit c94c7154c26befefd26c6b0df869dbc7b987fd97 Author: Bradley C. Kuszmaul Date: Tue May 10 17:47:46 2016 +0000 Make printtool compile commit 72a21d2276c30cf7643cd320706e3cae3f04d7dc Author: Bradley C. Kuszmaul Date: Thu May 5 16:43:18 2016 +0000 get this to work: make TOOL=print_tool commit a159bc23b41f8e2ecc2edb3cbafa0b3c080adfa6 Author: Tyler Denniston Date: Mon May 2 09:22:19 2016 -0400 Add callsites tool commit 8e5b4e88d09f1570a7698f8601ab3cfd98871851 Author: Tyler Denniston Date: Mon May 2 09:21:41 2016 -0400 Update dynamic lib test commit ac300cc3efea2357829d9bee1d96bb91a6f8fef2 Author: Tyler Denniston Date: Mon May 2 08:56:22 2016 -0400 Add before_callsite test. commit 601ed83b4d88e33c2722e5d70237d036259b46c8 Author: Damon Doucet Date: Tue Apr 26 17:24:55 2016 +0000 Add -g flag to multi module test Makefiles commit c609bef3617aa2b94a83b82bd7f8000f0236eee0 Author: Tyler Denniston Date: Tue Apr 26 10:22:23 2016 -0400 Add back Makefile rule for null_tool. 
commit 10d5a053b291be20aac6103af52e382f85ff9eaf Author: Damon Doucet Date: Tue Apr 5 01:51:25 2016 +0000 Add CSI Runtime Library, add FED test, update some header files commit 9bef3a2f0e916608808823f473a9fe2924405092 Author: Damon Doucet Date: Wed Mar 30 06:37:36 2016 +0000 Update gitignore files commit 6c4c541e9b1cf52a028d4dc82a8e891adfd0bb22 Author: Damon Doucet Date: Wed Mar 30 06:37:16 2016 +0000 Add dynamically loaded multi module test commit 1a54ef89e4f11635dc115d97c9cd923937566109 Author: Damon Doucet Date: Wed Mar 30 06:36:14 2016 +0000 Update toolkit to have more recent API, as well as csirt header commit d261ca785d7aedd6cd0d42629705a2b95e0fe921 Author: Damon Doucet Date: Wed Mar 30 06:32:58 2016 +0000 Update foo test to include a statically linked unit commit 225f2e2f1f356e50fcaf9846dda50ac051f99506 Author: Damon Doucet Date: Wed Mar 30 06:30:44 2016 +0000 Add CSI runtime library commit 12d885de0197344d7fdb5f810f6b86927bed999a Author: Damon Doucet Date: Tue Mar 1 16:07:37 2016 +0000 Add basic-block counter tool commit 37536fb1cd0075a681b0035c2676e25911d8db32 Author: Tyler Denniston Date: Tue Jan 12 05:49:06 2016 -0500 Add gprof tool commit 7c8a1856b384af58d3ee84d7472eb0faca9a2873 Author: Tyler Denniston Date: Fri Nov 20 11:56:53 2015 -0500 Update tsan benchmark commit 7ddb60372e0140acc5fa07643e516f95ef754a90 Author: Tyler Denniston Date: Thu Nov 19 16:42:26 2015 -0500 Increase memops iters commit a5e02d047e36551e4275c93f4b56250f9cb726d7 Author: Tyler Denniston Date: Thu Nov 19 10:34:09 2015 -0500 Replace old call-graph tool with current version. commit 3ad73c4623c588d80395aafefe438bb75ccb1a90 Author: Tyler Denniston Date: Thu Nov 19 10:16:26 2015 -0500 Update tools to not use structs. commit d4dd7e092e9b7927476fef0171008cf3e0407d99 Author: Tyler Denniston Date: Thu Nov 19 10:08:01 2015 -0500 Remove struct arg for load/store instrumentation. 
commit 24133c284a87ded2b140cdf7b26c145312e7308a Author: Tyler Denniston Date: Thu Nov 19 09:53:56 2015 -0500 Remove struct args commit 90fa9684b97d99a32ea9924564e8beea9529fb96 Author: Tyler Denniston Date: Thu Nov 19 09:38:13 2015 -0500 Remove problematic make flag commit 4c98e18f06d76103c76479a90d45ad9c952dd0fe Author: Damon Doucet Date: Thu Nov 19 11:40:04 2015 +0000 Update csi wrapper for tsan to use newer api commit 9bf1761806b5cae1f651d1a3510cfde6a2aca178 Author: Damon Doucet Date: Thu Nov 19 11:39:38 2015 +0000 Add mini_bench_increments to gitignore commit fe0c768f6a220ec74eb8fed9cbd55beb62433ca8 Author: Damon Doucet Date: Thu Nov 19 11:38:46 2015 +0000 Update Makefile, add props_tool, update csi.h and null_tool to use new property syntax commit 67bdc9ba265ddd783a086e849e0317f0b2cb23a1 Author: Damon Doucet Date: Thu Nov 19 09:24:55 2015 +0000 Add mini_bench_increments tsan benchmark commit 87d7a487571c9d3326a523b9a8e5b1e1eb98097b Author: Damon Doucet Date: Thu Nov 19 08:44:08 2015 +0000 Add work to fib-serial function body commit 5cc1ce88bcecf98aa20988a9085da385f1dc3796 Author: Damon Doucet Date: Mon Nov 16 19:28:21 2015 +0000 Move prof_lite/ to cilkprof/ and create prof_lite tool commit c70fe8bd927e21684a194aa2e01d4ce8948fd54c Author: Damon Doucet Date: Mon Nov 16 14:13:09 2015 +0000 Cause prof_lite to time functions (correctly this time) commit 9a71a146b988ef8d6201e21d971269ea3362f63a Author: Damon Doucet Date: Mon Nov 16 14:12:46 2015 +0000 Refactor Makefile a little commit 5639041d8102c4887fc2204048e4e4fff8500dde Author: Damon Doucet Date: Mon Nov 16 07:35:34 2015 +0000 Make cilkprof actually time functions commit 3f3718a4d4d376d8e2e2773fe5abf74a46fda059 Author: Damon Doucet Date: Sun Nov 15 01:39:28 2015 +0000 Added profiler tool (mostly copied from cilkprof) commit 223037c12c13f8efd6ee16ead5c75986012a5b20 Author: Damon Doucet Date: Sat Nov 14 17:33:49 2015 +0000 Fix minor Makefile issues commit ea380362a2b15ec8583216d095d4ef721cb4463c Author: Damon 
Doucet Date: Fri Nov 13 19:49:52 2015 +0000 Make small modifications to foo and add a tool which instruments a single function Foo has two extra functions (used to test the new tool) Foo is compiled at O3 Makefile cleaned up/refactored a little bit Update tools to use new func_entry API (function pointer as first param) commit f390559910ec750688fead7dd3c38860f1cc6367 Merge: f4faac934 18f874c1a Author: Tyler Denniston Date: Thu Nov 12 09:00:51 2015 -0500 Merge pull request #5 from tdenniston/callgraph Callgraph tool commit 18f874c1ad0919c6368c68666f03b859e57a629d Merge: 65972cf60 f4faac934 Author: Tyler Denniston Date: Thu Nov 12 08:59:06 2015 -0500 Merge branch 'master' into callgraph commit f4faac93475e7d03fdf25b5bb547a8855669c55f Merge: b93517879 54983079a Author: Tyler Denniston Date: Mon Nov 9 15:01:14 2015 -0500 Merge pull request #4 from tdenniston/code-coverage Basic code coverage tool commit 65972cf60b0faf57601a6b5aae29f64f6795ff60 Author: Tyler Denniston Date: Mon Nov 9 13:59:24 2015 -0500 Add call graph tool commit 54983079ae6e9f42e1c9c0ef0b4ded97994a4066 Author: Tyler Denniston Date: Mon Nov 9 11:58:08 2015 -0500 Add coverage flags to MM test, disabled by default commit ac59ae9d12f43bd5df9cb24f4645a0d6ccd06f64 Author: Tyler Denniston Date: Sun Nov 8 16:23:24 2015 -0500 Add basic code coverage tool commit b310d6dcd9dd381bee13fdee6147f56a6394f783 Author: Tyler Denniston Date: Sun Nov 8 16:22:43 2015 -0500 Add TOOL flag to multi-module test commit 0681f45e5a94e83f4460719bb3472a0dadb7c4b9 Author: Tyler Denniston Date: Sun Nov 8 16:22:15 2015 -0500 Fix flag for 'foo' test commit 1520756b6f2716e1ab52436f60075daabb713ea2 Author: Tyler Denniston Date: Sat Nov 7 16:56:09 2015 -0500 Add module id to module init hook commit b935178798bc1719c6dbf487f26e983a68c6e4b9 Merge: f8bae19f8 62693097d Author: Tyler Denniston Date: Sat Nov 7 16:22:52 2015 -0500 Merge pull request #3 from tdenniston/instr-bb Basic block hooks + CSI link-time pass commit 
62693097dfa6fec0352ea944fcdf776e365219f6 Author: Tyler Denniston Date: Sat Nov 7 15:22:34 2015 -0500 Update counter tool init signature commit 8f878dbcbfdbf5f8018d0aca0de392154e74a499 Author: Tyler Denniston Date: Sat Nov 7 14:41:05 2015 -0500 Add module count/BB count to init functions commit f8bae19f8d9a0235440c4ffed8c943f0bb234e17 Author: Damon Doucet Date: Fri Nov 6 14:58:02 2015 +0000 Add Makefile and gitignore for tsan benchmarks commit 3074f3909ee235c0aab1fc0b794c54d08fe1d97b Author: Damon Doucet Date: Fri Nov 6 14:57:03 2015 +0000 Add -O3 to csi tests Makefile commit e02059782b33d41333ac4bef26e1304ddf430bc1 Author: Tyler Denniston Date: Tue Nov 3 14:38:52 2015 -0500 Add basic block hooks commit d541b965911457a254ae903892fafb05c426ea6a Author: Tyler Denniston Date: Tue Nov 3 11:46:49 2015 -0500 Remove CSI runtime. (not needed now with linktime pass) commit 0241130c4ba8ceeb6f9c7ad9a821e2f54b4b38dd Merge: 43a1640a8 b4c0e73df Author: Tyler Denniston Date: Tue Nov 3 09:56:32 2015 -0500 Merge branch 'lt-component' into instr-bb commit b4c0e73dff6e4fc8d1bebf7884536a9cb9bc9370 Author: Damon Doucet Date: Tue Oct 27 15:13:07 2015 +0000 Add csi interface to tsan runtime and modify tests to compile with csi instrumentation commit 55470131900f2d3d00d08743c0ab615b32a66bb9 Author: Damon Doucet Date: Tue Oct 27 15:10:39 2015 +0000 Fix mutexset tests Before, they used +=, which can either write-race or read-race; I changed it to just use =, which guarantees a write race. commit 43a1640a8320fc2b31fedc143a4d58ac40299504 Author: Tyler Denniston Date: Mon Oct 26 15:40:48 2015 -0400 Remove executable I accidentally committed commit 7474c3236e5bf9d21a80001aa1ba160b89d5cb09 Author: Tyler Denniston Date: Mon Oct 26 15:39:46 2015 -0400 Add a tiny CSI runtime. This allows us to implement a "whole program" init function, i.e. across all modules. commit 28a6df71164a48f6bcd6aac8df4b4901d76e9fdb Author: Tyler Denniston Date: Mon Oct 26 15:19:42 2015 -0400 Add gold linker flag to LDFLAGS. 
commit a8afdb55c62cb9066757d9b5475f2e6ce4a4d83f Author: Tyler Denniston Date: Mon Oct 26 15:19:20 2015 -0400 Add multi-module test program. commit 408c546b51056f64abc5e585ca10fe054aed48d9 Merge: 178f46f88 5da2f8995 Author: Tyler Denniston Date: Mon Oct 26 14:41:23 2015 -0400 Merge pull request #1 from OpenKimono/toolkit-reorg Reorganize tools; add null tool commit 5da2f89951e7e3c21a880bc9aa136f2cdd0c465d Author: Tyler Denniston Date: Mon Oct 26 13:56:41 2015 -0400 Add make var to control what tool you link with. By default we link with the null tool. commit c162d35781ab5377c67469b2826a0a2b5d48f71b Author: Tyler Denniston Date: Mon Oct 26 13:50:18 2015 -0400 Rename __csi_destroy -> destroy. Also add anonymous namespace around tool-private code. commit ac3c257a35ba5bc98d7dc47d1402af172348029e Author: Tyler Denniston Date: Mon Oct 26 13:45:52 2015 -0400 Remove WEAK attributes in tool implementation. commit 2cbaed6d1a5db87b44b2e0ca0b7f4de16caa85fc Author: Tyler Denniston Date: Mon Oct 26 13:01:08 2015 -0400 Remove empty definitions from counter_tool. Now with the null tool and weak symbols, we only have to override functions we care about. commit b1f68859ac21c2ec2e801b22f4060959cf115f76 Author: Tyler Denniston Date: Mon Oct 26 12:59:31 2015 -0400 Add null tool commit 48f179b802f809b57c4512b90c5238cea785ff9a Author: Tyler Denniston Date: Mon Oct 26 12:31:19 2015 -0400 Rename empty_tool -> print_tool commit 89f2dd75f55a1cb96c5ecbbef0045edc16da689e Author: Tyler Denniston Date: Mon Oct 26 12:28:16 2015 -0400 Disable verbose compile by default. commit 302c5c7e549d0df1256a4282aeb59caed980cefa Author: Tyler Denniston Date: Mon Oct 26 12:27:40 2015 -0400 Move counter_tool to toolkit commit e25c696c7118fd9dcd987a941a88c7fcb40d058e Author: Tyler Denniston Date: Mon Oct 26 12:21:15 2015 -0400 Add 'toolkit' directory and move empty_tool there. This separates the tools from test programs. 
commit 178f46f885d1f3f5408feb7a3a7e3e61bb2dd067 Author: Damon Doucet Date: Thu Oct 22 21:47:17 2015 +0000 Rename tool methods from ok to csi, and update Makefile commit 12ac7dfdafbe1dbb78a1cc6cfae90dafdd753586 Author: Damon Doucet Date: Thu Oct 22 20:14:14 2015 +0000 Fix empty tool to accept the size of a load/store as a parameter rather than using multiple methods commit 33c2ea997bf7e2a9d7a29ab55f065bad390a2b58 Author: Damon Doucet Date: Tue Oct 20 21:32:20 2015 +0000 Update memops test slightly commit efeec78e253fd755ba45c5dd549c298af6ac2d0d Author: Damon Doucet Date: Thu Oct 8 16:17:55 2015 +0000 Replace branching by num_bytes with array access commit 37410883ecf4c767c5b97f42af447e9c61c18d4b Author: Damon Doucet Date: Thu Oct 8 15:29:46 2015 +0000 Convert memory throughput test to use one before/after_load/store per type commit 54ff6c64d5de7599f4bea9b7cd5091d86ae6332d Author: Damon Doucet Date: Thu Oct 8 15:37:02 2015 +0000 Add memory throughput test to measure instrumentation overhead commit 6dc7cbc3525e5061a8d29f7e2f56e03dff1095f3 Author: Damon Doucet Date: Thu Oct 8 14:48:26 2015 +0000 Add ok constructor to empty tool commit 85939595b46f8765c0c49630b6584d066ce144c1 Author: Damon Doucet Date: Wed Oct 7 03:51:06 2015 +0000 Add LLVM crash dumps to gitignore commit 83e2442a4483964c60c99c4c9731be98379ad4a3 Author: Damon Doucet Date: Wed Oct 7 03:40:36 2015 +0000 Remove values from load/store hooks commit be6227fd779369033869bdcf471b9079505257eb Author: Damon Doucet Date: Wed Oct 7 00:51:23 2015 +0000 Remove unnecessary include from foo commit fad80fb4352f2df4f9c7717b69cbda51ff7fac6e Author: Damon Doucet Date: Tue Oct 6 23:50:12 2015 +0000 Update empty tool to have func entry and exit hooks commit ee8840866f1a90950d60f70e22ac3427d8c2e5a6 Author: Damon Doucet Date: Tue Oct 6 23:19:53 2015 +0000 Add fib-serial and program that writes to global variables to test/csi commit e8e668dba0c7ea1a5dd8dbb5e3aa98a7e2327616 Author: TB Schardl Date: Fri Aug 25 22:59:01 2017 +0000 
[CSI] Updated CSI tests to properly handle CSI properties. commit 408469c5f3388cd6bc44cd1f64e4a7822df4f70f Author: TB Schardl Date: Fri Aug 25 22:57:56 2017 +0000 [CSI] Fixed ordering of FED tables to match ordering implemented by instrumentation pass. commit 4507fe8fe4f9fdb3897b8ebe8c104f2233ed2aff Author: TB Schardl Date: Fri Aug 25 22:57:02 2017 +0000 [CSI] Fixed ordering of instrumentation counts and FED tables to match ordering implemented by instrumentation pass. Added basic properties to functions and basic blocks. Added column numbers to source locations. commit 4b38c4038a4f2b8e2d02b5f5d7877fa79d940009 Author: Hans Wennborg Date: Fri Aug 25 00:31:02 2017 +0000 Merging r311674: ------------------------------------------------------------------------ r311674 | hans | 2017-08-24 10:00:36 -0700 (Thu, 24 Aug 2017) | 3 lines Mark allocator_oom_test.cc unsupported on arm & aarch64 (PR33972) The buildbots don't seem to like it. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/branches/release_50@311736 91177308-0d34-0410-b5e6-96231b3b80d8 commit b1684fe91a3c35f55cc02d9b41bc47caa65e317c Author: Hans Wennborg Date: Wed Aug 23 21:38:59 2017 +0000 Merging r311555: ------------------------------------------------------------------------ r311555 | oleg | 2017-08-23 07:26:31 -0700 (Wed, 23 Aug 2017) | 14 lines [ARM][Compiler-rt] Fix AEABI builtins to correctly pass arguments to non-AEABI functions on HF targets Summary: This is a patch for PR34167. On HF targets functions like `__{eq,lt,le,ge,gt}df2` and `__{eq,lt,le,ge,gt}sf2` expect their arguments to be passed in d/s registers, while some of the AEABI builtins pass them in r registers. 
Reviewers: compnerd, peter.smith, asl Reviewed By: peter.smith, asl Subscribers: peter.smith, aemerson, dberris, javed.absar, llvm-commits, asl, kristof.beyls Differential Revision: https://reviews.llvm.org/D36675 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/branches/release_50@311606 91177308-0d34-0410-b5e6-96231b3b80d8 commit 09d9a9e1e1ee4188e0227b8b9bce04bd8df287b6 Author: Hans Wennborg Date: Wed Aug 23 18:09:02 2017 +0000 Merging r311496: ------------------------------------------------------------------------ r311496 | hans | 2017-08-22 14:54:37 -0700 (Tue, 22 Aug 2017) | 1 line [profile] Fix warning about C++ style comment in C file ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/branches/release_50@311584 91177308-0d34-0410-b5e6-96231b3b80d8 commit 78162497aa177b34956aee0458e09d8b97b5dd2b Author: Hans Wennborg Date: Wed Aug 23 18:07:44 2017 +0000 Merging r311495: ------------------------------------------------------------------------ r311495 | hans | 2017-08-22 14:54:37 -0700 (Tue, 22 Aug 2017) | 6 lines [esan] Use stack_t instead of struct sigaltstack (PR34011) The struct tag is going away in soon-to-be-released glibc 2.26 and the stack_t typedef seems to have been there forever. Patch by Bernhard Rosenkraenzer! 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/branches/release_50@311583 91177308-0d34-0410-b5e6-96231b3b80d8 commit 9fe695a3428903fbe79cb360a475da9b86d08eb0 Author: TB Schardl Date: Tue Aug 8 23:05:51 2017 +0000 Squashed commit of the following: commit 71c22bcedac24836cff8507dc0e41bcd2878794b Merge: 6b95f9648 66ccf0079 Author: TB Schardl Date: Fri Jun 2 15:44:43 2017 +0000 Merge branch 'master' of http://llvm.org/git/compiler-rt commit 6b95f9648b076e351ad057d3193ad0923e0ebc40 Merge: 60dc811b9 97fc005f6 Author: TB Schardl Date: Wed May 31 01:45:14 2017 +0000 Merge branch 'master' of http://llvm.org/git/compiler-rt commit 60dc811b9e63072928fd3af2fca89f3fbbc5b874 Merge: a4d010dcc 718173908 Author: TB Schardl Date: Fri May 26 12:16:33 2017 +0000 Merge branch 'master' of http://llvm.org/git/compiler-rt commit a4d010dccb8c9cd19accfefb0c739bd771472ef6 Merge: d16e4026a 451043533 Author: TB Schardl Date: Mon Apr 24 15:49:06 2017 +0000 Merge branch 'master' of github.com:CSI-LLVM/compiler-rt commit 4510435336499c05af77c807798eec4ba1a6ec65 Author: Tyler Denniston Date: Fri Apr 14 10:38:07 2017 -0500 [CSI-TSan] Fix 'undefined symbol' errors when using CSI-TSan on shared libraries. commit 4cb88157f43e78a49ace4340788a1ee4834fb777 Author: TB Schardl Date: Wed Apr 12 02:04:51 2017 +0000 [CSI-null] Encourage inlining of null CSI hooks. commit e1076ccbeb1ddc856d23397f80b91a2735a9f00e Author: TB Schardl Date: Tue Apr 11 15:11:48 2017 +0000 [CSI] Differentiating between property types for different categories of IR objects. Added several new properties. commit d16e4026ab6407041e042dda07d4538a597545fe Merge: 796f898b4 4154f1e99 Author: TB Schardl Date: Mon Apr 3 21:19:26 2017 +0000 [CSI] Merging CSI into compiler-rt checkout compatible with Tapir/LLVM. commit 4154f1e9973d456bc34a3d25dd9dd9d7f0d39384 Author: TB Schardl Date: Sat Dec 31 17:37:50 2016 +0000 [CSI] Add function names to FED tables. 
commit f277290b342a8922328d75b8e36c2ecdf5c086fb Author: Angelina Lee Date: Sun Dec 4 19:31:39 2016 -0600 Comment out unused __csi_disable_instrumentation flag commit 8b99936641aff04c11dcb647df63a19a65da54eb Author: Tyler Denniston Date: Fri Sep 2 16:10:34 2016 -0400 Disable STL test for now. commit 2c11ec55eb0c0650166fd7d54de563b743b26c17 Author: Tyler Denniston Date: Thu Sep 1 17:07:52 2016 -0400 Use new property struct bitfield. commit 662843a13025616c8dc3170490489e8b387f9bcb Author: Tyler Denniston Date: Mon Aug 15 15:00:42 2016 -0400 Disable instrumentation moved into compiler. commit 1165fb7d1ada6f19258bceb901527048575c56bb Author: Tyler Denniston Date: Mon Aug 15 12:53:50 2016 -0400 Initial implementation of __csi_disable_instrumentation commit 918b750beb64b2d2ee32e6966e7744dc92e6cb9c Author: Tyler Denniston Date: Mon Aug 15 12:31:15 2016 -0400 Add C++ test using STL. commit d78554b1a620a97ed628df4b788093b7e9613e04 Author: Tyler Denniston Date: Mon Aug 15 10:36:07 2016 -0400 Add no-op property parameter to all hooks. 
commit 7cad8c58c916ee7144c55472705e342da94e9f88 Merge: 3a57261c3 e42331093 Author: Tyler Denniston Date: Wed Jul 13 13:12:28 2016 -0400 Merge remote-tracking branch 'llvm-origin/master' commit 3a57261c3bb5ca6ffddbf8526956bae847d92c4f Merge: 1b980b0ea 28f8c79bb Author: Tyler Denniston Date: Mon Jun 27 09:13:08 2016 -0400 Merge remote-tracking branch 'llvm-origin/master' commit 1b980b0eaa768a08006bd37bc0bb50e015c57bee Author: Tyler Denniston Date: Thu Jun 16 14:49:01 2016 -0400 Add FED test commit 4eb78d43c7a76c3fedeeb20a9b0a9b39586d47d5 Author: Tyler Denniston Date: Thu Jun 16 11:57:00 2016 -0400 Add test for read-before-write property commit 7d819a455658e42a294502e9a17e9959e1f08844 Author: Tyler Denniston Date: Thu Jun 16 11:47:49 2016 -0400 Fix test output names to allow them to run in parallel commit e310651d04e45435aa69019c6cfafccbb04e72d2 Author: Tyler Denniston Date: Thu Jun 16 11:20:40 2016 -0400 Add unknown callsite test commit 5b17bcb73e1a4b75784213b426ce746efe6b9df2 Author: Tyler Denniston Date: Thu Jun 16 11:13:42 2016 -0400 Load property commit 6162841412121611977be08be4759696b4f9fe3a Author: Tyler Denniston Date: Thu Jun 16 11:05:48 2016 -0400 Add unknown ID macro commit 8a768b85a167094e95d7461ed8d47607b7f5bc84 Author: Tyler Denniston Date: Wed Jun 15 17:43:33 2016 -0400 Remove old changes not matching upstream commit 793e6853ad80bf9d9ed64d4ebf3ac6b07afa41e0 Author: Tyler Denniston Date: Wed Jun 15 17:18:48 2016 -0400 Add shared object CSI test (passing) commit 40ae69e55bc692b38081c3c7311d0172c42c1ea0 Author: Tyler Denniston Date: Wed Jun 15 17:18:28 2016 -0400 Fix CSIRT visibility bug commit 214cff3b635bfc37b3e929a111e247328a247930 Author: Tyler Denniston Date: Wed Jun 15 16:55:45 2016 -0400 Add multiple units test (passing) commit 687cb3fa237cc3da845c7a0e1982a2e251ce69e9 Author: Tyler Denniston Date: Wed Jun 15 16:55:24 2016 -0400 Fix runtime bug with empty FED tables commit 5a8adad2eff56d5a980759dc9d7220cdab0b60be Author: Tyler Denniston Date: Wed 
Jun 15 16:39:13 2016 -0400 First CSI test passing. commit 456af2165c4b4ab94dd6593b72edff786755427e Author: Tyler Denniston Date: Wed Jun 15 16:16:15 2016 -0400 'make check-csi' initially working. The test doesn't check anything yet, but the infrastructure is there. commit d3fba62f917a8d9086926a35794f7faa4a9b9019 Author: Tyler Denniston Date: Wed Jun 15 11:39:06 2016 -0400 Removing old tests. They will come back in a different form. commit 4a10e34eaec8e5e3be8a283dc6c9fa0ba42f2eea Author: Tyler Denniston Date: Wed Jun 15 11:34:50 2016 -0400 Removing toolkit until I figure out a better place for it. commit 42c9b2f043f404edd9fb3a633c57dced85a56443 Author: Tyler Denniston Date: Wed Jun 15 11:33:10 2016 -0400 Remove csirt.c and csi.h from tests. This breaks building tools for now. commit 6597b37233cd9c71dbf987884284298f306e91b3 Author: Tyler Denniston Date: Wed Jun 15 11:27:06 2016 -0400 Start splitting CSI up by lib and test lib/csi will contain the runtime and (for now) csi.h test/csi will only contain Lit tests commit da52217503266046b0cb22c2415f7581457c9f7f Author: Tyler Denniston Date: Wed Jun 15 11:25:21 2016 -0400 Remove old tsan+csi files commit 7e6dbf074de41b61f7b9e6515932c681c6e82138 Author: Tyler Denniston Date: Tue Jun 14 16:10:39 2016 -0400 FED tables are now copied. commit df3cd69526204337c81705a161bb66f14d3d60dc Author: Tyler Denniston Date: Tue Jun 14 15:56:03 2016 -0400 API cleanups commit 7bcd7ee719ffbb70522876c175698f1d3f882b13 Author: Tyler Denniston Date: Tue Jun 14 13:51:47 2016 -0400 Add back callsite -> func id mapping. Mistakenly removed in f994e3d7 commit c89ab1ad9b2a931da9100575064e30cee29d43c6 Author: Angelina Lee Date: Tue Jun 14 11:37:03 2016 -0500 Adding a simple memory tracer tool commit f994e3d7c0c8c5d3c30e2c63eb82917eb56bf3e3 Author: Tyler Denniston Date: Tue Jun 14 11:45:22 2016 -0400 Remove relation tables. 
commit b1ee9026096a524e753678fc1b0c356a268cb3b8 Merge: bd0531e56 a01fb8f7e Author: Tyler Denniston Date: Tue Jun 14 10:55:02 2016 -0400 Merge remote-tracking branch 'upstream/master' commit a01fb8f7e95879add1e4a686cfcb053e13becc92 Merge: a159bc23b df01596dc Author: Tyler Denniston Date: Tue Jun 14 10:40:12 2016 -0400 Merge remote-tracking branch 'upstream/master' Conflicts: lib/tsan/rtl/Makefile test/tsan/mutexset6.cc test/tsan/test_output.sh commit bd0531e569a50ca8fb7ede07ea9e71612983b996 Author: Tyler Denniston Date: Mon Jun 13 10:52:15 2016 -0400 Demo tool prints in colors. commit a9699746f5d289471f10951e288e754682b6109d Author: Tyler Denniston Date: Mon Jun 13 10:25:30 2016 -0400 Cleanups to demo tool commit a2c22a1958a94166442779799261cd65c59c87e7 Author: Tyler Denniston Date: Wed Jun 8 12:56:04 2016 -0400 Clean up Makefile commit dad28ca144e48fbbdcc9e29b218f5dd3fc047fc7 Author: Tyler Denniston Date: Wed Jun 8 11:13:09 2016 -0400 Update demo tool to not use STL structures commit 845303fa04b0778520a30fe78a17e7e207b572f8 Author: Tyler Denniston Date: Wed Jun 8 10:31:37 2016 -0400 Update demo tool to maintain shadow stack commit 75e3027d522851a77842b6a7fe98af750fb9930d Author: Tyler Denniston Date: Tue Jun 7 14:35:40 2016 -0400 Update dyn test and add demo tool commit 31a2b4a9789546ae874cf1f24093ce749bcafbb8 Author: Tyler Denniston Date: Mon Jun 6 11:00:30 2016 -0400 Update print tool commit 0b6f8bcd2ea7098757d0830f32898b3b9a189a4b Merge: 0c2d334e3 c7e2e4f26 Author: Tyler Denniston Date: Mon Jun 6 10:44:42 2016 -0400 Merge branch 'master' of github.com:CSI-LLVM/compiler-rt commit c7e2e4f260ef1416fff8f006df13037426d890c2 Author: Damon Doucet Date: Thu Jun 2 02:53:29 2016 +0000 Combine FED tables into a list of structures in __csirt_unit_init commit 4e304f17c688feda096e31b6a8c37347277c0650 Author: Damon Doucet Date: Thu Jun 2 01:48:55 2016 +0000 Add const qualifiers to FED entries and return pointers in accessors commit e1e1a25b7b280832759b98e01fecb6744b47e99e 
Author: Damon Doucet Date: Thu Jun 2 01:32:16 2016 +0000 Change __csi_before/after_callsite to __csi_before/after_call commit c43a5f10d42e212dabe225fa8742dc7d015a83fb Author: Damon Doucet Date: Thu Jun 2 01:26:59 2016 +0000 Reorder hooks in csi.h to match API doc commit 7cc94d8b7f6ad1254e35cde407d1e568c3574d61 Author: Damon Doucet Date: Wed Jun 1 21:07:59 2016 +0000 Add const to params in __csi_before/after_call in csi.h commit 9f9c853a997ca56c8ee2fd181bf386b12a436ec9 Author: Damon Doucet Date: Wed Jun 1 21:07:13 2016 +0000 Rename __csirt_callsite_target_unknown to have "is" commit 1aabc135bed7f796f08362c07a3098d41d59a909 Author: Damon Doucet Date: Wed Jun 1 21:04:14 2016 +0000 Fix types commit 2301b1b8e4afdc1342c8e4111f53409df1248149 Author: Damon Doucet Date: Wed Jun 1 20:32:37 2016 +0000 Move accessors from csirt.h to csi.h commit 0c2d334e3c64e7afc2fd05ea3343db21bd2d28b5 Merge: 0eaf686da 4f6cea22b Author: Tyler Denniston Date: Wed Jun 1 14:06:06 2016 -0400 Merge branch 'master' of github.com:CSI-LLVM/compiler-rt commit 0eaf686da2a8c46de3d2c151c1b8e3dcc96ca02e Author: Tyler Denniston Date: Wed Jun 1 14:05:52 2016 -0400 Update comment commit 4f6cea22bcc1a4956e9686e7b7e166c4fd3e7c3e Author: Damon Doucet Date: Wed Jun 1 17:08:03 2016 +0000 Add __csi_after_callsite to csi.h, null_tool, and fed_test commit 43d355d94becf464033c0f58127a914f21fc4461 Author: Damon Doucet Date: Wed Jun 1 16:38:54 2016 +0000 Update csi.h, null_tool, and fed_test to match API better commit ddfb53f805e2beb4cfb0b9db569d6f1af98dd0b6 Author: Damon Doucet Date: Tue May 31 18:59:19 2016 +0000 Change runtime library to better match API commit f0b0bfd3c68dc27829a889131ce4550e9275476d Author: Tyler Denniston Date: Wed Jun 1 10:45:15 2016 -0400 Add new callback to csirt commit e5499f8e7734e68823cbc610dd85d36d75936dbd Author: Tyler Denniston Date: Wed Jun 1 10:29:51 2016 -0400 Update print tool commit 0451b37b20c581fc61ae1eb48b8ebf8fa214901e Author: Tyler Denniston Date: Wed Jun 1 09:53:04 2016 -0400 
Update dyn test commit 5ccb4b02bed6655ef7e9e8e37d070d0804ae7701 Author: Tyler Denniston Date: Wed Jun 1 09:29:14 2016 -0400 Print tool now prints FED information as well commit 87fafee58d5c3fc87d198012cfc310a93a2a8653 Author: Tyler Denniston Date: Wed Jun 1 09:20:33 2016 -0400 Update print tool commit 8d3e6d166e54050499149e0f6a57cef86ea650d8 Author: Tyler Denniston Date: Wed Jun 1 09:05:13 2016 -0400 More comments commit 72161e32eef0f3e4c0453c8b8bd256363a1d43ba Author: Tyler Denniston Date: Wed Jun 1 08:58:39 2016 -0400 More comments commit 5e28271641523cb294a4791abcee5b5b99213bb2 Author: Tyler Denniston Date: Wed Jun 1 08:50:14 2016 -0400 Add some comments commit 779e9e1c690d2f23dbf62ce03cb82fd90a611d14 Author: Tyler Denniston Date: Fri May 27 11:26:37 2016 -0400 Use relation tables in API impl. commit fb2441950bc409d6782503782982360ab67c868d Author: Tyler Denniston Date: Tue May 24 09:59:19 2016 -0400 Add skeleton of rel tables implementation commit f7957f485b99b2186ceef01be8d98a1ee1503ef9 Author: Tyler Denniston Date: Mon May 23 10:45:38 2016 -0400 Add instrumentation_counts_t struct for unit init commit e4ad154725490c6bbf52852e1d4725211f1d8c37 Author: Tyler Denniston Date: Tue May 17 15:14:48 2016 -0400 Remove global FED commit 61965e13db755bd2845819e2b4bc67b390e3c6f6 Author: Tyler Denniston Date: Tue May 17 15:11:10 2016 -0400 Add load/store feds commit 6a9c153cb06a8fb670781572900e02a1a8208668 Author: Tyler Denniston Date: Tue May 17 15:01:38 2016 -0400 Add func exit FED commit 243e9be01cbe17aa56720484be39989889490ffd Author: Tyler Denniston Date: Tue May 17 14:14:49 2016 -0400 Add callsite FED commit b0a8520c36496b5149ba85c7769a854308e62892 Author: Tyler Denniston Date: Tue May 17 13:38:14 2016 -0400 Add BB FED table commit ee343d8098be87e920b327406cfdfcb504c5756c Author: Tyler Denniston Date: Tue May 17 10:50:48 2016 -0400 Add separate FED (and therefore ID space) for functions. 
commit 49c3e2a397a7ffee6c9219773c52bc23090a85d1 Author: Tyler Denniston Date: Tue May 17 10:40:33 2016 -0400 Prepare for multiple FED types. commit 8d4606d411f426d75de599afe04133781cabecd9 Author: Tyler Denniston Date: Tue May 17 10:30:26 2016 -0400 More refactoring commit b35e133bb60bda3dba11d30a65ae941aa04d4021 Author: Tyler Denniston Date: Tue May 17 10:28:40 2016 -0400 Small refactor commit df70be5ce0aeafc1064385a535700a1927f32780 Author: Tyler Denniston Date: Tue May 17 10:15:43 2016 -0400 Rework csirt fed table collection structure commit f0721fbd26ceb974240e9db736edc01d05d8dae8 Author: Tyler Denniston Date: Tue May 17 09:57:01 2016 -0400 Refactor csirt.c commit 3fe6b885f76350770c4ec228008551085710dd5b Author: Tyler Denniston Date: Tue May 17 09:43:51 2016 -0400 Small csirt.c cleanups commit 50cb199561239f3a8b56f62885a8c259966a4d17 Merge: 72a21d227 c94c7154c Author: Bradley C. Kuszmaul Date: Tue May 10 17:47:52 2016 +0000 Merge branch 'printtool' commit c94c7154c26befefd26c6b0df869dbc7b987fd97 Author: Bradley C. Kuszmaul Date: Tue May 10 17:47:46 2016 +0000 Make printtool compile commit 72a21d2276c30cf7643cd320706e3cae3f04d7dc Author: Bradley C. Kuszmaul Date: Thu May 5 16:43:18 2016 +0000 get this to work: make TOOL=print_tool commit a159bc23b41f8e2ecc2edb3cbafa0b3c080adfa6 Author: Tyler Denniston Date: Mon May 2 09:22:19 2016 -0400 Add callsites tool commit 8e5b4e88d09f1570a7698f8601ab3cfd98871851 Author: Tyler Denniston Date: Mon May 2 09:21:41 2016 -0400 Update dynamic lib test commit ac300cc3efea2357829d9bee1d96bb91a6f8fef2 Author: Tyler Denniston Date: Mon May 2 08:56:22 2016 -0400 Add before_callsite test. commit 601ed83b4d88e33c2722e5d70237d036259b46c8 Author: Damon Doucet Date: Tue Apr 26 17:24:55 2016 +0000 Add -g flag to multi module test Makefiles commit c609bef3617aa2b94a83b82bd7f8000f0236eee0 Author: Tyler Denniston Date: Tue Apr 26 10:22:23 2016 -0400 Add back Makefile rule for null_tool. 
commit 10d5a053b291be20aac6103af52e382f85ff9eaf Author: Damon Doucet Date: Tue Apr 5 01:51:25 2016 +0000 Add CSI Runtime Library, add FED test, update some header files commit 9bef3a2f0e916608808823f473a9fe2924405092 Author: Damon Doucet Date: Wed Mar 30 06:37:36 2016 +0000 Update gitignore files commit 6c4c541e9b1cf52a028d4dc82a8e891adfd0bb22 Author: Damon Doucet Date: Wed Mar 30 06:37:16 2016 +0000 Add dynamically loaded multi module test commit 1a54ef89e4f11635dc115d97c9cd923937566109 Author: Damon Doucet Date: Wed Mar 30 06:36:14 2016 +0000 Update toolkit to have more recent API, as well as csirt header commit d261ca785d7aedd6cd0d42629705a2b95e0fe921 Author: Damon Doucet Date: Wed Mar 30 06:32:58 2016 +0000 Update foo test to include a statically linked unit commit 225f2e2f1f356e50fcaf9846dda50ac051f99506 Author: Damon Doucet Date: Wed Mar 30 06:30:44 2016 +0000 Add CSI runtime library commit 12d885de0197344d7fdb5f810f6b86927bed999a Author: Damon Doucet Date: Tue Mar 1 16:07:37 2016 +0000 Add basic-block counter tool commit 37536fb1cd0075a681b0035c2676e25911d8db32 Author: Tyler Denniston Date: Tue Jan 12 05:49:06 2016 -0500 Add gprof tool commit 7c8a1856b384af58d3ee84d7472eb0faca9a2873 Author: Tyler Denniston Date: Fri Nov 20 11:56:53 2015 -0500 Update tsan benchmark commit 7ddb60372e0140acc5fa07643e516f95ef754a90 Author: Tyler Denniston Date: Thu Nov 19 16:42:26 2015 -0500 Increase memops iters commit a5e02d047e36551e4275c93f4b56250f9cb726d7 Author: Tyler Denniston Date: Thu Nov 19 10:34:09 2015 -0500 Replace old call-graph tool with current version. commit 3ad73c4623c588d80395aafefe438bb75ccb1a90 Author: Tyler Denniston Date: Thu Nov 19 10:16:26 2015 -0500 Update tools to not use structs. commit d4dd7e092e9b7927476fef0171008cf3e0407d99 Author: Tyler Denniston Date: Thu Nov 19 10:08:01 2015 -0500 Remove struct arg for load/store instrumentation. 
commit 24133c284a87ded2b140cdf7b26c145312e7308a Author: Tyler Denniston Date: Thu Nov 19 09:53:56 2015 -0500 Remove struct args commit 90fa9684b97d99a32ea9924564e8beea9529fb96 Author: Tyler Denniston Date: Thu Nov 19 09:38:13 2015 -0500 Remove problematic make flag commit 4c98e18f06d76103c76479a90d45ad9c952dd0fe Author: Damon Doucet Date: Thu Nov 19 11:40:04 2015 +0000 Update csi wrapper for tsan to use newer api commit 9bf1761806b5cae1f651d1a3510cfde6a2aca178 Author: Damon Doucet Date: Thu Nov 19 11:39:38 2015 +0000 Add mini_bench_increments to gitignore commit fe0c768f6a220ec74eb8fed9cbd55beb62433ca8 Author: Damon Doucet Date: Thu Nov 19 11:38:46 2015 +0000 Update Makefile, add props_tool, update csi.h and null_tool to use new property syntax commit 67bdc9ba265ddd783a086e849e0317f0b2cb23a1 Author: Damon Doucet Date: Thu Nov 19 09:24:55 2015 +0000 Add mini_bench_increments tsan benchmark commit 87d7a487571c9d3326a523b9a8e5b1e1eb98097b Author: Damon Doucet Date: Thu Nov 19 08:44:08 2015 +0000 Add work to fib-serial function body commit 5cc1ce88bcecf98aa20988a9085da385f1dc3796 Author: Damon Doucet Date: Mon Nov 16 19:28:21 2015 +0000 Move prof_lite/ to cilkprof/ and create prof_lite tool commit c70fe8bd927e21684a194aa2e01d4ce8948fd54c Author: Damon Doucet Date: Mon Nov 16 14:13:09 2015 +0000 Cause prof_lite to time functions (correctly this time) commit 9a71a146b988ef8d6201e21d971269ea3362f63a Author: Damon Doucet Date: Mon Nov 16 14:12:46 2015 +0000 Refactor Makefile a little commit 5639041d8102c4887fc2204048e4e4fff8500dde Author: Damon Doucet Date: Mon Nov 16 07:35:34 2015 +0000 Make cilkprof actually time functions commit 3f3718a4d4d376d8e2e2773fe5abf74a46fda059 Author: Damon Doucet Date: Sun Nov 15 01:39:28 2015 +0000 Added profiler tool (mostly copied from cilkprof) commit 223037c12c13f8efd6ee16ead5c75986012a5b20 Author: Damon Doucet Date: Sat Nov 14 17:33:49 2015 +0000 Fix minor Makefile issues commit ea380362a2b15ec8583216d095d4ef721cb4463c Author: Damon 
Doucet Date: Fri Nov 13 19:49:52 2015 +0000 Make small modifications to foo and add a tool which instruments a single function Foo has two extra functions (used to test the new tool) Foo is compiled at O3 Makefile cleaned up/refactored a little bit Update tools to use new func_entry API (function pointer as first param) commit f390559910ec750688fead7dd3c38860f1cc6367 Merge: f4faac934 18f874c1a Author: Tyler Denniston Date: Thu Nov 12 09:00:51 2015 -0500 Merge pull request #5 from tdenniston/callgraph Callgraph tool commit 18f874c1ad0919c6368c68666f03b859e57a629d Merge: 65972cf60 f4faac934 Author: Tyler Denniston Date: Thu Nov 12 08:59:06 2015 -0500 Merge branch 'master' into callgraph commit f4faac93475e7d03fdf25b5bb547a8855669c55f Merge: b93517879 54983079a Author: Tyler Denniston Date: Mon Nov 9 15:01:14 2015 -0500 Merge pull request #4 from tdenniston/code-coverage Basic code coverage tool commit 65972cf60b0faf57601a6b5aae29f64f6795ff60 Author: Tyler Denniston Date: Mon Nov 9 13:59:24 2015 -0500 Add call graph tool commit 54983079ae6e9f42e1c9c0ef0b4ded97994a4066 Author: Tyler Denniston Date: Mon Nov 9 11:58:08 2015 -0500 Add coverage flags to MM test, disabled by default commit ac59ae9d12f43bd5df9cb24f4645a0d6ccd06f64 Author: Tyler Denniston Date: Sun Nov 8 16:23:24 2015 -0500 Add basic code coverage tool commit b310d6dcd9dd381bee13fdee6147f56a6394f783 Author: Tyler Denniston Date: Sun Nov 8 16:22:43 2015 -0500 Add TOOL flag to multi-module test commit 0681f45e5a94e83f4460719bb3472a0dadb7c4b9 Author: Tyler Denniston Date: Sun Nov 8 16:22:15 2015 -0500 Fix flag for 'foo' test commit 1520756b6f2716e1ab52436f60075daabb713ea2 Author: Tyler Denniston Date: Sat Nov 7 16:56:09 2015 -0500 Add module id to module init hook commit b935178798bc1719c6dbf487f26e983a68c6e4b9 Merge: f8bae19f8 62693097d Author: Tyler Denniston Date: Sat Nov 7 16:22:52 2015 -0500 Merge pull request #3 from tdenniston/instr-bb Basic block hooks + CSI link-time pass commit 
62693097dfa6fec0352ea944fcdf776e365219f6 Author: Tyler Denniston Date: Sat Nov 7 15:22:34 2015 -0500 Update counter tool init signature commit 8f878dbcbfdbf5f8018d0aca0de392154e74a499 Author: Tyler Denniston Date: Sat Nov 7 14:41:05 2015 -0500 Add module count/BB count to init functions commit f8bae19f8d9a0235440c4ffed8c943f0bb234e17 Author: Damon Doucet Date: Fri Nov 6 14:58:02 2015 +0000 Add Makefile and gitignore for tsan benchmarks commit 3074f3909ee235c0aab1fc0b794c54d08fe1d97b Author: Damon Doucet Date: Fri Nov 6 14:57:03 2015 +0000 Add -O3 to csi tests Makefile commit e02059782b33d41333ac4bef26e1304ddf430bc1 Author: Tyler Denniston Date: Tue Nov 3 14:38:52 2015 -0500 Add basic block hooks commit d541b965911457a254ae903892fafb05c426ea6a Author: Tyler Denniston Date: Tue Nov 3 11:46:49 2015 -0500 Remove CSI runtime. (not needed now with linktime pass) commit 0241130c4ba8ceeb6f9c7ad9a821e2f54b4b38dd Merge: 43a1640a8 b4c0e73df Author: Tyler Denniston Date: Tue Nov 3 09:56:32 2015 -0500 Merge branch 'lt-component' into instr-bb commit b4c0e73dff6e4fc8d1bebf7884536a9cb9bc9370 Author: Damon Doucet Date: Tue Oct 27 15:13:07 2015 +0000 Add csi interface to tsan runtime and modify tests to compile with csi instrumentation commit 55470131900f2d3d00d08743c0ab615b32a66bb9 Author: Damon Doucet Date: Tue Oct 27 15:10:39 2015 +0000 Fix mutexset tests Before, they used +=, which can either write-race or read-race; I changed it to just use =, which guarantees a write race. commit 43a1640a8320fc2b31fedc143a4d58ac40299504 Author: Tyler Denniston Date: Mon Oct 26 15:40:48 2015 -0400 Remove executable I accidentally committed commit 7474c3236e5bf9d21a80001aa1ba160b89d5cb09 Author: Tyler Denniston Date: Mon Oct 26 15:39:46 2015 -0400 Add a tiny CSI runtime. This allows us to implement a "whole program" init function, i.e. across all modules. commit 28a6df71164a48f6bcd6aac8df4b4901d76e9fdb Author: Tyler Denniston Date: Mon Oct 26 15:19:42 2015 -0400 Add gold linker flag to LDFLAGS. 
commit a8afdb55c62cb9066757d9b5475f2e6ce4a4d83f Author: Tyler Denniston Date: Mon Oct 26 15:19:20 2015 -0400 Add multi-module test program. commit 408c546b51056f64abc5e585ca10fe054aed48d9 Merge: 178f46f88 5da2f8995 Author: Tyler Denniston Date: Mon Oct 26 14:41:23 2015 -0400 Merge pull request #1 from OpenKimono/toolkit-reorg Reorganize tools; add null tool commit 5da2f89951e7e3c21a880bc9aa136f2cdd0c465d Author: Tyler Denniston Date: Mon Oct 26 13:56:41 2015 -0400 Add make var to control what tool you link with. By default we link with the null tool. commit c162d35781ab5377c67469b2826a0a2b5d48f71b Author: Tyler Denniston Date: Mon Oct 26 13:50:18 2015 -0400 Rename __csi_destroy -> destroy. Also add anonymous namespace around tool-private code. commit ac3c257a35ba5bc98d7dc47d1402af172348029e Author: Tyler Denniston Date: Mon Oct 26 13:45:52 2015 -0400 Remove WEAK attributes in tool implementation. commit 2cbaed6d1a5db87b44b2e0ca0b7f4de16caa85fc Author: Tyler Denniston Date: Mon Oct 26 13:01:08 2015 -0400 Remove empty definitions from counter_tool. Now with the null tool and weak symbols, we only have to override functions we care about. commit b1f68859ac21c2ec2e801b22f4060959cf115f76 Author: Tyler Denniston Date: Mon Oct 26 12:59:31 2015 -0400 Add null tool commit 48f179b802f809b57c4512b90c5238cea785ff9a Author: Tyler Denniston Date: Mon Oct 26 12:31:19 2015 -0400 Rename empty_tool -> print_tool commit 89f2dd75f55a1cb96c5ecbbef0045edc16da689e Author: Tyler Denniston Date: Mon Oct 26 12:28:16 2015 -0400 Disable verbose compile by default. commit 302c5c7e549d0df1256a4282aeb59caed980cefa Author: Tyler Denniston Date: Mon Oct 26 12:27:40 2015 -0400 Move counter_tool to toolkit commit e25c696c7118fd9dcd987a941a88c7fcb40d058e Author: Tyler Denniston Date: Mon Oct 26 12:21:15 2015 -0400 Add 'toolkit' directory and move empty_tool there. This separates the tools from test programs. 
commit 178f46f885d1f3f5408feb7a3a7e3e61bb2dd067 Author: Damon Doucet Date: Thu Oct 22 21:47:17 2015 +0000 Rename tool methods from ok to csi, and update Makefile commit 12ac7dfdafbe1dbb78a1cc6cfae90dafdd753586 Author: Damon Doucet Date: Thu Oct 22 20:14:14 2015 +0000 Fix empty tool to accept the size of a load/store as a parameter rather than using multiple methods commit 33c2ea997bf7e2a9d7a29ab55f065bad390a2b58 Author: Damon Doucet Date: Tue Oct 20 21:32:20 2015 +0000 Update memops test slightly commit efeec78e253fd755ba45c5dd549c298af6ac2d0d Author: Damon Doucet Date: Thu Oct 8 16:17:55 2015 +0000 Replace branching by num_bytes with array access commit 37410883ecf4c767c5b97f42af447e9c61c18d4b Author: Damon Doucet Date: Thu Oct 8 15:29:46 2015 +0000 Convert memory throughput test to use one before/after_load/store per type commit 54ff6c64d5de7599f4bea9b7cd5091d86ae6332d Author: Damon Doucet Date: Thu Oct 8 15:37:02 2015 +0000 Add memory throughput test to measure instrumentation overhead commit 6dc7cbc3525e5061a8d29f7e2f56e03dff1095f3 Author: Damon Doucet Date: Thu Oct 8 14:48:26 2015 +0000 Add ok constructor to empty tool commit 85939595b46f8765c0c49630b6584d066ce144c1 Author: Damon Doucet Date: Wed Oct 7 03:51:06 2015 +0000 Add LLVM crash dumps to gitignore commit 83e2442a4483964c60c99c4c9731be98379ad4a3 Author: Damon Doucet Date: Wed Oct 7 03:40:36 2015 +0000 Remove values from load/store hooks commit be6227fd779369033869bdcf471b9079505257eb Author: Damon Doucet Date: Wed Oct 7 00:51:23 2015 +0000 Remove unnecessary include from foo commit fad80fb4352f2df4f9c7717b69cbda51ff7fac6e Author: Damon Doucet Date: Tue Oct 6 23:50:12 2015 +0000 Update empty tool to have func entry and exit hooks commit ee8840866f1a90950d60f70e22ac3427d8c2e5a6 Author: Damon Doucet Date: Tue Oct 6 23:19:53 2015 +0000 Add fib-serial and program that writes to global variables to test/csi commit e6bb43d8b68ab16a71b060fc32fcba18d20f8828 Author: Hans Wennborg Date: Wed Jul 19 12:20:49 2017 
+0000 Creating release_50 branch off revision 308441 git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/branches/release_50@308446 91177308-0d34-0410-b5e6-96231b3b80d8 commit 4d7b22a690dd1f6df0017ae4c98245eaee6a7f73 Author: TB Schardl Date: Sun Nov 24 13:17:43 2019 +0000 [test/Cilk] Updated test for backwards-compatibility patch of 'pragma cilk grainsize ='. commit d5b0b5b9066ff31c5c1571cffd9c7dff78cf8764 Author: TB Schardl Date: Sun Nov 17 21:30:45 2019 +0000 [Cilk] Regression test for sync and sync-region creation in a cilk_for loop body. commit dfbc0e6d0952c3e00c16b5cb02e681a48fd0b855 Author: TB Schardl Date: Tue Oct 22 18:34:02 2019 +0000 [Pragma] Add backwards compatibility for 'cilk grainsize = ...' syntax. Thanks to @skuntz for the essential code for this patch. This commit partially addresses wsmoses/Tapir-LLVM#90. commit 7d76b67d587e1630835d96c60aa8188ebc93084a Author: TB Schardl Date: Fri Oct 11 14:40:46 2019 +0000 [CGCilk] Bug fix to the creation of nested sync regions in _Cilk_for loop bodies. commit c206367d8d8d01dadf4901cc1d28a824a2633836 Author: TB Schardl Date: Fri Oct 4 03:20:48 2019 +0000 [Cilksan] Updated test for Cilksan changes to pass sync-region information. commit a40bb72eab6131e7a34e29bdcd50647de107c95e Author: TB Schardl Date: Wed Sep 4 01:18:49 2019 +0000 [Driver] Removed snappy library from set of libraries dynamically linked with Cilksan. commit b6e0856aaac387e27a76001826610f9b3df7cd8b Author: TB Schardl Date: Tue Sep 3 15:32:34 2019 +0000 [BackendUtil] Disable loop instrumentation for Cilkscale. commit f796ab9b1fda80b7e9b0edf7c2dfbe58d37be22a Author: TB Schardl Date: Fri Aug 9 15:55:00 2019 +0000 [LoopStripmine] Add command-line options to enable and disable Tapir loop stripmining, and enable Tapir loop stripmining at -O2 or higher (when vectorization is enabled). 
commit 8714ef35b08abd060daf9f3bebd3802b7e27ca45 Author: TB Schardl Date: Tue Jul 16 23:03:14 2019 +0000 [Cilk] Fix limit computation on simple _Cilk_for loops with LE or GE condition. This change addresses issue wsmoses/Tapir-LLVM#89. commit c6a078ac984ea627c561fa0df90df63a262305e9 Author: TB Schardl Date: Mon Jul 15 12:07:45 2019 +0000 [Cilk] Add checks, warnings, and code generation for OpenCilk variety of _Cilk_spawn, including spawns of complex statements and spawns in return statements. commit 8d1471640fb2d827b97985f7dbb60cc5b182e0c1 Author: TB Schardl Date: Mon Jul 15 12:05:25 2019 +0000 [Cilk] Add several semantic checks for _Cilk_spawn and _Cilk_for statements. These checks include checks for illegal jumps out of _Cilk_spawn and _Cilk_for statements. commit 40899d2895894646f31494835cae65918283015b Author: TB Schardl Date: Mon Jul 15 12:00:10 2019 +0000 [Cilk] Rework representation of _Cilk_spawn statements in the AST to simplify and fix several issues with code generation of destructors and cleanup around spawned statements. commit 148841cc60a5be5d04ee5df8923979572f4cdce1 Author: TB Schardl Date: Mon Jun 10 00:18:49 2019 +0000 [CilkFor] Various fixes, including compilation failure when limit of a simple _Cilk_for loop used a derived type. commit 6af243de3bd256223d603b6c486c151b1ba257da Author: TB Schardl Date: Sat Jun 8 12:50:40 2019 +0000 [CilkFor] Fix handling of simple cilk_for loops with different strides and comparisons. commit 81537fdfc116ef0e2edbff618a7bd137d8e56884 Author: TB Schardl Date: Thu May 30 02:19:06 2019 +0000 [CGBuiltin] Improved support for spawning builtin functions, although the support is not perfect. commit bab9e353cccb9416634213dfc5b309b11f8f4ebc Author: TB Schardl Date: Wed Jun 5 20:35:19 2019 +0000 Bug fixes for rebase onto version 8. commit 804a775f2b860f53d6a7dea4c3219763a8c39343 Author: TB Schardl Date: Wed Mar 6 14:05:25 2019 +0000 [Driver] Add -fcilktool= flag for using Cilk productivity tools. 
Currently the flag only supports the Cilkscale tool, but the framework is designed to be easy to extend to handle new tools. The flag does not currently support the Cilksan tool, which is unintuitive. The -fsanitize=cilk flag still works for using the Cilksan nondeterminism detector. commit bdcd5039142951b17aca1a102b8cbfcded9a5c92 Author: TB Schardl Date: Sun Mar 17 18:08:33 2019 -0400 [Cilk] Cleaning up Cilk header files and their installation. commit 6cbfc6e67789a1d1173c015508ec20be3b06bc27 Author: TB Schardl Date: Tue Feb 26 12:37:25 2019 +0000 [CodeGen] Fix code generation for several cases where _Cilk_spawn is used to spawn a complex initialization for a variable. commit 1dcc9cb0f9f94ed427c7651e773d8f18ecbb4e78 Author: TB Schardl Date: Thu Jan 31 14:37:08 2019 +0000 [Test/Cilk] Better tests for return and break statements in _Cilk_for loops. commit a1ea07aa5fe04928df46d70c408186917cebed4c Author: TB Schardl Date: Thu Jan 31 03:18:54 2019 +0000 [SemaStmt] Check for break statements in _Cilk_for loop bodies, which are illegal. commit 66477fd7fc1e0ce705a6b97b822ba366bbe9ec16 Author: TB Schardl Date: Thu Jan 31 03:18:31 2019 +0000 [SemaStmt] Check for return statements in _Cilk_for loop bodies, which are illegal. commit 300564edc57d7dbb183194ed4098ef24c96e1267 Author: TB Schardl Date: Fri Jan 18 13:45:08 2019 +0000 [Frontend] Resolve a compilation warning. commit 849afe95b44727e0c590ef90bba880ba9941266d Author: TB Schardl Date: Fri Dec 21 05:08:42 2018 +0000 [CSI] Add compiler flag to specify when the CSI pass should insert instrumentation during the optimization pipeline. commit 13efc0bb2302c23fb9a1224a2f772852495f8642 Author: TB Schardl Date: Mon Nov 26 12:32:41 2018 +0000 [Cilk] Code cleanup to address warnings from compilation using Clang. 
commit fd76d20ec5a96d35f267b8c98883f2d0112c3872 Author: TB Schardl Date: Sat Nov 3 23:22:25 2018 -0400 Bug fixes for rebase onto release_70 commit c5dddb372ff90d45a77afa7ec3eec5e595fe1eac Author: TB Schardl Date: Fri Nov 2 12:38:50 2018 +0000 [test/Cilk] Removing is_main property bit from function instrumentation. commit 0156b197f3a0e3e617a2d0a92e0ed210f2bc3e01 Author: TB Schardl Date: Wed Oct 31 23:38:08 2018 +0000 [Driver] Add support for linking cilksan as a static library. commit a76105a233e15229e06e84e86603c252336c4c77 Author: TB Schardl Date: Wed Oct 31 23:34:52 2018 +0000 [CodeGen] Add support for running CilkSanitizer and CSI with the new pass manager. commit c58584e46881ded48244f8abdb9fcf85e14cae0c Author: TB Schardl Date: Wed Oct 31 23:32:51 2018 +0000 [CodeGen] Add support to the new pass manager for compiling Tapir programs to a specified runtime. commit 6e124ac99f54bb5b77a63a7243bdaeb09811c9c0 Author: TB Schardl Date: Fri Oct 19 21:28:21 2018 +0000 [CSI] Adding a property to func_entry and func_exit hooks to identify the 'main' function. commit 59c4cc662edb13da15baede5e2c3eb21904350d1 Author: TB Schardl Date: Wed Oct 10 22:37:37 2018 +0000 [BackendUtil] Using TapirTarget value defined in TargetLibraryInfo. commit 471e3487152ba22364bee1e40aedf95452001db1 Author: TB Schardl Date: Tue Oct 9 03:18:17 2018 +0000 [Tapir] Reorganizing Tapir and Clang options to match semantics. Passing Tapir target for lowering via TargetLibraryInfo. commit 9e9171315b93147ecd83fd141014114c8c79d404 Author: TB Schardl Date: Sat Aug 25 13:30:11 2018 +0000 [CGCilk] Code cleanup to remove unused variable. commit 4ca0469a2bb0f9751edddabcdd1887d4190c4594 Author: TB Schardl Date: Sat Aug 25 13:29:49 2018 +0000 [TapirTargetID] Update to use derive Tapir-target ID, rather than a Tapir target class, from the command-line arguments. 
commit b12adc7ff34783a925a4bab631ef2d65d102879e Author: TB Schardl Date: Sat Jul 14 12:20:21 2018 +0000 [CSI] Adding functionality such that passing the fcsi flag at link time will automatically link the CSI runtime library. commit b6f5f897130444816536afd4900f5f2672e4973a Author: TB Schardl Date: Sun May 6 13:12:15 2018 +0000 [Cilksan] Ensure that the Cilksan instrumentation pass runs before Tapir lowering, even with optimization flag -O0. commit e705cc4b49ac2646a2b1be5d56d9c83dc651cec8 Author: TB Schardl Date: Sun Apr 29 01:08:04 2018 +0000 [CGCilk] Fixed bug with code generation of cilk_for loops with continue statements. commit f4f024e5e4cad28d7b38a27d33a0544def857552 Author: TB Schardl Date: Sat Mar 24 01:10:29 2018 +0000 [CGExprCXX] Properly handle processing of a CXXOperatorCallExpr inside of a CilkSpawnStmt. commit 7d9f1be5a1178334a452a1f923aa1f3a76fd097c Author: TB Schardl Date: Sun Mar 11 12:14:29 2018 +0000 [Cilk] Code cleanup commit 0e48726944aa537bf653183aa0fad5eae8a72791 Author: TB Schardl Date: Fri Feb 16 17:03:45 2018 +0000 [Cilk] Updated Cilk code generation for detached rethrows to include sync region. commit 9d064a4c6f1db1d6d9dd1f14c0bf72c2646f2bb1 Author: TB Schardl Date: Thu Feb 15 15:03:04 2018 +0000 [BackendUtil] Updating header files in light of reorganization of Tapir lowering code. commit 4076cbd75b8d6835c79d126196c50a13e1543d7e Author: TB Schardl Date: Mon Feb 12 18:01:40 2018 +0000 [Cilk] Adding support for translating Cilk code that involves exceptions into Tapir's representation of exceptions. This commit also cleans up some of the code generation for Cilk. This cleanup includes a reorganization to consolidate more of the code-generation code for Cilk into a single file. commit 0f42e02d985cd5970f5309f4b03298fd2013a08e Author: TB Schardl Date: Sun Feb 11 19:38:54 2018 +0000 [Cilk] Re-enabled the '-ftapir=none' option for emitting Tapir without lowering. This flag is intended for testing and debugging purposes. 
commit 24d59e7b7854fc928003343eeeee4b7a6fd16928 Author: TB Schardl Date: Fri Jan 19 02:41:36 2018 +0000 [Tapir] Simplifying code for determining Tapir target from command-line flags. commit af7a7490d88c3e45116f2d92867d85bda9dc6019 Author: TB Schardl Date: Thu Jan 18 23:26:47 2018 +0000 [CilkSanitizer] Added function attribute for Cilk sanitizer. commit d39a5dc81d86aa68a13c67286d59b775ba280db3 Author: TB Schardl Date: Sat Jan 13 05:40:03 2018 +0000 Bug fixes for initial rebase onto version 6. commit 9baab874c42a87fcbbcd9792c2335a8ebbf01796 Author: TB Schardl Date: Thu Jan 11 13:50:07 2018 +0000 Squashed commit of the following: commit 51d7b71ff6cb4c026e18ea212e57b979e7b78896 Author: TB Schardl Date: Mon Jan 8 15:22:07 2018 +0000 [Tapir] Removing tapir namespace to work around GCC bug in namespace handling. commit 2f4bebc189341683dfb50fbb09969268650eae34 Author: TB Schardl Date: Mon Jan 8 14:02:11 2018 +0000 [ToolChain] Ensure that, if no Tapir-related compiler flag is specified, then no parallel runtime is included. commit a50f74fa5c21005982d1443104f8bc2f872ba378 Author: TB Schardl Date: Sun Dec 17 01:56:27 2017 +0000 [Tapir] Added -ftapir=cilkr flag to support Tapir targeting the CilkR runtime library. commit 9e81b3be8a7749cb8feea3f6bad30df9b7ba1e75 Author: TB Schardl Date: Sun Dec 10 00:36:13 2017 +0000 Reverted [SemaStmt] Adding Sema checks to _Cilk_for condition expressions. commit f4e7f92bcca91742609be137fd9bc3e7bf1ac024 Author: TB Schardl Date: Thu Dec 7 18:24:59 2017 +0000 [SemaStmt] Adding Sema checks to _Cilk_for condition expressions. commit e052320cd5d8887cf1e0fa5865d45c6c3923ec57 Author: TB Schardl Date: Wed Dec 6 19:05:09 2017 +0000 [_Cilk_for] Fixed warning messages for unsupported cilk grainsize syntax. commit abf875debb8cc01fca32555c02e974d5f34bfd29 Author: TB Schardl Date: Wed Dec 6 15:49:28 2017 +0000 [test] Updated warning-flags test to eliminate unused diagnostic. 
commit 461858de8ef3abc033733cf845d733b379d65967 Author: TB Schardl Date: Thu Aug 10 22:35:20 2017 +0000 [Basic] Removing unused warning. commit 1ae43af670095fb80183ed74394953cf21204e94 Author: TB Schardl Date: Thu Aug 10 22:34:58 2017 +0000 [CodeGen] Minor cleanup to bring CodeGen files more in line with mainline Clang. commit 20b6579f3908149d160dbb6990ece44645636ca1 Author: TB Schardl Date: Thu Aug 10 22:33:37 2017 +0000 [Cilk] Updating tests to play nice with the testing infrastructure. commit e7e29f9e59617d45b76c48374a650c9e29f4a853 Author: TB Schardl Date: Tue Dec 5 23:50:04 2017 +0000 [Cilk] Bug fix to correctly define preprocessor macros indicating compilation of a Cilk program. commit 4dff7fbf09671eede3a6de04991da001b30c9476 Author: TB Schardl Date: Mon Nov 20 16:25:09 2017 +0000 [Cilksan] Modified "-fsanitize=cilk" to automatically use the Cilksan library from compiler-rt. commit eaf246ef85cae33736dc7b015af97267045a6230 Author: TB Schardl Date: Wed Aug 23 16:20:50 2017 +0000 [CilkSanitizer] Adding a custom instrumentation pass for CilkSan. commit 21bf840e3428d2d7934409d05244ee7d3bc1d8c0 Author: TB Schardl Date: Mon Oct 2 01:46:46 2017 +0000 [Cilk] Adding support for a constant grainsize value on _Cilk_for loops. commit b1fddd86f7b47b63cafa7917e016a34a66cc03d7 Author: TB Schardl Date: Fri Aug 25 22:50:40 2017 +0000 [CilkSanitizer] Integration of CilkSanitizer into the front end. commit 15031af831f40fc6a0cd670137972d92500f874a Author: William S. Moses Date: Wed Oct 4 17:34:27 2017 -0400 Allow openmp backend flag commit a74c03783b70009d74a58b002db5233635fc7e15 Author: William S. Moses Date: Sun Sep 3 14:32:50 2017 -0400 Fix failing tests commit 6eb58f732f8b19addc25692083a8268ace6528fd Merge: fbd3ab69d5 7e8743f82a Author: William S. 
Moses Date: Sat Sep 2 17:29:11 2017 -0400 Merge branch 'ref_clang' commit 7e8743f82ac7957c66d9c2444996be5b1218673b Author: Richard Smith Date: Thu Aug 31 23:19:49 2017 +0000 Mention the expected change to default -std= in future clang releases. git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@312293 91177308-0d34-0410-b5e6-96231b3b80d8 commit 5a151ed6454d91d3ae76c56cc0d8d7757f80f931 Author: Richard Smith Date: Wed Aug 30 23:03:58 2017 +0000 Consistently use code font for command-line flags in the release notes. git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@312189 91177308-0d34-0410-b5e6-96231b3b80d8 commit 243bd1964baf9d79d199e78be1e0627cc1ff0a9b Author: Richard Smith Date: Wed Aug 30 22:58:37 2017 +0000 Add a couple of release note updates for C++ changes since Clang 4. git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@312187 91177308-0d34-0410-b5e6-96231b3b80d8 commit 1d4ad2c1c46164e9cbfd145814756a48f04f0d53 Author: Hans Wennborg Date: Wed Aug 30 18:43:04 2017 +0000 ReleaseNotes: one back-tick too many git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@312155 91177308-0d34-0410-b5e6-96231b3b80d8 commit 5680ea4c0a5c26d4782bc303f0f9cab617ff3e3d Author: Hans Wennborg Date: Wed Aug 30 18:38:07 2017 +0000 ReleaseNotes: remove another in-progress warning git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@312152 91177308-0d34-0410-b5e6-96231b3b80d8 commit 100e1a5cd126979d99318021544f3028365320d1 Author: Hans Wennborg Date: Wed Aug 30 18:36:09 2017 +0000 Merging r312149: ------------------------------------------------------------------------ r312149 | hans | 2017-08-30 11:35:44 -0700 (Wed, 30 Aug 2017) | 1 line docs: typo fix ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@312150 91177308-0d34-0410-b5e6-96231b3b80d8 commit 544c436321ea8f1b6d245c115faec606de901864 Author: Hans 
Wennborg Date: Tue Aug 29 17:13:55 2017 +0000 Merging r311823: (+update ClangCommandLineReference.rst) ------------------------------------------------------------------------ r311823 | rsmith | 2017-08-25 18:04:35 -0700 (Fri, 25 Aug 2017) | 16 lines Add flag to request Clang is ABI-compatible with older versions of itself This patch adds a flag -fclang-abi-compat that can be used to request that Clang attempts to be ABI-compatible with some older version of itself. This is provided on a best-effort basis; right now, this can be used to undo the ABI change in r310401, reverting Clang to its prior C++ ABI for pass/return by value of class types affected by that change, and to undo the ABI change in r262688, reverting Clang to using integer registers rather than SSE registers for passing <1 x long long> vectors. The intent is that we will maintain this backwards compatibility path as we make ABI-breaking fixes in future. The reversion to the old behavior for r310401 is also applied to the PS4 target since that change is not part of its platform ABI (which is essentially to do whatever Clang 3.2 did). ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@312013 91177308-0d34-0410-b5e6-96231b3b80d8 commit 0bc78694a319f80a28ca30e4d9d69c292ee12dee Author: Hans Wennborg Date: Fri Aug 25 20:30:43 2017 +0000 Merging r311792: ------------------------------------------------------------------------ r311792 | djasper | 2017-08-25 12:14:53 -0700 (Fri, 25 Aug 2017) | 9 lines [Format] Invert nestingAndIndentLevel pair in WhitespaceManager used for alignments Indent should be compared before nesting level to determine if a token is on the same scope as the one we align with. Because it was inverted, clang-format sometimes tried to align tokens with tokens from outer scopes, causing the assert(Shift >= 0) to fire. This fixes bug #33507. Patch by Beren Minor, thank you! 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311800 91177308-0d34-0410-b5e6-96231b3b80d8 commit b47c628b1c5c88a4db60d2dda6411a2365a45dd1 Author: Hans Wennborg Date: Fri Aug 25 20:27:58 2017 +0000 Merging r311695: ------------------------------------------------------------------------ r311695 | rsmith | 2017-08-24 13:10:33 -0700 (Thu, 24 Aug 2017) | 9 lines [ubsan] PR34266: When sanitizing the 'this' value for a member function that happens to be a lambda call operator, use the lambda's 'this' pointer, not the captured enclosing 'this' pointer (if any). Do not sanitize the 'this' pointer of a member call operator for a lambda with no capture-default, since that call operator can legitimately be called with a null this pointer from the static invoker function. Any actual call with a null this pointer should still be caught in the caller (if it is being sanitized). This reinstates r311589 (reverted in r311680) with the above fix. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311799 91177308-0d34-0410-b5e6-96231b3b80d8 commit 9fe12dfae3fad93aec4ba283d5099ee69e7fc164 Author: Hans Wennborg Date: Thu Aug 24 22:38:21 2017 +0000 ReleaseNotes: remove boiler-plate, and minor fixes git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311717 91177308-0d34-0410-b5e6-96231b3b80d8 commit fbc4cf960d23ac51b68bfdb9e74d468c3c42fad1 Author: Hans Wennborg Date: Thu Aug 24 22:34:18 2017 +0000 ReleaseNotes: drop in-progress warning git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311716 91177308-0d34-0410-b5e6-96231b3b80d8 commit 7926226534f7c61807c48f53af1ac8d2f6c6c1ca Author: Hans Wennborg Date: Thu Aug 24 16:21:49 2017 +0000 Merging r311601: ------------------------------------------------------------------------ r311601 | adrian | 2017-08-23 14:24:12 -0700 (Wed, 23 Aug 2017) | 5 lines Fix a bug in CGDebugInfo::EmitInlineFunctionStart causing DILocations to be parented in function declarations. Fixes PR33997. https://bugs.llvm.org/show_bug.cgi?id=33997 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311671 91177308-0d34-0410-b5e6-96231b3b80d8 commit f19054656eb4fd4fcee1af3389f10337217dce75 Author: Hans Wennborg Date: Thu Aug 24 15:49:39 2017 +0000 Release Notes fix Patch by Marek Kurdej! git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311668 91177308-0d34-0410-b5e6-96231b3b80d8 commit a040372df3379f40d35981962e15a1aa85c489e7 Author: Hans Wennborg Date: Wed Aug 23 20:50:42 2017 +0000 Revert r309328 and r309290 (which merged r309327 and r309226). The header change caused problems; see PR34182, and PR33858 from #9 onwards, as well as the discussion on the r309226 cfe-commits thread. 
These changes don't seem to be addressing any regression from 4.0.0, so rather than scrambling to fix this on the branch, let's revert to safety. git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311597 91177308-0d34-0410-b5e6-96231b3b80d8 commit e5cdfe0ea82a4f97d01175d67ac1cb6b87d0a56e Author: Hans Wennborg Date: Wed Aug 23 19:56:39 2017 +0000 Merging r311330: ------------------------------------------------------------------------ r311330 | ibiryukov | 2017-08-21 05:03:08 -0700 (Mon, 21 Aug 2017) | 16 lines Fixed a crash on replaying Preamble's PP conditional stack. Summary: The crash occurs when the first token after a preamble is a macro expansion. Fixed by moving replayPreambleConditionalStack from Parser into Preprocessor. It is now called right after the predefines file is processed. Reviewers: erikjv, bkramer, klimek, yvvan Reviewed By: bkramer Subscribers: cfe-commits Differential Revision: https://reviews.llvm.org/D36872 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311591 91177308-0d34-0410-b5e6-96231b3b80d8 commit 29cf62cecf5ac6fcad53dcd300c3b5c90dfeea77 Author: Hans Wennborg Date: Wed Aug 23 16:49:21 2017 +0000 Merging r311532: ------------------------------------------------------------------------ r311532 | krasimir | 2017-08-23 00:18:36 -0700 (Wed, 23 Aug 2017) | 24 lines [clang-format] Align trailing comments if ColumnLimit is 0 Summary: ColumnLimit = 0 means no limit, so comment should always be aligned if requested. This was broken with https://llvm.org/svn/llvm-project/cfe/trunk@304687 introduced via https://reviews.llvm.org/D33830 and is included in 5.0.0-rc2. This commit fixes it and adds a unittest for this property. Should go into clang-5.0 IMHO. Contributed by @pboettch! 
Reviewers: djasper, krasimir Reviewed By: djasper, krasimir Subscribers: hans, klimek Differential Revision: https://reviews.llvm.org/D36967 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311573 91177308-0d34-0410-b5e6-96231b3b80d8 commit fbd3ab69d5d6dcad967f2244773a192e29657b9b Author: William S. Moses Date: Wed Aug 23 11:54:04 2017 -0400 Allow tapir command line args to work on LL files commit 8e4862b5be221092291aaf7ea40d72274acb9d96 Author: Hans Wennborg Date: Tue Aug 22 22:27:59 2017 +0000 Merging r311397: ------------------------------------------------------------------------ r311397 | ahatanak | 2017-08-21 15:46:46 -0700 (Mon, 21 Aug 2017) | 8 lines [Driver][Darwin] Do not pass -munwind-table if -fno-exceptions is supplied. With this change, -fno-exceptions disables unwind tables unless -funwind-tables is supplied too or the target is x86-64 (x86-64 requires emitting unwind tables). rdar://problem/33934446 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311505 91177308-0d34-0410-b5e6-96231b3b80d8 commit d3f860e1c0abf686b67b4c2d5ed9cf93081b85ec Author: Hans Wennborg Date: Tue Aug 22 22:01:04 2017 +0000 Merging r311391: ------------------------------------------------------------------------ r311391 | stl_msft | 2017-08-21 15:19:33 -0700 (Mon, 21 Aug 2017) | 28 lines [Driver] Recognize DevDiv internal builds of MSVC, with a different directory structure. This is a reasonably non-intrusive change, which I've verified works for both x86 and x64 DevDiv-internal builds. The idea is to change `bool IsVS2017OrNewer` into a 3-state `ToolsetLayout VSLayout`. Either a build is DevDiv-internal, released VS 2017 or newer, or released VS 2015 or older. 
When looking at the directory structure, if instead of `"VC"` we see `"x86ret"`, `"x86chk"`, `"amd64ret"`, or `"amd64chk"`, we recognize this as a DevDiv-internal build. After we get past the directory structure validation, we use this knowledge to regenerate paths appropriately. `llvmArchToDevDivInternalArch()` knows how we use `"i386"` subdirectories, and `MSVCToolChain::getSubDirectoryPath()` uses that. It also knows that DevDiv-internal builds have an `"inc"` subdirectory instead of `"include"`. This may still not be the "right" fix in any sense, but I believe that it's non-intrusive in the sense that if the special directory names aren't found, no codepaths are affected. (`ToolsetLayout::OlderVS` and `ToolsetLayout::VS2017OrNewer` correspond to `IsVS2017OrNewer` being `false` or `true`, respectively.) I searched for all references to `IsVS2017OrNewer`, which are places where Clang cares about VS's directory structure, and the only one that isn't being patched is some logic to deal with cross-compilation. I'm fine with that not working for DevDiv-internal builds for the moment (we typically test the native compilers), so I added a comment. Fixes D36860. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311500 91177308-0d34-0410-b5e6-96231b3b80d8 commit 2f63261a06944758063c3e71129dc3b07eb34509 Author: Anastasia Stulova Date: Tue Aug 22 19:29:27 2017 +0000 [Docs] Added release notes for OpenCL. 
Differential Revision: https://reviews.llvm.org/D36951 git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311485 91177308-0d34-0410-b5e6-96231b3b80d8 commit 43adda39841a77047d3127a439311c79ffbf5234 Author: Hans Wennborg Date: Tue Aug 22 17:41:05 2017 +0000 ReleaseNotes: coroutines update from Gor git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311475 91177308-0d34-0410-b5e6-96231b3b80d8 commit 230f29b342ef25486459cf3639386a7cc4cc2b96 Author: Hans Wennborg Date: Tue Aug 22 16:23:19 2017 +0000 Merging r311443: ------------------------------------------------------------------------ r311443 | arphaman | 2017-08-22 03:38:07 -0700 (Tue, 22 Aug 2017) | 15 lines [ObjC] Check written attributes only when synthesizing ambiguous property This commit fixes a bug introduced in r307903. The attribute ambiguity checker that was introduced in r307903 checked all property attributes, which caused errors for source-compatible properties, like: @property (nonatomic, readonly) NSObject *prop; @property (nonatomic, readwrite) NSObject *prop; because the readwrite property would get implicit 'strong' attribute. The ambiguity checker should be concerned about explicitly specified attributes only. 
rdar://33748089 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311464 91177308-0d34-0410-b5e6-96231b3b80d8 commit 075c718ac2cfb07582c1db6b688a7e193fe1af46 Author: Alex Lorenz Date: Tue Aug 22 13:36:03 2017 +0000 Mention libclang code-completion changes in release notes git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311455 91177308-0d34-0410-b5e6-96231b3b80d8 commit e80de58ee48dad77321c4949f631e6c2430767bb Author: Alex Lorenz Date: Tue Aug 22 13:23:54 2017 +0000 Mention #pragma pack PCH serialization change in release notes git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311453 91177308-0d34-0410-b5e6-96231b3b80d8 commit c06c17e34a53b10c80c3732b5ddabf4e45c97df5 Author: Alex Lorenz Date: Tue Aug 22 13:15:19 2017 +0000 Mention #pragma clang attribute in the release notes git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311452 91177308-0d34-0410-b5e6-96231b3b80d8 commit b601c1207dbea9346b8353e016b626c37433035a Author: Alex Lorenz Date: Tue Aug 22 13:11:19 2017 +0000 Mention the ObjC property synthesis changes in release notes git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311451 91177308-0d34-0410-b5e6-96231b3b80d8 commit 168709d3443ba545a95593edc3a9028e26f5ff55 Author: Simon Dardis Date: Tue Aug 22 10:01:35 2017 +0000 [Sema] Update release notes with details of implicit scalar to vector conversions Add notes on this to the C language section, along with the C++ section. 
Reviewers: bruno, hans Differential Revision: https://reviews.llvm.org/D36954 git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311441 91177308-0d34-0410-b5e6-96231b3b80d8 commit 923bd8236e1a0c6009de8d932bf9a0da7ec94b58 Author: Hans Wennborg Date: Mon Aug 21 23:40:02 2017 +0000 Merging r310983: ------------------------------------------------------------------------ r310983 | rsmith | 2017-08-15 18:49:53 -0700 (Tue, 15 Aug 2017) | 31 lines PR19668, PR23034: Fix handling of move constructors and deleted copy constructors when deciding whether classes should be passed indirectly. This fixes ABI differences between Clang and GCC: * Previously, Clang ignored the move constructor when making this determination. It now takes the move constructor into account, per https://github.com/itanium-cxx-abi/cxx-abi/pull/17 (this change may seem recent, but the ABI change was agreed on the Itanium C++ ABI list a long time ago). * Previously, Clang's behavior when the copy constructor was deleted was unstable -- depending on whether the lazy declaration of the copy constructor had been triggered, you might get different behavior. We now eagerly declare the copy constructor whenever its deletedness is unclear, and ignore deleted copy/move constructors when looking for a trivial such constructor. This also fixes an ABI difference between Clang and MSVC: * If the copy constructor would be implicitly deleted (but has not been lazily declared yet), for instance because the class has an rvalue reference member, we would pass it directly. We now pass such a class indirectly, matching MSVC. Based on a patch by Vassil Vassilev, which was based on a patch by Bernd Schmidt, which was based on a patch by Reid Kleckner! This is a re-commit of r310401, which was reverted in r310464 due to ARM failures (which should now be fixed). 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311410 91177308-0d34-0410-b5e6-96231b3b80d8 commit 16b986343f64f416abbecfa0da51a481e86cf588 Author: Hans Wennborg Date: Mon Aug 21 20:27:26 2017 +0000 Merging r311182: ------------------------------------------------------------------------ r311182 | alexshap | 2017-08-18 11:20:43 -0700 (Fri, 18 Aug 2017) | 22 lines [analyzer] Fix modeling of constructors This diff fixes analyzer's crash (triggered assert) on the newly added test case. The assert being discussed is assert(!B.lookup(R, BindingKey::Direct)) in lib/StaticAnalyzer/Core/RegionStore.cpp, however the root cause is different. For classes with empty bases the offsets might be tricky. For example, let's assume we have struct S: NonEmptyBase, EmptyBase { ... }; In this case Clang applies empty base class optimization and the offset of EmptyBase will be 0, it can be verified via clang -cc1 -x c++ -v -fdump-record-layouts main.cpp -emit-llvm -o /dev/null. When the analyzer tries to perform zero initialization of EmptyBase it will hit the assert because that region has already been "written" by the constructor of NonEmptyBase. 
Test plan: make check-all Differential revision: https://reviews.llvm.org/D36851 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311378 91177308-0d34-0410-b5e6-96231b3b80d8 commit 606412eb2a44f0513512a1fbba7c5872c420bd43 Author: Martin Storsjo Date: Mon Aug 21 18:45:39 2017 +0000 Update Clang 5.0 release notes for ms_abi and __builtin_ms_va_list for aarch64 Differential Revision: https://reviews.llvm.org/D36931 git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311359 91177308-0d34-0410-b5e6-96231b3b80d8 commit 46ae5f5b0646bad9e7ac46dc84fc74e5fd1f3474 Author: Alex Lorenz Date: Mon Aug 21 17:47:51 2017 +0000 Mention some warning-related additions and changes for LLVM 5 release notes git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311357 91177308-0d34-0410-b5e6-96231b3b80d8 commit d12acdc7acb1cdfbe746c1841fe139f46ff072e7 Author: Jonathan Coe Date: Sat Aug 19 01:24:47 2017 +0000 Update LLVM 5.0 release notes for clang.cindex changes Summary: This patch should be applied to clang 5.0 release notes, NOT to trunk. Reviewers: rengolin, hans Reviewed By: hans Differential Revision: https://reviews.llvm.org/D36902 git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311219 91177308-0d34-0410-b5e6-96231b3b80d8 commit 6fe97e5ae9416b6ec66da3cf03b703b3798206d1 Author: Dominic Chen Date: Sat Aug 19 00:09:24 2017 +0000 Add release notes for r299463. Implement z3-based constraint solver backend for clang static analyzer. 
git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311213 91177308-0d34-0410-b5e6-96231b3b80d8 commit 502461a8c938b8ccd464d8d79e3b098b84cae6d3 Author: Hans Wennborg Date: Fri Aug 18 20:28:06 2017 +0000 Merging r311115: ------------------------------------------------------------------------ r311115 | rsmith | 2017-08-17 12:35:50 -0700 (Thu, 17 Aug 2017) | 2 lines PR34161: support evaluation of 'void()' expressions in C++14 onwards. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311194 91177308-0d34-0410-b5e6-96231b3b80d8 commit ae3d7833d0fdf5be7149a1708302dc8f4c0ef99b Author: Hans Wennborg Date: Thu Aug 17 17:26:33 2017 +0000 Merging r310776: ------------------------------------------------------------------------ r310776 | rsmith | 2017-08-11 18:46:03 -0700 (Fri, 11 Aug 2017) | 8 lines PR34163: Don't cache an incorrect key function for a class if queried between the class becoming complete and its inline methods being parsed. This replaces the hack of using the "late parsed template" flag to track member functions with bodies we've not parsed yet; instead we now use the "will have body" flag, which carries the desired implication that the function declaration *is* a definition, and that we've just not parsed its body yet. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311105 91177308-0d34-0410-b5e6-96231b3b80d8 commit f1c97542ff84f0a49cdc2af11cc30d499cd9adba Author: Hans Wennborg Date: Thu Aug 17 16:48:03 2017 +0000 Merging r310672: ------------------------------------------------------------------------ r310672 | ahatanak | 2017-08-10 17:06:49 -0700 (Thu, 10 Aug 2017) | 7 lines [Sema][ObjC] Fix spurious -Wcast-qual warnings. We do not meaningfully track object const-ness of Objective-C object types. 
Silence the -Wcast-qual warning that is issued when casting to or from Objective-C object types results in losing const qualification. rdar://problem/33807915 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@311095 91177308-0d34-0410-b5e6-96231b3b80d8 commit af145231499333600c48437ed838de2903172704 Author: Hans Wennborg Date: Tue Aug 15 00:24:02 2017 +0000 Merging r310706 and r310829: ------------------------------------------------------------------------ r310706 | arphaman | 2017-08-11 05:06:52 -0700 (Fri, 11 Aug 2017) | 11 lines [modules] Set the lexical DC for dummy tag decls that refer to hidden declarations that are made visible after the dummy is parsed and ODR verified Prior to this commit the "(getContainingDC(DC) == CurContext && "The next DeclContext should be lexically contained in the current one.")," assertion failure was triggered during semantic analysis of the dummy tag declaration that was declared in another tag declaration because its lexical context did not point to the outer tag decl. rdar://32292196 ------------------------------------------------------------------------ ------------------------------------------------------------------------ r310829 | arphaman | 2017-08-14 03:59:44 -0700 (Mon, 14 Aug 2017) | 5 lines Set the lexical context for dummy tag decl inside createTagFromNewDecl This is a follow-up to r310706. This change has been recommended by Bruno Cardoso Lopes and Richard Smith. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@310902 91177308-0d34-0410-b5e6-96231b3b80d8 commit b52dc49165b05501903ea7355e646467da025c3a Author: Hans Wennborg Date: Mon Aug 14 17:27:59 2017 +0000 Merging r310804: ------------------------------------------------------------------------ r310804 | rsmith | 2017-08-13 15:26:53 -0700 (Sun, 13 Aug 2017) | 2 lines Replace remaining user-visible mentions of C++1z with C++17. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@310862 91177308-0d34-0410-b5e6-96231b3b80d8 commit 63c6fd256d0bccded905f0b05158939291d3f151 Author: Hans Wennborg Date: Mon Aug 14 15:48:04 2017 +0000 Merging r310516: ------------------------------------------------------------------------ r310516 | hans | 2017-08-09 13:12:53 -0700 (Wed, 09 Aug 2017) | 13 lines Make -std=c++17 an alias of -std=c++1z As suggested on PR33912. Trying to keep this small to make it easy to merge to the 5.0 branch. We can do a follow-up with more thorough renaming (diagnostic text, options, ids, etc.) later. 
(For C++14 this was done in r215982, and I think a smaller patch for the 3.5 branch: http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20140818/113013.html) Differential Revision: https://reviews.llvm.org/D36532 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@310848 91177308-0d34-0410-b5e6-96231b3b80d8 commit 6ee431111dca9fb627eb19800bcd184c460386d1 Author: Hans Wennborg Date: Fri Aug 11 16:32:49 2017 +0000 Merging r310704: ------------------------------------------------------------------------ r310704 | smaksimovic | 2017-08-11 04:39:07 -0700 (Fri, 11 Aug 2017) | 8 lines Revert r302670 for the upcoming 5.0.0 release This is causing failures when compiling clang with -O3 as one of the structures used by clang is passed by value and uses the fastcc calling convention. Failures manifest for stage2 mips build. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@310728 91177308-0d34-0410-b5e6-96231b3b80d8 commit 9d12f17ded3f2a4350ed2d2116413d5ca2a54d9a Author: Hans Wennborg Date: Fri Aug 11 16:30:46 2017 +0000 Revert r310074 (see PR34067 #4) git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@310726 91177308-0d34-0410-b5e6-96231b3b80d8 commit e48ea412e6a97503202474c126ab083602bce60f Author: Hans Wennborg Date: Fri Aug 11 16:18:44 2017 +0000 Merging r310700: ------------------------------------------------------------------------ r310700 | yamaguchi | 2017-08-11 02:44:42 -0700 (Fri, 11 Aug 2017) | 11 lines [Bash-autocompletion] Add --autocomplete flag to 5.0 release notes Summary: I thought we should add this information to release notes, because we added a new flag to clang driver. 
Reviewers: v.g.vassilev, teemperor, ruiu Subscribers: cfe-commits Differential Revision: https://reviews.llvm.org/D36567 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@310723 91177308-0d34-0410-b5e6-96231b3b80d8 commit 9f61e63493574b244e34c6ac049853e2da887a43 Author: Hans Wennborg Date: Fri Aug 11 16:16:08 2017 +0000 Merging r310694: ------------------------------------------------------------------------ r310694 | rsmith | 2017-08-10 20:39:40 -0700 (Thu, 10 Aug 2017) | 2 lines Implement latest feature test macro recommendations, P0096R4. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@310722 91177308-0d34-0410-b5e6-96231b3b80d8 commit 3b16c8b2d526bff51bcaaee74ddbbf364b9365e7 Author: Hans Wennborg Date: Fri Aug 11 16:14:07 2017 +0000 Merging r309054: ------------------------------------------------------------------------ r309054 | rsmith | 2017-07-25 16:31:42 -0700 (Tue, 25 Jul 2017) | 2 lines Reorder tests to match latest SD-6 draft. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@310721 91177308-0d34-0410-b5e6-96231b3b80d8 commit d84167fd28cd57353c9b02d22bc7d4a4eeabf2e5 Author: Hans Wennborg Date: Fri Aug 11 16:07:17 2017 +0000 Merging r310692: ------------------------------------------------------------------------ r310692 | rsmith | 2017-08-10 20:14:20 -0700 (Thu, 10 Aug 2017) | 2 lines PR33850: Update cxx_dr_status for Clang 5 branch. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@310720 91177308-0d34-0410-b5e6-96231b3b80d8 commit 9ae15462ce434ac27321fd104c2969941fc70b71 Author: Hans Wennborg Date: Fri Aug 11 15:58:58 2017 +0000 Merging r310691: ------------------------------------------------------------------------ r310691 | rsmith | 2017-08-10 19:04:19 -0700 (Thu, 10 Aug 2017) | 2 lines PR33489: A function-style cast to a deduced class template specialization type is type-dependent if it can't be resolved due to a type-dependent argument. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@310719 91177308-0d34-0410-b5e6-96231b3b80d8 commit 4af01079ce2f2d2528d72b8ce5be018f68ea7d73 Author: Hans Wennborg Date: Fri Aug 11 01:47:32 2017 +0000 Merging r310006: ------------------------------------------------------------------------ r310006 | ahatanak | 2017-08-03 16:55:42 -0700 (Thu, 03 Aug 2017) | 22 lines [Driver][Darwin] Pass -munwind-table when !UseSjLjExceptions. This commit fixes a bug where clang/llvm doesn't emit an unwind table for a function when it is marked noexcept. 
Without this patch, the following code terminates with an uncaught exception on ARM64: int foo1() noexcept { try { throw 0; } catch (int i) { return 0; } return 1; } int main() { return foo1(); } rdar://problem/32411865 Differential Revision: https://reviews.llvm.org/D35693 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@310677 91177308-0d34-0410-b5e6-96231b3b80d8 commit 13722d89229f5766535e66bdb82fd876f8ac1c2c Author: Hans Wennborg Date: Fri Aug 11 01:45:48 2017 +0000 Merging r309633, r309636 and r309640: ------------------------------------------------------------------------ r309633 | ahatanak | 2017-07-31 15:19:34 -0700 (Mon, 31 Jul 2017) | 6 lines [Driver] Make sure the deployment target is earlier than iOS 11 when it is inferred from -isysroot. This fixes a change that was inadvertently introduced in r309607. rdar://problem/32230613 ------------------------------------------------------------------------ ------------------------------------------------------------------------ r309636 | ahatanak | 2017-07-31 15:46:00 -0700 (Mon, 31 Jul 2017) | 1 line Silence warning -Wmissing-sysroot. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r309640 | ahatanak | 2017-07-31 16:08:52 -0700 (Mon, 31 Jul 2017) | 1 line Use -target instead of -arch in test case. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@310676 91177308-0d34-0410-b5e6-96231b3b80d8 commit ad0ce25f78a95689d1bbe14512f69badb22a43df Author: Hans Wennborg Date: Fri Aug 11 01:41:23 2017 +0000 Merging r309607: ------------------------------------------------------------------------ r309607 | ahatanak | 2017-07-31 12:16:40 -0700 (Mon, 31 Jul 2017) | 6 lines [Driver] Allow users to silence the warning that is issued when the deployment target is earlier than iOS 11 and the target is 32-bit. This is a follow-up to r306922. rdar://problem/32230613 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@310675 91177308-0d34-0410-b5e6-96231b3b80d8 commit 6a4d8ba324f430182d7fe0eeeb1227138e4cc5fa Author: Hans Wennborg Date: Fri Aug 11 01:00:59 2017 +0000 Merging r309569: ------------------------------------------------------------------------ r309569 | alexfh | 2017-07-31 08:21:26 -0700 (Mon, 31 Jul 2017) | 39 lines Fix -Wshadow false positives with function-local classes. Summary: Fixes http://llvm.org/PR33947. https://godbolt.org/g/54XRMT void f(int a) { struct A { void g(int a) {} A() { int a; } }; } 3 : :3:16: warning: declaration shadows a local variable [-Wshadow] void g(int a) {} ^ 1 : :1:12: note: previous declaration is here void f(int a) { ^ 4 : :4:15: warning: declaration shadows a local variable [-Wshadow] A() { int a; } ^ 1 : :1:12: note: previous declaration is here void f(int a) { ^ 2 warnings generated. The local variable `a` of the function `f` can't be accessed from a method of the function-local class A, thus no shadowing occurs and no diagnostic is needed. 
Reviewers: rnk, rsmith, arphaman, Quuxplusone Reviewed By: rnk, Quuxplusone Subscribers: Quuxplusone, cfe-commits Differential Revision: https://reviews.llvm.org/D35941 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@310674 91177308-0d34-0410-b5e6-96231b3b80d8 commit 8bca255b155a42f114e73d904babc1c3e441f41b Author: Hans Wennborg Date: Tue Aug 8 18:38:07 2017 +0000 Merging r308722: ------------------------------------------------------------------------ r308722 | ibiryukov | 2017-07-21 02:24:00 -0700 (Fri, 21 Jul 2017) | 13 lines Fixed failing assert in code completion. Summary: The code was accessing uninstantiated default argument. This resulted in failing assertion at ParmVarDecl::getDefaultArg(). Reviewers: erikjv, klimek, bkramer, krasimir Reviewed By: krasimir Subscribers: cfe-commits Differential Revision: https://reviews.llvm.org/D35682 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@310395 91177308-0d34-0410-b5e6-96231b3b80d8 commit 09efbb85a6403e480ebf70445d0d6ce1815ed24e Author: Hans Wennborg Date: Tue Aug 8 18:34:26 2017 +0000 Merging r309263: ------------------------------------------------------------------------ r309263 | psmith | 2017-07-27 03:43:53 -0700 (Thu, 27 Jul 2017) | 6 lines [CodeGen][ARM] ARM runtime helper functions are not always soft-fp Re-commit r309257 with less precise register checks in arm-float-helpers.c test. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@310393 91177308-0d34-0410-b5e6-96231b3b80d8 commit 05a0489dadd09cf9bc6694b63872877a869918ae Author: Hans Wennborg Date: Tue Aug 8 18:15:02 2017 +0000 Merging r310359: ------------------------------------------------------------------------ r310359 | n.bozhenov | 2017-08-08 07:13:50 -0700 (Tue, 08 Aug 2017) | 4 lines [libclang] Fix PR34055 (incompatible update of clang-c/Index.h) Fixes a regression introduced by r308218. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@310390 91177308-0d34-0410-b5e6-96231b3b80d8 commit 8df2b6dec1451e65ab500939da3e9bf394236a42 Author: Hans Wennborg Date: Mon Aug 7 20:45:55 2017 +0000 Merging r310158: ------------------------------------------------------------------------ r310158 | rtrieu | 2017-08-04 17:54:19 -0700 (Fri, 04 Aug 2017) | 8 lines [ODRHash] Treat some non-templated classes as templated. When using nested classes, if the inner class is not templated, but the outer class is templated, the inner class will not be templated, but may have some traits as if it were. This is particularly evident if the inner class refers to the outer class in some fashion. Treat any class that is in the context of a templated class as also a templated class. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@310302 91177308-0d34-0410-b5e6-96231b3b80d8 commit ef9c5ea4c7fced70a44b90887b90f72e45441802 Author: Hans Wennborg Date: Mon Aug 7 20:15:58 2017 +0000 Merging r310191: ------------------------------------------------------------------------ r310191 | ctopper | 2017-08-05 16:35:54 -0700 (Sat, 05 Aug 2017) | 18 lines [X86] Enable isel to use the PAUSE instruction even when SSE2 is disabled. 
Clang part Summary: On older processors this instruction encoding is treated as a NOP. MSVC doesn't disable intrinsics based on features the way clang/gcc does. Because the PAUSE instruction encoding doesn't crash older processors, some software out there uses these intrinsics without checking for SSE2. This change also seems to be consistent with gcc behavior. Fixes PR34079 Reviewers: RKSimon, zvi Reviewed By: RKSimon Subscribers: cfe-commits Differential Revision: https://reviews.llvm.org/D36362 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@310294 91177308-0d34-0410-b5e6-96231b3b80d8 commit 22ec3c010bb6e43db3058248057f01389ccdaa13 Author: Hans Wennborg Date: Fri Aug 4 17:19:44 2017 +0000 Merging r310057: ------------------------------------------------------------------------ r310057 | smaksimovic | 2017-08-04 05:37:34 -0700 (Fri, 04 Aug 2017) | 8 lines Revert r304953 for release 5.0.0 This is causing failures when compiling clang with -O3 as one of the structures used by clang is passed by value and uses the fastcc calling convention. Failures manifest for stage2 mips build. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@310074 91177308-0d34-0410-b5e6-96231b3b80d8 commit 9ac40524fe1216e5d8e475e0cd24a2bd2f55cedd Author: Hans Wennborg Date: Fri Aug 4 16:48:43 2017 +0000 Merging r309975: (except the docs/ part) ------------------------------------------------------------------------ r309975 | rsmith | 2017-08-03 12:24:27 -0700 (Thu, 03 Aug 2017) | 4 lines Don't emit undefined-internal warnings for CXXDeductionGuideDecls. Patch by ~paul (cynecx on phabricator)! Some test massaging by me. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@310067 91177308-0d34-0410-b5e6-96231b3b80d8 commit 676729f19a8b81c98a72239633feecdb54ed08cf Author: Hans Wennborg Date: Thu Aug 3 16:12:51 2017 +0000 Merging r308996: ------------------------------------------------------------------------ r308996 | gornishanov | 2017-07-25 11:01:49 -0700 (Tue, 25 Jul 2017) | 9 lines [coroutines] Add serialization/deserialization of coroutines Reviewers: rsmith Reviewed By: rsmith Subscribers: EricWF, cfe-commits Differential Revision: https://reviews.llvm.org/D35383 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@309954 91177308-0d34-0410-b5e6-96231b3b80d8 commit 78efc4ca88a25e1e0d5971863d6d38633d440fb0 Author: TB Schardl Date: Thu Aug 3 13:06:01 2017 +0000 [README] Attempting to clean up README file. commit c4d55f5857a1e3a7f41bc75914fd43da30ae6326 Author: Hans Wennborg Date: Wed Aug 2 17:42:08 2017 +0000 Merging r309523: ------------------------------------------------------------------------ r309523 | brad | 2017-07-30 14:13:59 -0700 (Sun, 30 Jul 2017) | 2 lines Also pass -pie back to the linker when linking on OpenBSD. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@309844 91177308-0d34-0410-b5e6-96231b3b80d8 commit 2239da4a1b47021c70e9a5aa4d0061412ea9f91b Author: Hans Wennborg Date: Tue Aug 1 23:54:32 2017 +0000 Merging r309722: ------------------------------------------------------------------------ r309722 | bruno | 2017-08-01 12:05:25 -0700 (Tue, 01 Aug 2017) | 7 lines [Sema] Fix lax conversion between non ext vectors r282968 introduced a regression due to the lack of proper testing. Re-add lax conversion support between non ext vectors for compound assignments and add a test for that. 
rdar://problem/28639467 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@309770 91177308-0d34-0410-b5e6-96231b3b80d8 commit 90047174d9c34dedc5ac750fcd1fe0d7c738491c Author: Hans Wennborg Date: Tue Aug 1 23:32:23 2017 +0000 Merging r309752: ------------------------------------------------------------------------ r309752 | bruno | 2017-08-01 15:10:36 -0700 (Tue, 01 Aug 2017) | 6 lines [Headers][Darwin] Allow #include_next to work on Darwin prior to 10.7 This fixes PR31504 and it's a follow up from adding #include_next for Darwin in r289018. rdar://problem/29856682 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@309764 91177308-0d34-0410-b5e6-96231b3b80d8 commit 888cdb53e0f71201556f7d19d59c6bc3a582baf2 Author: Hans Wennborg Date: Mon Jul 31 17:29:29 2017 +0000 Merging r309382: ------------------------------------------------------------------------ r309382 | rksimon | 2017-07-28 06:47:02 -0700 (Fri, 28 Jul 2017) | 3 lines [X86] Add tests showing inability of vector non-temporal load/store intrinsic to force pointer alignment (PR33830) Clang specifies a max type alignment of 16 bytes on darwin targets, meaning that the builtin nontemporal stores don't correctly align the loads/stores to 32 or 64 bytes when required, resulting in lowering to temporal unaligned loads/stores. ------------------------------------------------------------------------ Merging r309383: ------------------------------------------------------------------------ r309383 | rksimon | 2017-07-28 07:01:51 -0700 (Fri, 28 Jul 2017) | 1 line Strip trailing whitespace. NFCI. 
------------------------------------------------------------------------ Merging r309488: ------------------------------------------------------------------------ r309488 | rksimon | 2017-07-29 08:33:34 -0700 (Sat, 29 Jul 2017) | 7 lines [X86][AVX] Ensure vector non-temporal load/store intrinsics force pointer alignment (PR33830) Clang specifies a max type alignment of 16 bytes on darwin targets (annoyingly in the driver not via cc1), meaning that the builtin nontemporal stores don't correctly align the loads/stores to 32 or 64 bytes when required, resulting in lowering to temporal unaligned loads/stores. This patch casts the vectors to explicitly aligned types prior to the load/store to ensure that the require alignment is respected. Differential Revision: https://reviews.llvm.org/D35996 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@309588 91177308-0d34-0410-b5e6-96231b3b80d8 commit 6f5e1cc9f728bac74f436943ed9dab704c426ef5 Author: Hans Wennborg Date: Mon Jul 31 17:00:55 2017 +0000 Merging r309503: ------------------------------------------------------------------------ r309503 | rsmith | 2017-07-29 23:31:29 -0700 (Sat, 29 Jul 2017) | 6 lines PR33902: Invalidate line number cache when adding more text to existing buffer. This led to crashes as the line number cache would report a bogus line number for a line of code, and we'd try to find a nonexistent column within the line when printing diagnostics. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@309580 91177308-0d34-0410-b5e6-96231b3b80d8 commit 36c4419a8ff1c6ff223dad5cc42ec220d8d4413e Author: Hans Wennborg Date: Fri Jul 28 21:31:07 2017 +0000 Merging r309113: ------------------------------------------------------------------------ r309113 | yamaguchi | 2017-07-26 06:36:58 -0700 (Wed, 26 Jul 2017) | 19 lines [Bash-autocompletion] Show HelpText with possible flags Summary: `clang --autocomplete=-std` will show ``` -std: Language standard to compile for -std= Language standard to compile for -stdlib= C++ standard library to use ``` after this change. However, showing HelpText with completion in bash seems super tricky, so this feature will be used in other shells (fish, zsh...). Reviewers: v.g.vassilev, teemperor, ruiu Subscribers: cfe-commits, hiraditya Differential Revision: https://reviews.llvm.org/D35759 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@309438 91177308-0d34-0410-b5e6-96231b3b80d8 commit a2e91b76cde21cad4d160e3b078deda5b9b41af2 Author: Hans Wennborg Date: Fri Jul 28 21:25:21 2017 +0000 Merging r309112: ------------------------------------------------------------------------ r309112 | yamaguchi | 2017-07-26 06:30:36 -0700 (Wed, 26 Jul 2017) | 7 lines [Bash-completion] Fixed a bug that file doesn't autocompleted after = Summary: File path wasn't autocompleted after `-fmodule-cache-path=[tab]`, so fixed this bug by checking if $flags contains only a newline or not. 
Differential Revision: https://reviews.llvm.org/D35763 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@309435 91177308-0d34-0410-b5e6-96231b3b80d8 commit 5ca288a74db287f2b94e7723f74a224c8f20056a Author: Hans Wennborg Date: Thu Jul 27 22:08:00 2017 +0000 Merging r309327: ------------------------------------------------------------------------ r309327 | compnerd | 2017-07-27 14:56:25 -0700 (Thu, 27 Jul 2017) | 5 lines Headers: fix _Unwind_{G,S}etGR for non-EHABI targets The EHABI definition was being inlined into the users even when EHABI was not in use. Adjust the condition to ensure that the right version is defined. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@309328 91177308-0d34-0410-b5e6-96231b3b80d8 commit ce8c97e6531764a45df229d82e49a4ddb7980671 Author: Hans Wennborg Date: Thu Jul 27 16:45:43 2017 +0000 Merging r309226: ------------------------------------------------------------------------ r309226 | compnerd | 2017-07-26 15:55:23 -0700 (Wed, 26 Jul 2017) | 13 lines Headers: improve ARM EHABI coverage of unwind.h Ensure that we define the `_Unwind_Control_Block` structure used on ARM EHABI targets. This is needed for building libc++abi with the unwind.h from the resource dir. A minor fallout of this is that we needed to create a typedef for _Unwind_Exception to work across ARM EHABI and non-EHABI targets. The structure definitions here are based originally on the documentation from ARM under the "Exception Handling ABI for the ARM® Architecture" Section 7.2. They are then adjusted to more closely reflect the definition in libunwind from LLVM. Those changes are compatible in layout but permit easier use in libc++abi and help maintain compatibility between libunwind and the compiler provided definition. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@309290 91177308-0d34-0410-b5e6-96231b3b80d8 commit 269497e3c37764ceb4e87ce9763412aca118c99d Author: Hans Wennborg Date: Thu Jul 27 16:20:45 2017 +0000 Revert r304899 and r304836: It's not clear printing all targets with --version is the right thing to do (see discussion on D33900) git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@309285 91177308-0d34-0410-b5e6-96231b3b80d8 commit 7a0666d0f312ee33dba0935018e7b758b89d202c Author: Hans Wennborg Date: Wed Jul 26 16:35:53 2017 +0000 Merging r309058: ------------------------------------------------------------------------ r309058 | majnemer | 2017-07-25 16:33:58 -0700 (Tue, 25 Jul 2017) | 9 lines [CodeGen] Correctly model std::byte's aliasing properties std::byte, when defined as an enum, needs to be given special treatment with regards to its aliasing properties. An array of std::byte is allowed to be used as storage for other types. This fixes PR33916. Differential Revision: https://reviews.llvm.org/D35824 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@309135 91177308-0d34-0410-b5e6-96231b3b80d8 commit 3378e4c41b473dcb573f8d7c99fc15eb8e855428 Author: Hans Wennborg Date: Wed Jul 26 16:15:18 2017 +0000 Merging r308824: ------------------------------------------------------------------------ r308824 | yamaguchi | 2017-07-22 05:35:15 -0700 (Sat, 22 Jul 2017) | 5 lines [Bash-autocompletion] Fixed typo and add '-' after -Wno Summary: -Wno- was autocompleted as -Wno, so fixed this typo. 
Differential Revision: https://reviews.llvm.org/D35762 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@309130 91177308-0d34-0410-b5e6-96231b3b80d8 commit cbaa84db28323e892ac1188a5f2fb60a88d5b92e Author: Hans Wennborg Date: Tue Jul 25 17:10:17 2017 +0000 Merging r308897: ------------------------------------------------------------------------ r308897 | nico | 2017-07-24 09:54:11 -0700 (Mon, 24 Jul 2017) | 9 lines Work around an MSVC2017 update 3 codegen bug. MSVC2017 update 3 produces a clang that crashes when compiling clang. Disabling optimizations for StmtProfiler::VisitCXXOperatorCallExpr() makes the crash go away. Patch from Bruce Dawson ! https://reviews.llvm.org/D35757 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@308988 91177308-0d34-0410-b5e6-96231b3b80d8 commit 4c3e691e191e1bef23ee65d4c8d8a640a51c6733 Author: Hans Wennborg Date: Fri Jul 21 08:17:53 2017 +0000 Regenerate ClangCommandLineReference.rst I ran: $ bin/clang-tblgen -gen-opt-docs -I../cfe.src/include \ -I../cfe.src/include/clang/Driver -I../llvm.src/include \ ../cfe.src/include/clang/Driver/ClangOptionDocs.td \ -o ../cfe.src/docs/ClangCommandLineReference.rst git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@308720 91177308-0d34-0410-b5e6-96231b3b80d8 commit 404119f2dfa99d3fe0efa85c715256c6db19076c Author: Hans Wennborg Date: Wed Jul 19 14:44:30 2017 +0000 Generate docs/AttributeReference.rst $ bin/clang-tblgen -gen-attr-docs -I../cfe.src/include \ ../cfe.src/include/clang/Basic/Attr.td \ -o ../cfe.src/docs/AttributeReference.rst git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@308481 91177308-0d34-0410-b5e6-96231b3b80d8 commit d916f2627f2d2936405d973bf354a5b3cbd4afb8 Author: Hans Wennborg Date: Wed Jul 19 13:02:51 2017 +0000 Merging r308455: 
------------------------------------------------------------------------ r308455 | hans | 2017-07-19 05:31:01 -0700 (Wed, 19 Jul 2017) | 16 lines Revert r308441 "Recommit r308327: Add a warning for missing '#pragma pack (pop)' and suspicious uses of '#pragma pack' in included files" This seems to have broken the sanitizer-x86_64-linux buildbot. Reverting until it's fixed, especially since this landed just before the 5.0 branch. > This commit adds a new -Wpragma-pack warning. It warns in the following cases: > > - When a translation unit is missing terminating #pragma pack (pop) directives. > - When entering an included file if the current alignment value as determined > by '#pragma pack' directives is different from the default alignment value. > - When leaving an included file that changed the state of the current alignment > value. > > rdar://10184173 > > Differential Revision: https://reviews.llvm.org/D35484 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@308457 91177308-0d34-0410-b5e6-96231b3b80d8 commit cee8fa8282d9c3715bd90910977a7a4767817aa7 Author: Hans Wennborg Date: Wed Jul 19 12:20:43 2017 +0000 Creating release_50 branch off revision 308441 git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_50@308444 91177308-0d34-0410-b5e6-96231b3b80d8 commit cb4e7cc1202c13504de32b352091076560be42c6 Author: TB Schardl Date: Mon Jul 10 13:34:50 2017 +0000 [Cilk] Add dummy cleanup for invokes within detached loop body, to ensure proper structure of exception-handling IR. commit 4a722288b688a70f9acd9265c9224f447e401e92 Author: TB Schardl Date: Mon Jul 10 13:08:58 2017 +0000 [CSI] Re-added the -fcsi flag, which was removed by mistake in a previous merge. commit 9ea3e02ca03c9cd94166b0a3dfd32cf97aa78b1f Author: TB Schardl Date: Thu Jun 29 14:06:41 2017 +0000 [test/Cilk] Adding some placeholder Cilk tests. 
commit ead57d1851d56d70894d05ea2aa33ff184ef3fe6 Author: TB Schardl Date: Tue Jun 27 21:54:01 2017 +0000 [CGExpr] Removing some debug output. commit 744a39b686eea71e62f7edfad56f58246649ce42 Author: TB Schardl Date: Tue Jun 27 21:53:42 2017 +0000 [CodeGen] Adding code to generate Tapir IR instructions in sync regions. commit 8c7e6cc423081453cac68c0bf5317a784e50a314 Author: TB Schardl Date: Sat Jun 17 23:34:30 2017 +0000 [SemaStmt] Add diagnostic checks on the validity of the initialization statement of a _Cilk_for. commit 9ab51fd4932804c88dec2d27559de049d0aa130e Author: TB Schardl Date: Sat Jun 3 12:42:55 2017 +0000 Fixed a warning message. NFC. commit 56f7b9453af4e7915ef7a9a31007780ea160c764 Merge: f6a794052f b4530cfcf7 Author: TB Schardl Date: Fri Jun 2 15:44:26 2017 +0000 Merge branch 'master' of github.com:llvm-mirror/clang commit f6a794052fa2caef25c3f0fa6251341dbfd52e43 Merge: 9470fafb08 a7fae60ad9 Author: TB Schardl Date: Wed May 31 01:42:41 2017 +0000 Merge branch 'master' of github.com:llvm-mirror/clang commit 9470fafb08354ff3635468b6815213fa0d4194d5 Merge: eb9c842a18 530e88891b Author: TB Schardl Date: Tue May 30 18:20:26 2017 +0000 Merge branch 'master' of github.com:llvm-mirror/clang commit eb9c842a187b13133a795287afd628c71b0e5955 Merge: 16b791b59f 1b66edc097 Author: TB Schardl Date: Fri May 26 12:12:47 2017 +0000 Merge branch 'master' of github.com:wsmoses/Cilk-Clang commit 16b791b59fc3e5aea31b64b637a4eb90b6887068 Author: TB Schardl Date: Thu May 25 13:11:21 2017 +0000 [Cilk] Added preliminary support for _Cilk_spawn assignments and declarations. This commit adds support for the following statements and expressions: var = _Cilk_spawn function-call vardecl = _Cilk_spawn function-call This support should be easily extensible to support spawned compound assignments by means of Cilk inlets. To support these statements and expressions, this commit adds a CilkSpawnExpr expression type, which has a single child subexpression. 
Although _Cilk_spawn cannot be applied to any expression, this approach of adding a general CilkSpawnExpr seems to be the simplest for adapting the parser to handle the cases we care about. Additional semantic checks should be added later to ensure that a CilkSpawnExpr appears only where it makes semantic sense. _Cilk_spawn assignments and declarations differ from ordinary assignments and declarations in their order of evaluation. For a typical assignment or declaration, E1 @= E2 C++17 rules dictate that E2 is evaluated before E1. When E2 is preceded by a _Cilk_spawn, however, we no longer guarantee that E2 is evaluated before E1. (This commit, in fact, ensures that E1 is evaluated before E2 if E2 is spawned.) In effect, the _Cilk_spawn weakens the evaluation-order guarantees of the spawned expression and code evaluated afterwards. This commit also improves the handling of evaluated expressions around Cilk constructs. These improvements include fixes to the evaluation order of arguments to a spawned function, creation of temporary storage for spawned function calls, and processing of exception-handling code created for spawned expressions. This commit also reorganizes some Cilk-specific code into separate files. commit 5b2cfc4cea9143fd98494d78b265cba91ac70fea Author: TB Schardl Date: Thu May 25 12:58:26 2017 +0000 [SemaStmtAttr] Permit Clang's loop pragmas on _Cilk_for loops. commit 1b66edc097bf7855a5cf7b225a4ae8cb182c6d43 Author: William S. Moses Date: Mon May 15 01:17:30 2017 -0400 Add Rhino flag commit 5942594810265567795884c83b5a37a8cbc98d3e Author: TB Schardl Date: Wed May 10 03:08:49 2017 +0000 [CompilerInvocation] Modified compiler flags such that the -ftapir flag uses -O2 as the default optimization level. 
commit 05eaad0686e47e2493faa209cf2068cd3cad79b6 Author: TB Schardl Date: Wed May 10 02:48:34 2017 +0000 [CilkSpawn] Add better support for spawning function calls to evaluate all function-call arguments before the spawn and to support spawning functions that can throw. commit 3da2e2d33a79ef5bc1dfebb1d281bb8416f8cbde Author: TB Schardl Date: Wed May 10 02:46:41 2017 +0000 [CilkFor] Add better handling of cilk_for statements to ensure that the induction variable is passed by value to the loop body, even at -O0 compilation. commit 9331565f7fb12391cff6c290fadf0262d41b9949 Author: TB Schardl Date: Fri Mar 10 20:18:01 2017 +0000 [CGStmt] Make sure that, when emitting a parallel loop, the detached loop body has a single entry block. commit 9f1d91067d09c78435aa371397b109a24b05e892 Merge: cc78c4b608 b3210f2343 Author: TB Schardl Date: Fri Mar 3 13:18:37 2017 +0000 Merge branch 'master' of github.com:llvm-mirror/clang commit cc78c4b6082bb80687e64c8104bf9744e6fa8fdc Author: TB Schardl Date: Tue Feb 21 19:47:20 2017 +0000 [SemaStmt] Added note. commit 00b4d1385c455ce5d5bf23df420e912ae883babd Author: TB Schardl Date: Wed Jan 11 13:58:14 2017 +0000 [CGStmt] Code cleanup. commit b35bb933f616bf96c0170aabba0f377f3a440fa5 Merge: 67ac4fb23d 9b64b9c084 Author: TB Schardl Date: Tue Jan 3 19:25:13 2017 +0000 Merge branch 'new_lowering' commit 9b64b9c084d8efe985d3db49a4d51ef0a45d86f6 Author: TB Schardl Date: Sat Dec 31 17:24:18 2016 +0000 [ASTReaderStmt] Fixing last merge. 
commit c99168e0732c1a20f684078827ac08e84ca56eab Merge: e0b6da6da3 021fcf99fa Author: TB Schardl Date: Sat Dec 31 15:02:48 2016 +0000 Merge branch 'master' of github.com:llvm-mirror/clang into new_lowering commit e0b6da6da3c2d91599171cd82c7cd6ed3abd4779 Merge: 0835245a64 49d944aa25 Author: TB Schardl Date: Tue Dec 20 14:25:22 2016 +0000 Merge branch 'master' of github.com:llvm-mirror/clang into new_lowering commit 0835245a64189724166a95ab5fbbde090f88cf0c Author: TB Schardl Date: Tue Dec 20 14:12:33 2016 +0000 [Cilk] Removing deprecated COND_DECL field from _Cilk_for statements. Merging with upstream master. commit ea66b8494a03b0a9d790bf3263e853d31640c7a3 Merge: a624c934d7 5cc8e8e46c Author: TB Schardl Date: Mon Dec 19 21:46:42 2016 +0000 Merge branch 'master' of github.com:llvm-mirror/clang into new_lowering commit a624c934d744f538bee0caa66254c80cb624e2e9 Author: TB Schardl Date: Sat Dec 10 20:43:30 2016 +0000 [SemaStmt] Bug fix to ensure that a _Cilk_for loop whose loop variable does not start at 0 is compiled correctly. commit 67ac4fb23d75463a426a8b3e27a17833b7557e62 Author: TB Schardl Date: Tue Nov 29 20:23:36 2016 +0000 [SemaStmt] Fixed handling of CilkFor statements to package declaration of loop end condition with the declaration of the loop variable. Separating these declarations seemed to cause a compiler crash on some C++ codes, such as breadthFirstSearch/deterministicBFS from the PBBS benchmark suite. commit fe2e37f1294283256f1217a63223f6a05e3c63d7 Merge: 8e5d660a00 dc3ec65843 Author: TB Schardl Date: Mon Nov 28 21:21:33 2016 +0000 Merge branch 'master' of github.com:llvm-mirror/clang commit 8e5d660a00b6bb94f8160f21e871e8e0f1ac4db7 Author: TB Schardl Date: Mon Nov 28 16:01:55 2016 +0000 [SemaStmt] Fixed handling of CilkFor statements to package declaration of loop end condition with the declaration of the loop variable. 
Separating these declarations seemed to cause a compiler crash on some C++ codes, such as breadthFirstSearch/deterministicBFS from the PBBS benchmark suite. commit 2cbd8ba396b4ee96d6e43295f74291dcf1be4d71 Author: TB Schardl Date: Mon Nov 28 15:59:28 2016 +0000 [Tools] Cleaning up code formatting. commit a406dcd2ac447e9808a3f71653ac880b76678a33 Merge: bb18de3a95 e2a2ad2607 Author: TB Schardl Date: Sat Nov 26 17:25:30 2016 +0000 Merge branch 'master' of github.com:llvm-mirror/clang commit 5eb47402427f12dbdba8ddf46a021598ede49286 Merge: b33752d57d b1f9c060a1 Author: Ubuntu Date: Thu Nov 24 17:08:17 2016 +0000 Bring up to date with most recent clang commit bb18de3a956146a6e5af364d271bd20676036122 Merge: b33752d57d 017ae22a5c Author: TB Schardl Date: Wed Nov 23 18:32:52 2016 +0000 Merge branch 'master' of github.com:llvm-mirror/clang commit b33752d57dfb9873c6e8e7734631cd8ee96b80ea Author: TB Schardl Date: Fri Nov 11 21:37:31 2016 +0000 [CSI] Modified CSI to run just before Tapir instructions are lowered to calls into a parallel runtime system. commit 52c7f3c932aa8ef68c27b4cf9b3457349732d468 Author: TB Schardl Date: Fri Nov 11 21:21:34 2016 +0000 [SemaStmt] Add processing to treat simple strided _Cilk_for loops as syntactic sugar for an 'equivalent' loop with a well-defined iteration count. commit b2cc806a36059fd3b56df002581e989a7f4c3901 Author: TB Schardl Date: Sun Oct 30 18:45:47 2016 +0000 Modify Clang to add metadata marking _Cilk_for loops as targets for the LoopSpawning Tapir optimization. commit 73c8a57626e9aa7ad73d8969c11be225fa60d617 Author: TB Schardl Date: Sat Oct 22 14:23:54 2016 +0000 Adding front-end control for CSI instrumentation pass, copied from https://github.com/CSI-LLVM/. commit cded8ec0822e0a0ee46ecdd00fa26054723f4196 Author: TB Schardl Date: Wed Oct 19 15:49:53 2016 +0000 Minor code-formatting fixes. 
commit a684bcd919fc0bfce89fe0b9f89545c355f888b4 Author: TB Schardl Date: Wed Oct 19 15:43:11 2016 +0000 [_Cilk_for] Modified _Cilk_for loops such that, if the loop condition is simple, then the loop limit is evaluated before the loop. For example, consider the following _Cilk_for loop: _Cilk_for (int i = X; i < Y; ++i) This loop is transformed such that Y is evaluated once before the loop executes. In effect, this loop is treated as syntactic sugar for the following loop: _Cilk_for (int i = X, __end = Y; i < __end; ++i) More generally, Y can be any expression that does not use a variable declared in the initialization. commit bf8757c3f23156b989d16bb0184df33ef75d8f76 Author: William S. Moses Date: Thu Oct 6 12:05:44 2016 -0400 Allow non-broken continue statements commit 1c283243292d1417e1eecf5933c24e30b77c7609 Author: William S. Moses Date: Thu Sep 22 09:41:37 2016 -0400 Fix cilk for parsing commit 51abaa104d316d00cd51d9f23faed68b93a8650f Author: William S. Moses Date: Thu Sep 15 15:59:42 2016 -0400 fix sync commit 83682325acfd9837a8ee881449910d21fbef4481 Merge: c79fdb5b38 1f8593ff43 Author: William S. Moses Date: Wed Aug 17 18:10:56 2016 -0400 Merge branch 'master' of github.com:llvm-mirror/clang commit c79fdb5b38c1a411d984e9e3a333adebe353d228 Author: William S. Moses Date: Wed Aug 17 18:10:54 2016 -0400 fix races commit 08d3f57e81bba6fd80db7c4dc69d4ea75512fc9d Merge: d2d8a85e04 0fc13f413b Author: William S. Moses Date: Wed Aug 17 17:31:26 2016 -0400 cleanup commit d2d8a85e04dcf81837807dfab89f6ba9bec43b09 Merge: 3e3f4a559c be40902a7d Author: William S. Moses Date: Wed Aug 17 03:10:53 2016 -0400 merge with master commit 3e3f4a559c6e85e710d017b35328b5776a323115 Author: William S. Moses Date: Mon Aug 15 01:44:09 2016 -0400 add race detector commit 7dafdeb283ac6223dd5b33573e3e92fcc3579ca3 Author: William S. Moses Date: Wed Aug 10 00:01:37 2016 -0400 add cilktools commit 8a289673e0017f9f4c7be5a6ae491589ce19db8e Author: William S. 
Moses Date: Wed Aug 10 00:01:25 2016 -0400 add headers to copy list commit e09312ffd203a3b15729c798daa20784b6dbd40d Author: William S. Moses Date: Mon Aug 8 15:11:36 2016 -0400 add internal headers commit 1a83cefdc8199ffb9f0b95ce69c8cdb210b23178 Author: William S. Moses Date: Thu Jun 23 13:48:08 2016 -0400 correct cleanup generation for cilk for commit 685e973d62559b7180b4c02bf27e0db2550a9c10 Author: William S. Moses Date: Fri Jun 17 14:38:12 2016 -0400 remove debug commit 909c2827d7e72dd270dd6e03ec9176dab268bb33 Author: Ubuntu Date: Wed Jun 15 20:59:07 2016 +0000 nomem commit 0f25458f67389327cc0ed4329c58a3d6b716e79e Author: William S. Moses Date: Tue Jun 14 23:20:02 2016 -0400 fix inline bug commit a618215bfea4d6678d0a954518cd40099c19c3f5 Author: William S. Moses Date: Thu Jun 9 10:44:27 2016 -0400 fix lcilkrts commit a94cc48d6b6fbaa6978084b9d952a2736d18d3fb Author: William S. Moses Date: Wed Jun 8 01:06:06 2016 -0400 clang support for new opt passes commit 7fdfe18842e4992ec05c8614742d884f94933b09 Author: William S. Moses Date: Mon Jun 6 16:00:57 2016 -0400 resolve diff issues with release_38 commit 556beaee73811d7c273c24b3cd2bf79cfd02c158 Author: William S. Moses Date: Mon Jun 6 11:32:23 2016 -0400 implicit sync at end of function commit 3f43bf9ef20015d940dfa29b225c37e2bf92a1e1 Author: William S. Moses Date: Thu Jun 2 16:46:01 2016 +0000 add cilk headers commit 818ee0b282c82aacb7512c9564123ce4e3ced6ac Author: William S. Moses Date: Tue May 24 23:03:34 2016 -0400 fix allocation issue commit 9d9712fa9cdaf0d0107e3a9726e13d6d29dd8aea Author: TB Schardl Date: Fri Mar 25 19:20:09 2016 +0000 Adjusting DiagnosticIDs to account for Cilk-related diagnostics. commit c30adfe8a15411f1cc4181b5f682f84f02468ede Author: TB Schardl Date: Fri Mar 25 19:19:08 2016 +0000 Cleaning up some spacing. 
commit e1ad85807cfb448a0cca2d121d007d4251d2b849 Merge: 2316a38783 9fd77bd681 Author: TB Schardl Date: Fri Mar 25 12:32:45 2016 +0000 Merge branch 'release_38' of http://llvm.org/git/clang commit 9fd77bd68130d9b2fbc56a3138b6f981d560480a Author: Alexander Kornienko Date: Thu Mar 17 14:58:32 2016 +0000 Add clang-tidy release notes. git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@263715 91177308-0d34-0410-b5e6-96231b3b80d8 commit 2316a38783532c629f6befe90f5fbcc47b2f9667 Merge: 549aad9fb0 ad2c56e8cd Author: TB Schardl Date: Thu Mar 17 08:11:30 2016 +0000 Merge branch 'release_38' of http://llvm.org/git/clang commit ad2c56e8cd2c0e39ee97bd6bde087c37118737e7 Author: Hans Wennborg Date: Mon Mar 7 17:17:19 2016 +0000 ReleaseNotes: Alignment; by John McCall git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@262836 91177308-0d34-0410-b5e6-96231b3b80d8 commit 47ba141ac03f73cbf4b5f9953954044b127070d9 Author: Hans Wennborg Date: Wed Mar 2 23:49:46 2016 +0000 ReleaseNotes: tidy up git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@262544 91177308-0d34-0410-b5e6-96231b3b80d8 commit 6d174cafcd7478b48af2b2a147962eaae0c24086 Author: Hans Wennborg Date: Wed Mar 2 21:46:21 2016 +0000 ReleaseNotes: -fstrict-vtable-pointers, by Piotr Padlewski git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@262523 91177308-0d34-0410-b5e6-96231b3b80d8 commit 4ece949252b676db6b6fb24ab3fe80da388b5d02 Author: Hans Wennborg Date: Tue Mar 1 19:10:09 2016 +0000 ReleaseNotes: fix build failure git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@262365 91177308-0d34-0410-b5e6-96231b3b80d8 commit b98cd9d6a300bcd4863a26786bb1b5d782dcecc0 Author: Hans Wennborg Date: Tue Feb 23 21:20:39 2016 +0000 Merging r261669: ------------------------------------------------------------------------ r261669 | aaronballman | 2016-02-23 10:55:15 -0800 (Tue, 23 Feb 2016) | 1 line Amends r252104 to evaluate the controlling expression in 
an unevaluated context. This eliminates false-positive diagnostics about null pointer dereferences (etc) in the controlling expression. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@261684 91177308-0d34-0410-b5e6-96231b3b80d8 commit 7530cdd7b3ca33eddde0cf2ca1cf1bdd0d23088b Author: Hans Wennborg Date: Mon Feb 22 18:48:10 2016 +0000 Merging r261422: ------------------------------------------------------------------------ r261422 | rdivacky | 2016-02-20 00:31:24 -0800 (Sat, 20 Feb 2016) | 10 lines Fix handling of vaargs on PPC32 when going from regsave to overflow. It can happen that when we only have 1 more register left in the regsave area we need to store a value bigger than 1 register and therefore we go to the overflow area. In this case we have to leave the last slot in the regsave area unused and keep using overflow area. Do this by storing a limit value to the used register counter in the overflow block. Issue diagnosed by and solution tested by Mark Millard! 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@261553 91177308-0d34-0410-b5e6-96231b3b80d8 commit fe5341475003ecbef1f3d7097a19a5b578acc494 Author: Michael Wong Date: Mon Feb 22 14:58:44 2016 +0000 [OpenMP] Update 3.8 release notes support for OpenMP http://reviews.llvm.org/D17323 git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@261526 91177308-0d34-0410-b5e6-96231b3b80d8 commit 9414bac729e6a137209d21f89222d7e6fc49f386 Author: Renato Golin Date: Fri Feb 19 19:36:35 2016 +0000 Merge r261310: Add test for ARM: fix VFP asm constraints git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@261357 91177308-0d34-0410-b5e6-96231b3b80d8 commit 9ced04abfd5784671b39f5eaa0c5e28cffbf6d33 Author: Renato Golin Date: Fri Feb 19 17:40:14 2016 +0000 Merge r261309: ARM: fix VFP asm constraints git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@261343 91177308-0d34-0410-b5e6-96231b3b80d8 commit 9398ce076a4592cdfe2428caf7740d5e619374c6 Author: Hans Wennborg Date: Thu Feb 18 20:51:23 2016 +0000 Merging r261209: ------------------------------------------------------------------------ r261209 | abataev | 2016-02-18 05:48:15 -0800 (Thu, 18 Feb 2016) | 4 lines [OPENMP] Fix codegen for lastprivate loop counters. Patch fixes bug with codegen for lastprivate loop counters. Also it may improve performance for lastprivates calculations in some cases. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@261257 91177308-0d34-0410-b5e6-96231b3b80d8 commit fd4cbd0143e3adb2f0042dd1678d355c152c3625 Author: Hans Wennborg Date: Thu Feb 18 20:49:41 2016 +0000 Merging r261080: ------------------------------------------------------------------------ r261080 | abataev | 2016-02-17 02:29:05 -0800 (Wed, 17 Feb 2016) | 3 lines [OPENMP] Fix handling loop-based directives with arrays. Patch fixes possible problems with correct handling arrays as expressions in initialization, conditions etc in loop-based constructs. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@261256 91177308-0d34-0410-b5e6-96231b3b80d8 commit f3d159cb9320b6dfce5bb4d72083bf54b4a27f10 Author: Hans Wennborg Date: Thu Feb 18 16:49:14 2016 +0000 Merging r257763: ------------------------------------------------------------------------ r257763 | djasper | 2016-01-14 05:36:46 -0800 (Thu, 14 Jan 2016) | 8 lines clang-format: Fix incorrectly enforced linebreak with ColumnLimit 0. Before: aaaa[bbbb] .cccc(); After: aaaa[bbbb].cccc(); ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@261225 91177308-0d34-0410-b5e6-96231b3b80d8 commit 961db8de3198ee3aae87194fc02400531ce81a61 Author: Hans Wennborg Date: Wed Feb 17 21:28:41 2016 +0000 Merging r259874: ------------------------------------------------------------------------ r259874 | compnerd | 2016-02-04 20:12:40 -0800 (Thu, 04 Feb 2016) | 8 lines CodeGen: correct Windows ARM C++ assertion Because the Decl is explicitly passed as nullptr further up the call chain, it is possible to invoke isa on a nullptr, which will assert. Guard against the nullptr. Take the opportunity to reuse the helper method rather than re-implementing this logic. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@261158 91177308-0d34-0410-b5e6-96231b3b80d8 commit 36ac462edb307d0c855ec35994b244f47f0216e4 Author: Dimitry Andric Date: Tue Feb 16 19:56:48 2016 +0000 Merging r260851: ------------------------------------------------------------------------ r260851 | dim | 2016-02-14 17:08:20 +0100 (Sun, 14 Feb 2016) | 13 lines As reported in https://llvm.org/bugs/show_bug.cgi?id=25496, on FreeBSD, C++ programs compiled for profiling (using `-pg`) should be linked with `-lc++_p` (or `-lstdc++_p`, depending on the `-stdlib=` setting), not with the regular C++ libraries. Add a `FreeBSD::AddCXXStdlibLibArgs()` override to handle this, and add a test case for it. While here, extend the test case for the proper passing of -lm and -lm_p. Reviewers: compnerd, davide, dws, emaste Reviewed By: compnerd Differential Revision: http://reviews.llvm.org/D16264 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@261003 91177308-0d34-0410-b5e6-96231b3b80d8 commit 63c572b11d39c76ecf9e0533d0b9263c86b6e190 Author: Hans Wennborg Date: Tue Feb 16 19:47:16 2016 +0000 Merging r260370: ------------------------------------------------------------------------ r260370 | abataev | 2016-02-10 02:50:12 -0800 (Wed, 10 Feb 2016) | 1 line Fix PR26543: add a check for definition in CXXRecordDecl. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@261002 91177308-0d34-0410-b5e6-96231b3b80d8 commit 644c83762a56c982a2e452c426ee7043a0ae684f Author: Hans Wennborg Date: Tue Feb 16 19:25:09 2016 +0000 ReleaseNotes: CUDA support; by Artem Belevich git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@260997 91177308-0d34-0410-b5e6-96231b3b80d8 commit b99a812c69df83afa6566e98af3cd8a9e4ef261c Author: Hans Wennborg Date: Tue Feb 16 19:12:16 2016 +0000 ReleaseNotes: -Wmicrosoft was split up; by Nico Weber git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@260992 91177308-0d34-0410-b5e6-96231b3b80d8 commit 2da05f45ceb97085180bc9429a3c7e9c8c18895c Author: Hans Wennborg Date: Tue Feb 16 18:43:16 2016 +0000 ReleaseNotes: OpenCL section By Anastasia Stulova! git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@260988 91177308-0d34-0410-b5e6-96231b3b80d8 commit 66c687bace28eb6d694e01a440d098c0b836ae3a Author: Hans Wennborg Date: Tue Feb 16 17:37:41 2016 +0000 ReleaseNotes: fix typo reported by Eugene git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@260984 91177308-0d34-0410-b5e6-96231b3b80d8 commit 67c4c997bc5147b592ecf64cff1d7a6268810fa9 Author: Hans Wennborg Date: Fri Feb 12 22:51:41 2016 +0000 Merging r260616: ------------------------------------------------------------------------ r260616 | joerg | 2016-02-11 15:18:36 -0800 (Thu, 11 Feb 2016) | 4 lines Now that Sparc/Sparc64 backend is mostly usable, provide the same linking defaults as other NetBSD targets, i.e. compiler_rt-in-libc and libc++ as STL. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@260756 91177308-0d34-0410-b5e6-96231b3b80d8 commit 46d2feaf7d76ca3a92f3b7a222994a0d052c1e3c Author: Hans Wennborg Date: Fri Feb 12 20:21:39 2016 +0000 ReleaseNotes: autoconf deprecation, by Chris Bieneman git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@260718 91177308-0d34-0410-b5e6-96231b3b80d8 commit de7a8d34feb344cf24846f2eeb1cb78238d4d4e6 Author: Hans Wennborg Date: Fri Feb 12 20:20:26 2016 +0000 ReleaseNotes: static analyzer, by Anna Zaks git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@260716 91177308-0d34-0410-b5e6-96231b3b80d8 commit c5034dec1811f82237f417e1ab4443e21fd5d600 Author: Hans Wennborg Date: Fri Feb 12 01:54:57 2016 +0000 ReleaseNotes: __builtin_object_size and overloadable; by George Burgess IV git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@260642 91177308-0d34-0410-b5e6-96231b3b80d8 commit 78ee98fedcebaafd227616a1ad008a608205f79a Author: Hans Wennborg Date: Fri Feb 12 01:04:08 2016 +0000 Merging r260637: ------------------------------------------------------------------------ r260637 | hans | 2016-02-11 17:01:37 -0800 (Thu, 11 Feb 2016) | 1 line UsersManual: update clang-cl commands ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@260638 91177308-0d34-0410-b5e6-96231b3b80d8 commit 80adc9047d8016ffdea1659ac135b94fd0fb22de Author: Hans Wennborg Date: Fri Feb 12 00:50:11 2016 +0000 Generate docs/AttributeReference.rst I ran: $ bin/clang-tblgen -gen-attr-docs -I../cfe.src/include ../cfe.src/include/clang/Basic/Attr.td -o ../cfe.src/docs/AttributeReference.rst git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@260636 91177308-0d34-0410-b5e6-96231b3b80d8 commit e3c9e22572e62142c234bb6859d27e864d92aa35 Author: Hans Wennborg Date: Fri 
Feb 12 00:48:28 2016 +0000 ReleaseNotes: drop in-progress warning and svn checkout note git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@260635 91177308-0d34-0410-b5e6-96231b3b80d8 commit 2d49f0a0ae8366964a93e3b7b26e29679bee7160 Author: Hans Wennborg Date: Fri Feb 5 22:15:03 2016 +0000 Merging r259931: ------------------------------------------------------------------------ r259931 | uweigand | 2016-02-05 13:34:28 -0800 (Fri, 05 Feb 2016) | 33 lines [SystemZ] Define __GCC_HAVE_SYNC_COMPARE_AND_SWAP macros Define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_[1248] macros on SystemZ. This fixes a miscompile of GCC C++11 standard library headers due to use of those macros in an ABI-changing manner. See e.g. /usr/include/c++/4.8.5/ext/concurrence.h: // Compile time constant that indicates prefered locking policy in // the current configuration. static const _Lock_policy __default_lock_policy = #ifdef __GTHREADS #if (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2) \ && defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4)) _S_atomic; #else _S_mutex; #endif #else _S_single; #endif A different choice of __default_lock_policy causes different sizes of several of the C++11 data structures, which are then incompatible when inlined in clang-compiled code with what the (GCC-compiled) external library expects. This in turn leads to various crashes when using std::thread in code compiled with clang, as see e.g. via the ThreadPool unit tests. See PR 26473 for an example. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@259939 91177308-0d34-0410-b5e6-96231b3b80d8 commit 9c9d4e2a396b6f024fae416c078d59c9c19fa34d Author: Hans Wennborg Date: Thu Feb 4 18:20:19 2016 +0000 Merging r259183: ------------------------------------------------------------------------ r259183 | uweigand | 2016-01-29 02:45:23 -0800 (Fri, 29 Jan 2016) | 8 lines Add target triple to CodeGenOpenCL/pipe_types.cl test case The test is failing on SystemZ since different IR is being generated due to platform ABI differences. Add a target triple. Fix suggested by Anastasia Stulova. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@259808 91177308-0d34-0410-b5e6-96231b3b80d8 commit fb740de72cc6c134055128bcef92bf1a612657d7 Author: Hans Wennborg Date: Tue Feb 2 23:26:23 2016 +0000 Merging r259598: ------------------------------------------------------------------------ r259598 | rsmith | 2016-02-02 15:11:49 -0800 (Tue, 02 Feb 2016) | 7 lines Work around build failure due to GCC 4.8.1 bug. We don't completely understand the details of the bug, but avoiding overloading llvm::cast with another function template sidesteps it. See gcc.gnu.org/PR58022 for details of the bug, and llvm.org/PR26362 for more backgound on how it manifested in Clang. Patch by Igor Sugak! ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@259603 91177308-0d34-0410-b5e6-96231b3b80d8 commit 4b4684fa765d7eb67d1c2c9b2d474fe94508b3b9 Author: Hans Wennborg Date: Mon Feb 1 22:13:54 2016 +0000 Merging r257831, r257838, r257853, r257861, r257869, r257870, r257871. 
------------------------------------------------------------------------ r257831 | rtrieu | 2016-01-14 14:56:39 -0800 (Thu, 14 Jan 2016) | 13 lines Refactor template type diffing 1) Instead of using pairs of From/To* fields, combine fields into a struct TemplateArgInfo and have two in each DiffNode. 2) Use default initialization in DiffNode so that the constructor shows the only field that is initialized differently on construction. 3) Use Set and Get functions per each DiffKind to make sure all fields for the diff are set. In one case, the Expr fields were not set. 4) Don't print boolean literals for boolean template arguments. This prevents printing 'false aka 0' Only #3 has a functional change, which is reflected in the test change. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r257838 | rtrieu | 2016-01-14 15:30:12 -0800 (Thu, 14 Jan 2016) | 6 lines Change the TSTiterator in Template Type Diffing. Modify the TSTiterator to have two internal iterators, which will walk the provided sugared type and the desugared type. This will provide better access to the template argument information. No functional changes. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r257853 | rtrieu | 2016-01-14 17:08:56 -0800 (Thu, 14 Jan 2016) | 11 lines Make template type diffing use the new desugared iterator. If available, use the canonical template argument to fill in information for template type diffing instead of attempting to special case and evaluate Expr's for the value. Since those are the values used in template instantiation, we don't have to worry about difference between our evaluator and theirs. Also move the nullptr template arguments from DiffKind::Expression to DiffKind::Declaration and allow DiffKind::Declaration to set an Expr. 
The only effect that should result is that a named nullptr will show up as 'ptr aka nullptr' in diagnostics. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r257861 | rtrieu | 2016-01-14 18:55:17 -0800 (Thu, 14 Jan 2016) | 7 lines Save the integer type for integral template arguments. Save the integer type when diffing integers in template type diffing. When integers are different sizes, print out the type along with the integer value. Also with the type information, print true and false instead of 1 and 0 for boolean values. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r257869 | rtrieu | 2016-01-14 21:01:53 -0800 (Thu, 14 Jan 2016) | 6 lines Add new diff modes to template type diffing. Remove an old assertion that does not hold. It is possible for a template argument to be a declaration in one instantiation and an integer in another. Create two new diff kinds for these (decl vs int and int vs decl). ------------------------------------------------------------------------ ------------------------------------------------------------------------ r257870 | rtrieu | 2016-01-14 21:48:38 -0800 (Thu, 14 Jan 2016) | 10 lines Fixing more issues with template type diffing 1) Print qualifiers for templates with zero arguments 2) Add a few more tests for the template type diffing refactoring. Specifically, PR24587 has been fixed and has a test case from http://reviews.llvm.org/D15384 3) Adds asserts to check the DiffTree is in correct state when moving nodes 4) Rename the field FromType and ToType since it is heavily used within member functions. 
------------------------------------------------------------------------ ------------------------------------------------------------------------ r257871 | jyknight | 2016-01-14 21:57:41 -0800 (Thu, 14 Jan 2016) | 1 line Fix a -Wparentheses warning in ASTDiagnostic.cpp. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@259422 91177308-0d34-0410-b5e6-96231b3b80d8 commit 6ea7e4b6a1d902d02742f54f0e4a9dade810db83 Author: Hans Wennborg Date: Mon Feb 1 21:43:22 2016 +0000 Merging r258396: ------------------------------------------------------------------------ r258396 | abataev | 2016-01-21 04:54:48 -0800 (Thu, 21 Jan 2016) | 2 lines Fix crash for typedefs for arrays of runtime bounds in Lambdas/Captured Statements, used in sizeof() expression only. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@259414 91177308-0d34-0410-b5e6-96231b3b80d8 commit 265154b171f5257d82d6aa22f52a39b5d5ff3209 Author: Hans Wennborg Date: Mon Feb 1 21:40:38 2016 +0000 Merging r257710: ------------------------------------------------------------------------ r257710 | gbiv | 2016-01-13 15:36:34 -0800 (Wed, 13 Jan 2016) | 8 lines [Sema] Suppress diags in overload resolution. We were emitting diagnostics from our shiny new C-only overload resolution mode. This patch attempts to silence all such diagnostics. This fixes PR26085. 
Differential Revision: http://reviews.llvm.org/D16159 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@259412 91177308-0d34-0410-b5e6-96231b3b80d8 commit ede31aea4a62a166c23babb7657c4847fbd304ad Author: Hans Wennborg Date: Mon Feb 1 17:10:12 2016 +0000 Merging r259260: ------------------------------------------------------------------------ r259260 | samsonov | 2016-01-29 15:07:14 -0800 (Fri, 29 Jan 2016) | 1 line [UBSan] Add documentation for runtime issue suppression. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@259371 91177308-0d34-0410-b5e6-96231b3b80d8 commit 42788d24ed06f441962cc858853a9debf49ac0e2 Author: Alexey Bataev Date: Fri Jan 29 05:14:10 2016 +0000 Merging r258307 and r258495: ------------------------------------------------------------------------ r258307 | abataev | 2016-01-20 15:29:47 +0300 (Wed, 20 Jan 2016) | 3 lines [OPENMP 4.0] Fix for codegen of 'cancel' directive within 'sections' directive. Allow to emit code for 'cancel' directive within 'sections' directive with single sub-section. ------------------------------------------------------------------------ r258495 | abataev | 2016-01-22 11:56:50 +0300 (Fri, 22 Jan 2016) | 3 lines [OPENMP] Generalize codegen for 'sections'-based directive. If 'sections' directive has only one sub-section, the code for 'single'-based directive was emitted. Removed this codegen, because it causes crashes in different cases. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@259160 91177308-0d34-0410-b5e6-96231b3b80d8 commit 11f818d8c60f5dc1bef681640b71c756bc72b168 Author: Hans Wennborg Date: Mon Jan 25 20:35:13 2016 +0000 Merging r257947: ------------------------------------------------------------------------ r257947 | joerg | 2016-01-15 14:29:34 -0800 (Fri, 15 Jan 2016) | 2 lines Avoid self-assignment of SmallString, which triggers UB behavior down the road. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@258715 91177308-0d34-0410-b5e6-96231b3b80d8 commit 1dc0d8b09cefcb0b354f0fbde009c722cf44207c Author: Dimitry Andric Date: Fri Jan 22 20:43:39 2016 +0000 Merging r258110: ------------------------------------------------------------------------ r258110 | faisalv | 2016-01-19 04:58:55 +0100 (Tue, 19 Jan 2016) | 15 lines Fix PR26134: When substituting into default template arguments, keep CurContext unchanged. Or, do not set Sema's CurContext to the template declaration's when substituting into default template arguments of said template declaration. If we do push the template declaration context on to Sema, and the template declaration is at namespace scope, Sema can get confused and try and do odr analysis when substituting into default template arguments, even though the substitution could be occurring within a dependent context. I'm not sure why this was being done, perhaps there was concern that if a default template argument referred to a previous template parameter, it might not be found during substitution - but all regression tests pass, and I can't craft a test that would cause it to fail (if someone does, please inform me, and I'll craft a different fix for the PR). This patch removes a single line of code, but unfortunately adds more than it removes, because of the tests. 
Some day I still hope to commit a patch that removes far more lines than it adds, while leaving clang better for it ;) Sorry that r253590 ("Change the expression evaluation context from Unevaluated to ConstantEvaluated while substituting into non-type template argument defaults") caused the PR! ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@258549 91177308-0d34-0410-b5e6-96231b3b80d8 commit b6c67ba4d0eae5b63cbbd8817892e38eeab649e2 Author: Alexey Bataev Date: Fri Jan 22 04:07:48 2016 +0000 Merging r258394: ------------------------------------------------------------------------ r258394 | abataev | 2016-01-21 15:35:58 +0300 (Thu, 21 Jan 2016) | 3 lines [OPENMP] Fix crash on reduction for complex variables. reworked codegen for reduction operation for complex types to avoid crash ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@258483 91177308-0d34-0410-b5e6-96231b3b80d8 commit 87be85827c3675b6f86faf95a4f868889046812c Author: Hans Wennborg Date: Wed Jan 13 23:48:11 2016 +0000 Merging r257652 and r257695: ------------------------------------------------------------------------ r257652 | hans | 2016-01-13 11:14:03 -0800 (Wed, 13 Jan 2016) | 1 line Update cxx_dr_status.html after the 3.8 branch ------------------------------------------------------------------------ ------------------------------------------------------------------------ r257695 | rsmith | 2016-01-13 14:51:59 -0800 (Wed, 13 Jan 2016) | 2 lines Update make_cxx_dr_status after the 3.8 branch. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@257714 91177308-0d34-0410-b5e6-96231b3b80d8 commit 289912a5def6c6fe40ceaa86bd86a94bce267eea Author: Paul Robinson Date: Wed Jan 13 19:24:51 2016 +0000 Release note for debugger tuning git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@257654 91177308-0d34-0410-b5e6-96231b3b80d8 commit c0b78cf1413392bc5245924920e1f42479f67138 Author: Hans Wennborg Date: Wed Jan 13 17:34:59 2016 +0000 Creating release_38 branch off revision 257626 git-svn-id: https://llvm.org/svn/llvm-project/cfe/branches/release_38@257631 91177308-0d34-0410-b5e6-96231b3b80d8 commit 549aad9fb0b88300f815f7222cda572d77f8b588 Author: William S. Moses Date: Wed Jan 6 18:25:36 2016 -0500 Revert "begin basic spawn expr" This reverts commit 8d0620168ba215af69043b5fb9155faad10001ae. commit 8d0620168ba215af69043b5fb9155faad10001ae Author: William S. Moses Date: Wed Jan 6 17:48:12 2016 -0500 begin basic spawn expr commit eb614e27dcf5b59882f9c3f80ed4345717bd3f97 Merge: ef0c337d4c ae24817fc4 Author: TB Schardl Date: Wed Dec 30 01:46:46 2015 +0000 Merge branch 'master' of http://llvm.org/git/clang commit ef0c337d4c84e8f2e46b3ecf14773d9f0ade0b77 Merge: 9dc030caa8 2defbb3179 Author: TB Schardl Date: Fri Nov 6 11:51:17 2015 +0000 Merge branch 'master' of http://llvm.org/git/clang commit 9dc030caa868a05da42159cf4f1a68f8fd068bba Merge: 75fb4bbdf2 0c29653a59 Author: TB Schardl Date: Wed Nov 4 01:29:31 2015 +0000 Merge branch 'master' of http://llvm.org/git/clang commit 75fb4bbdf2f7215b0c3889eef3b85a9abac8eb12 Merge: 9a7026d061 a1299df82f Author: TB Schardl Date: Fri Oct 16 14:26:19 2015 +0000 Merge branch 'master' of http://llvm.org/git/clang commit 9a7026d061355e90e580bad10cb6c4f7db9a13a7 Merge: 0cfc5d34e7 a0bd79c879 Author: TB Schardl Date: Fri Oct 16 00:37:02 2015 +0000 Merge branch 'master' of http://llvm.org/git/clang commit 
0cfc5d34e72655e165aa465acdb3b367fa365404 Merge: f33de512f7 9ff9a42902 Author: TB Schardl Date: Thu Oct 15 19:19:27 2015 +0000 Merge branch 'master' of http://llvm.org/git/clang commit f33de512f7c0fd2a7bfe591f6be1602f3b674993 Merge: b75023b850 96497ca976 Author: TB Schardl Date: Tue Oct 13 16:58:48 2015 +0000 Merge branch 'master' of http://llvm.org/git/clang commit b75023b850a79cc1e9374dbc0feafe9708ab855a Merge: 64059cfd77 d981a124bf Author: TB Schardl Date: Tue Oct 13 12:53:05 2015 +0000 Merge branch 'master' of http://llvm.org/git/clang commit 64059cfd773559df1cd2abc9368db98a17f5c48f Merge: 5934487932 c0ea625d92 Author: TB Schardl Date: Fri Oct 2 20:24:33 2015 +0000 Merge branch 'master' of http://llvm.org/git/clang commit 5934487932aac8f1a70b70bf531be44d5db4a94a Merge: 1cc2b4d793 675c6b4346 Author: TB Schardl Date: Tue Sep 22 17:53:25 2015 +0000 Merge branch 'master' of http://llvm.org/git/clang commit 1cc2b4d793131aa97cf745c51168abb5d85f65e1 Merge: 5342aa7a03 a83de9f778 Author: TB Schardl Date: Sun Sep 20 19:07:04 2015 +0000 Merge branch 'master' of http://llvm.org/git/clang commit 5342aa7a03d7275970c8b9f85bfffc72e4934fe7 Merge: 77ede05cb8 60669b0ea1 Author: TB Schardl Date: Sun Sep 13 12:11:25 2015 +0000 Merge branch 'master' of http://llvm.org/git/clang commit 77ede05cb88f47943071a24fb3d3b920a3585c9e Merge: 5c4f54b8c9 b6defdfa47 Author: TB Schardl Date: Tue Sep 8 21:48:10 2015 +0000 Merge branch 'master' of http://llvm.org/git/clang commit 5c4f54b8c946e9aedbeb84c4a33c883f5ae5a741 Merge: c27303046e 90ea10e5ea Author: TB Schardl Date: Tue Sep 8 15:50:40 2015 +0000 Merge branch 'master' of http://llvm.org/git/clang commit c27303046e97baa5184ca71d8cc574f9765b7af9 Merge: 387c4b0f64 f08b591b3f Author: TB Schardl Date: Fri Sep 4 11:31:52 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit 387c4b0f64dd71fa7f1ce8304d81ba89b7020cb8 Merge: f501fbb783 ad8f870018 Author: TB Schardl Date: Tue Sep 1 16:59:17 2015 -0400 Merge branch 'master' of 
http://llvm.org/git/clang commit f501fbb7836df11460503fa16eb893628c8df381 Merge: 77003a470c a9262e954a Author: TB Schardl Date: Sun Aug 30 08:51:43 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit 77003a470cd3c616abbfeba379f209daee99ff2a Merge: 2740b0202e 6b6df26f3b Author: TB Schardl Date: Sat Aug 29 12:04:05 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit 2740b0202e6f416acac7783454da91bc4b8beebe Merge: f8860a94a2 2c55320819 Author: TB Schardl Date: Fri Aug 28 18:13:14 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit f8860a94a2531afdd93be84d5b92c237b6e918a0 Merge: fbc72c7454 f4d02c776c Author: TB Schardl Date: Tue Aug 25 14:00:48 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit fbc72c7454b1c8813f798e7b07294144fa5b1a22 Merge: 6dd4764afd e0848b6353 Author: TB Schardl Date: Sat Aug 22 09:51:09 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit 6dd4764afdbd8df45ca582d7df415547e55b121a Merge: 9c2124d18f f27472a5fa Author: TB Schardl Date: Tue Aug 18 11:25:18 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit 9c2124d18f0c85933e1b41d3577864e04bda377f Author: TB Schardl Date: Tue Aug 18 11:13:10 2015 -0400 Generate shorter names for successors of detach statement added for _Cilk_spawn commit 63365e0e7a4e06b3cf4a2d6f65e1d2d01ba88d62 Merge: 623dd9aed9 19d5024c80 Author: TB Schardl Date: Mon Aug 17 08:53:06 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit 623dd9aed9f0d33eb1b5c3e48416f090056aadbb Merge: c68c021b52 2563c75502 Author: TB Schardl Date: Sat Aug 15 11:34:00 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit c68c021b52dac30aa5e2613cb98f991f5245cf8f Merge: b3d71b31fe d2bb239313 Author: TB Schardl Date: Fri Aug 14 09:26:00 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit b3d71b31fe475542c0c441cb46a07bb07b65015f Merge: 8f069abfa4 013510776b Author: TB Schardl Date: Thu Aug 13 12:36:14 2015 -0400 
Merge branch 'master' of http://llvm.org/git/clang commit 8f069abfa406a9b37835b31c5f6d025b5ea62c1b Merge: e62504c6d9 a92ab3bb56 Author: TB Schardl Date: Tue Aug 11 13:30:57 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit e62504c6d903aedd1f105bdda020ff465c282255 Author: TB Schardl Date: Tue Aug 11 13:30:46 2015 -0400 Fixed naming inconsistency in basic blocks for parallel loops. commit 5447e9dc150ac8625c78707b0bbf90b3d86869e1 Merge: 67eac8b63e 8aef53c9eb Author: TB Schardl Date: Mon Aug 10 12:53:50 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit 67eac8b63e17b3785179734a71a888555393ab22 Merge: 32b7e71e41 df20ae37bc Author: TB Schardl Date: Sat Aug 8 09:12:33 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit 32b7e71e41fe61b0eb74c7951ef975c99e809fed Merge: d08e96e4f3 56599d9f1d Author: TB Schardl Date: Fri Aug 7 09:05:01 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit d08e96e4f31b632c0a0114b2b92113385847a59b Merge: 4c42ef9fb0 af8d90c61e Author: TB Schardl Date: Thu Aug 6 08:08:28 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit 4c42ef9fb095ad473ec3931206dc771e9a06712c Merge: f86a41c219 e770ba628e Author: TB Schardl Date: Tue Aug 4 14:30:44 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit f86a41c219a505dacbdd80e495f922e76f37ccf9 Author: TB Schardl Date: Tue Aug 4 14:06:16 2015 -0400 Simple support for _Cilk_for, compiling such loops into loops whose bodies are detached. 
commit fc3fb22f0b723cb4052a6509cb13046018b31f5d Author: TB Schardl Date: Tue Aug 4 13:43:27 2015 -0400 Removing seemingly unnecessary methods from CompoundScopeInfo commit 16a4379879ec334dbcbeb951a1534634fc2ad20f Merge: e6d44bf20e 0cd2b8ea3e Author: TB Schardl Date: Fri Jul 31 08:51:46 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit e6d44bf20edf4ba9c7af13e152a01f70140c9eec Merge: b633a50c28 e8675cbb96 Author: TB Schardl Date: Wed Jul 29 08:48:28 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit b633a50c284ef585e5bb2c655723f0ce4a3c0a63 Merge: 0bce4966be 90cbf30ef1 Author: TB Schardl Date: Mon Jul 27 08:22:40 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit 0bce4966be74d281fa0b11287d2a4165e5924bb8 Merge: c7f49c9776 4b22d484c8 Author: TB Schardl Date: Thu Jul 23 08:58:05 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit c7f49c977615befb2110b5b8f85b9cdc421b46af Merge: 964cd234c5 eda30f12cd Author: TB Schardl Date: Wed Jul 22 08:30:51 2015 -0400 Merge branch 'master' of clang main commit 964cd234c51dea583bcea740a9778ea69a6b333f Merge: e057223460 e81928aec4 Author: TB Schardl Date: Tue Jul 21 08:42:58 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit e057223460d40a79297c6e0343d786a21c6494fe Merge: 009b6bf4cd ede86da43c Author: TB Schardl Date: Mon Jul 20 10:53:15 2015 -0400 Merge branch 'master' of clang main commit 009b6bf4cd9326ff9089c81f4fd67c2ec09e41e1 Merge: f845120461 f76b90d2c3 Author: TB Schardl Date: Fri Jul 17 09:14:25 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit f845120461e5bfc731bd0794ccd57cb838a83f89 Merge: 34005afba5 26f98e4220 Author: TB Schardl Date: Thu Jul 16 08:59:33 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit 34005afba58995d38223f038a80298d062cfa2d7 Merge: 8794a46230 82a8792ad3 Author: TB Schardl Date: Wed Jul 15 08:17:22 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit 
8794a462306412175003a1c8d097a12c847b40e8 Merge: 3689e00a5c e214b24242 Author: TB Schardl Date: Tue Jul 14 09:19:38 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit 3689e00a5c068740240c70ca35d8328e3fb1fa97 Merge: 4df53334ac 3e0ef38ccb Author: TB Schardl Date: Sat Jul 11 08:00:07 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit 4df53334ac8de56b295d717b35ab7ba264dc412d Merge: e3913464d2 4c024e56f2 Author: TB Schardl Date: Fri Jul 10 08:48:32 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit e3913464d299423137f8f0076d0600f08a84d9c1 Merge: 74f4749731 78932e68d4 Author: TB Schardl Date: Thu Jul 9 09:04:10 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit 74f4749731f3cf8019dda02e713716b306fa4cc9 Merge: 2420eb78c8 93ea4643bd Author: TB Schardl Date: Wed Jul 8 07:54:44 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit 2420eb78c8192f21fc54ceb844d7c824fe161928 Author: TB Schardl Date: Tue Jul 7 23:59:57 2015 -0400 Using reworked reattach commit 37105d9e5fed5aadbcb2971551fffcd257ed73f0 Merge: 1d136b8847 4a4b27a329 Author: TB Schardl Date: Tue Jul 7 13:45:46 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit 1d136b884787e9d32f2d31f497081652c593f0c4 Merge: d4263d1f76 37f3a94e95 Author: TB Schardl Date: Tue Jul 7 08:18:11 2015 -0400 Merge branch 'master' of clang main commit d4263d1f76d3ff9ffcef79546943d026ae37311d Merge: 31b0f54c0c 174220cf7c Author: TB Schardl Date: Mon Jul 6 08:23:44 2015 -0400 Merge branch 'master' of http://llvm.org/git/clang commit 31b0f54c0cb9695386c7ec0eba0c558abe34b332 Author: TB Schardl Date: Thu Jul 2 15:06:23 2015 -0400 Adapted _Cilk_sync compilation to use temporary sync instruction in LLVM IR. 
commit 05b666b72ee1b528818345516d172c21911ec588 Merge: 75cbc61db3 a8245d8fe2 Author: TB Schardl Date: Thu Jul 2 10:55:15 2015 -0400 Merge branch 'master' of clang main commit 75cbc61db3f163217e0c8636ddb7731ee565033d Merge: 5b3d3a156f 6b4b8ef2b2 Author: TB Schardl Date: Thu Jul 2 08:53:38 2015 -0400 Merging with clang master commit 5b3d3a156fb1ff0d40f832721c9ce61f5a558260 Author: TB Schardl Date: Tue Jun 30 15:39:36 2015 -0400 README that documents current caveats with _Cilk_spawn and _Cilk_sync keyword support. Modification to Parser to simplify our lives slightly. commit 46ab4dcf51ad983e4f94f60d12408133f9049115 Merge: c4231f00bd a59db6f9fe Author: TB Schardl Date: Tue Jun 30 14:30:59 2015 -0400 Merge branch 'master' of github.com:taekwonbilly/Cilk-Clang commit c4231f00bd1cb14eb6d50f244075c5ca5ad88ae2 Author: TB Schardl Date: Tue Jun 30 14:29:11 2015 -0400 Initial commit to add something like Cilk support to Clang. commit a59db6f9feba1363c2fca49198efecaa6a8e9d26 Author: Billy Moses Date: Sat Jun 27 13:28:57 2015 -0700 first commit commit 68abe349f12b555d47c7eef36b81e8cfce590f15 Author: TB Schardl Date: Tue Dec 31 14:53:36 2019 +0000 [TapirTarget] [LoweringUtils] [LoopSpawningTI] Generalize Tapir-lowering infrastructure in a couple ways: -) Allow Tapir code to be outlined into a different Module from the spawning function. -) Make LoopSpawningTI run target-specific preprocessing and postprocessing of Functions containing Tapir loops. -) Support more general orderings of the loop-control arguments to an outlined helper for a Tapir loop. commit 547208491136cc70bebf494b535fc9bc54075144 Author: TB Schardl Date: Tue Dec 31 14:34:01 2019 +0000 [TapirTarget] Cleanup code with consistent function parameter names. commit 761c2bf6ce20a9be1e41c7eededf551bcb0a3293 Author: TB Schardl Date: Thu Dec 26 14:20:32 2019 +0000 [LoopSpawning] Remove deprecated LoopSpawning pass. 
commit e2ee5774eb695beea4777ec009076d6dda62c822 Author: TB Schardl Date: Thu Dec 26 14:14:14 2019 +0000 [TapirToTarget] Clean up debug output for LowerTapirToTarget pass. commit 95adf3f993e71c43f8c09c8902c28ffee78f9da3 Author: TB Schardl Date: Thu Dec 26 14:01:58 2019 +0000 [test/Tapir] Add test case to check attributes on helper functions generated during Tapir lowering when argument structures are used. commit 8acbbeae52a8d7f1eadd52e73ebebd869e2b5af8 Author: TB Schardl Date: Thu Dec 26 04:55:45 2019 +0000 [CilkABI] Bug fix to setting of helper-function attributes when synthesized to use argument structures. commit 714296d12b3a65a18215917633e25115851ba570 Author: TB Schardl Date: Tue Dec 24 17:02:29 2019 +0000 [test/TapirRaceDetect] Update regression test to match new output of TapirRaceDetect pass. commit 7a6a9d0e403367f1c5f294cd1adef143a4a254d0 Author: TB Schardl Date: Tue Dec 24 15:20:57 2019 +0000 [FunctionAttrs] Fix bug to avoid marking a function as argmemonly if the underlying object of a memory access is an arbitrary instruction, such as a load. commit e29e6e956582ab06092c95067bd522ac67169d91 Author: TB Schardl Date: Tue Dec 24 15:18:02 2019 +0000 [LoopStripMinePass] Allow Tapir-loop stripmining to operate on any loop with a Tapir-loop structure, not just loops marked for recursive divide-and-conquer loop spawning. commit ddf04b035b7b489ffca64f18ef6d8cf281986e70 Author: TB Schardl Date: Tue Dec 24 15:09:58 2019 +0000 [TapirToTarget] [LoopSpawningTI] Change Tapir lowering to support different approaches for lowering to the Cilk Plus runtime system. In particular: [TapirToTarget] Add support for using argument structures for outlined Tapir tasks and Tapir loops. [LoopSpawningTI] Add support for lowering Tapir loops to calls to __cilkrts_cilk_for runtime methods. 
Although these approaches for lowering to the Cilk Plus runtime system likely won't be used in practice, the functionality in the Tapir-lowering infrastructure to support these capabilities should be useful for other Tapir targets. commit 5b19a93498816c8246c96205a60114a797f09b33 Author: TB Schardl Date: Fri Dec 20 12:43:09 2019 +0000 [Analysis] [IR] [Passes] [Transforms] [Test] Minor fixes to code style and comments. commit d035d8964782a51bd41c584134004524a30fdc0e Author: TB Schardl Date: Mon Dec 2 21:56:25 2019 +0000 [CilkSanitizer] Cleanup code and improve debug output. commit 56fac7f9f017f910dcd7f53395b89d6b9e97182e Author: TB Schardl Date: Mon Dec 2 17:45:05 2019 +0000 [TapirRaceDetect] Add support for providing a list of functions and globals to ignore when performing Tapir-based static race detection. commit 2e28c0069a1ef5ba4efd1e392daffdd0c4cfc858 Author: TB Schardl Date: Mon Dec 2 17:43:08 2019 +0000 [TapirRaceDetect] Fix to check if an attribute on a function argument precludes a race with the caller. commit 3c6ebbdb1e46aa43d9c54d7ae5ddfeb8e4d03e5a Author: TB Schardl Date: Mon Dec 2 17:41:12 2019 +0000 [TapirRaceDetect] Keep track of which instruction potentially races with a given instruction for each potential race identified, when such a racer can be found. commit ab9a87743c20df4a6f307cf80beec5915444d28d Author: TB Schardl Date: Mon Dec 2 16:58:29 2019 +0000 [TapirRaceDetect] Cleanup code for printing analysis results. commit f918901fe0991920fc96cd8b065d35018b0f7d5f Author: TB Schardl Date: Mon Dec 2 16:50:22 2019 +0000 [TapirUtils] Fix typo in comment. commit 293f67a753fd4ad49a1d2e57bd05d6be15df4809 Author: TB Schardl Date: Mon Dec 2 16:49:14 2019 +0000 [DependenceAnalysis] Add simple methods for printing a GeneralAccess and checking equality between two GeneralAccess objects. commit cac080ab80c9fb29b350ddf5098d0ef03eebad47 Author: TB Schardl Date: Mon Dec 2 16:46:08 2019 +0000 [SROA] Remove dead Tapir-related code. 
commit c7e3ffa057a3937ee527ea3199d772ece1effc7c Author: Tao B. Schardl Date: Mon Nov 25 10:09:25 2019 -0500 Update README.md Fix link to GitHub CI status badge. commit b5f17f84b01e9d0a4919a166c8650a6cb89cfcce Author: TB Schardl Date: Mon Nov 25 15:01:48 2019 +0000 [CI] Update GitHub CI to properly build and test projects associated with Tapir-LLVM on multiple Ubuntu systems with multiple compilers and to incorporate a test to build the Cilk Plus runtime system using Tapir. commit 943bf73b713f9526ca41f972067d161e460f4928 Author: TB Schardl Date: Mon Nov 25 03:18:36 2019 +0000 [CI] Trying out GitHub CI. commit 6c1aa3721a83639da2cc2e9809ad47af647ab29f Author: TB Schardl Date: Tue Oct 15 12:51:58 2019 +0000 [CSI] Slightly better front-end data when debug symbols cannot be found for a particular instruction or function. commit c39950669f21b86fbd0d21371286d0c6d7c3e30e Author: TB Schardl Date: Sun Oct 13 12:09:00 2019 +0000 [CilkSanitizer] Fixed bug causing CilkSanitizer to fail to instrument function entry/exit and free calls in some cases. commit 5c56dbd5f1e1175e4f0191ed533121868f4aa2fa Author: TB Schardl Date: Sun Oct 13 12:06:41 2019 +0000 [TapirRaceDetect] Handle realloc as a special case, since it can free memory as well as allocate it. commit fcf9ecb89d3b533013020b40abdc6faf7b8ea0e4 Author: TB Schardl Date: Fri Oct 11 13:28:49 2019 +0000 [CilkSanitizer,CSI] Fixed bug with inserting loop instrumentation and splitting detach-continuation blocks. commit 2b2fedf308a0cc342eae426234a28a254df8dc90 Author: TB Schardl Date: Fri Oct 4 12:27:54 2019 +0000 [TaskSimplify] Avoid removing sync regions for syncs that don't appear to distinguish themselves but are distinguished against by other syncs. commit fa078390265ae667f60401359b2bb80aab2081fc Author: TB Schardl Date: Fri Oct 4 03:20:06 2019 +0000 [CilkSanitizer] Pass sync-region information to Cilksan. 
commit 197af2fa80e65218926cb3253544fdc555c8d3a8 Author: TB Schardl Date: Thu Sep 26 11:55:29 2019 +0000 [CilkSanitizer] Fixed bug where Cilksan failed to instrument loads and stores whose underlying object was a noalias function argument. commit f649619d3ecc0b7f056214bc83919953a9990873 Author: TB Schardl Date: Sat Sep 7 18:04:51 2019 +0000 [CilkSanitizer] Avoiding unnecessary loads of suppression flags. commit 0a9664b7ccb75583eb7793af6b9c7b015e0b35f8 Author: TB Schardl Date: Sat Sep 7 18:03:23 2019 +0000 [CilkSanitizer] Adding Cilksan-specific attributes on CSI hooks that Ciksan uses. commit 074a84aef65c11c1a9ca92f1c4af8403f89cbe50 Author: TB Schardl Date: Sat Sep 7 17:58:13 2019 +0000 [TapirRaceDetect] Improved analysis of potential races between function arguments and instructions. commit 310d32b130ae4e0c97ec2f5c725eb101868572e4 Author: TB Schardl Date: Fri Sep 6 11:50:06 2019 +0000 [TapirRaceDetect] Bug fix to identify loops to check based on loops in common between the two accesses and their underlying objects. commit 32faa5b6c22a5baea06a1c8da369fd08f169625d Author: TB Schardl Date: Fri Sep 6 11:46:58 2019 +0000 [LoopStripMine] Fixed creation and updating of PHI nodes when nesting a stripmined parallel loop within a task, specifically, when spawning the epilog. commit 580565b41f396765b3f933e508d26ec9b9a188fa Author: TB Schardl Date: Wed Sep 4 22:28:10 2019 +0000 [CilkSanitizer] Add loop instrumentation. commit d41eaee2c761e78bfc609f6311169e122b68b04e Author: TB Schardl Date: Tue Sep 3 14:53:42 2019 -0400 [Cilksan] Bug fixes to support compilation using GCC 5. commit bebc1c561f99bed65a2ee234e7218986940440c2 Author: TB Schardl Date: Tue Sep 3 18:49:05 2019 +0000 [CSI] Add properties to task and task_exit hooks. commit f886dbcd82ba3458f2b3b314c9e1e54fa9c8d84e Author: TB Schardl Date: Tue Sep 3 17:57:56 2019 +0000 [CilkSanitizer] Removing code that is dead after moving static race-detection logic to a separate analysis pass. 
commit c77ce0174efaae4eb8cbb0cdc2ac70b7af4029b8 Author: TB Schardl Date: Tue Sep 3 17:50:58 2019 +0000 [CSI] Add support for instrumenting loops. commit f671cca224c9acda91c914e5f1ce68d126272087 Author: TB Schardl Date: Tue Sep 3 12:29:53 2019 +0000 [test/CilkSanitizer] Fixed fib test to reflect change to CilkSanitizer where enable/disable-instrumentation calls are not inserted around calls to const functions. commit 89d53a63f1aa091238c2bcb5bbdad221c5418881 Author: TB Schardl Date: Tue Sep 3 12:24:44 2019 +0000 [CSI] Make names of FED table variables in LLVM IR more meaningful, to simplify debugging. Add aligned_alloc to list of allocation functions. Code cleanup. commit f86d405f5494c4af5623c07b14abed7c57f3e937 Author: TB Schardl Date: Sun Sep 1 20:33:31 2019 +0000 [CilkSanitizer] Draft implementation of CilkSanitizer using TapirRaceDetect analysis to perform static race detection. This implementation includes code to suppress instrumentation based on dynamic propagation of TapirRaceDetect analysis between functions. commit 9c229b7f63f7d06accb0d59a78f1f13018762aa1 Author: TB Schardl Date: Sun Sep 1 20:30:47 2019 +0000 [CilkSanitizer] Various minor edits and bug fixes, including attributes for CilkSan hooks and changes to hooks to pass both the stack and frame pointers of a function or task. commit 821731233474c3a9ce6b6d35f2008a07dae1c702 Author: TB Schardl Date: Sun Sep 1 20:19:43 2019 +0000 [CilkSanitizer] Bug fix to pass the option of not assuming that unmarked calls might throw. commit 12084f1e662b9cf00628ee492d10c19f55e718cb Author: TB Schardl Date: Sun Sep 1 19:41:08 2019 +0000 [TapirRaceDetect] Prototype analysis pass to perform static determinacy-race detection based on Tapir and identify pairs accesses that might participate in a determinacy race. 
commit 8cd6fa9bbd839fbb13c52c66c44489cae23c0e53 Author: TB Schardl Date: Sun Sep 1 19:27:04 2019 +0000 [ValueTracking] Allow ValueTracking to ignore syncregion_start intrinsics similarly to other assume-like intrinsics. commit 79f169ab258e68f078b1c0c4215176e961692c68 Author: TB Schardl Date: Sun Sep 1 19:26:01 2019 +0000 [MemoryLocation] Allow masked_load and masked_store intrinsics to have an associated MemoryLocation. commit 0a2ff56595b2ae9d6e22ec7a039cebebbaeeea26 Author: TB Schardl Date: Sun Sep 1 19:19:09 2019 +0000 [Tapir] Add intrinsic to get the frame address of a Tapir task. commit cdda1d6b6f14d8d91c95467e0edbad578b254bcc Author: TB Schardl Date: Sun Sep 1 19:16:53 2019 +0000 [DependenceAnalysis] Allow GeneralAccesses to keep track of operand numbers, to identify which pointer argument of an instruction the GeneralAccess pertains to. commit 6c4fe8695a24df2de21c22f1c76677dc07be4dc3 Author: TB Schardl Date: Sun Sep 1 19:13:50 2019 +0000 [TapirTaskInfo] Fixes to have evaluateParallelState to implement the worklist algorithm and to collect better statistics. commit 9850a793972ce122846f1bb6825513ad6fb7cf1b Author: TB Schardl Date: Sun Sep 1 19:02:20 2019 +0000 [TargetLibraryInfo] Added TLI entry for aligned_alloc. commit 9ac620a38fbef6bdac15211648129cca2ea7aee3 Author: TB Schardl Date: Fri Aug 9 16:44:55 2019 +0000 [AliasAnalysis] Updating code comment for consistency. commit f581b99e7c151d1a095954c890d6fe191f47c258 Author: Victor A. Ying Date: Sun Jul 28 00:06:55 2019 -0400 fix cherry-pick commit 4ed3ba59a5ac0c8c3cf85b7566b313ef90169004 Author: Victor A. Ying Date: Sat Jul 27 18:25:35 2019 -0400 [AliasAnalysis] Avoid unnecessary recursion on nested detaches. This addresses https://github.com/wsmoses/Tapir-LLVM/issues/92 This ensures getModRefInfo runs in linear time, and avoids unbounded stack growth. 
commit 4d8f576dc3d50e82f627ca7c294768b00db73ba5 Author: TB Schardl Date: Thu Aug 8 20:11:58 2019 +0000 [TapirToTarget] Bug fix to handle functions with no returns during lowering. commit 5658b0fa198e5b6a2bbaecb0dfb32557bacb9767 Author: TB Schardl Date: Tue Jul 16 03:44:41 2019 +0000 [TapirTaskInfo] Adding some simple statistics. commit 0ed8b0569cb48039f12376fa05ca345441c4aebc Author: TB Schardl Date: Tue Jul 16 03:42:39 2019 +0000 [LoopStripMine] Fix check for presence of primary induction variable. commit 17dbeede9b656b0ee449abe36d9283b43db4f88c Author: TB Schardl Date: Mon Jul 15 12:20:08 2019 +0000 [CilkSanitizer] Adding ModRefInfo to general accesses, in place of the previous enum that only recorded whether or not the access was a write. commit 664214e673cb132c2757d39bd2afece3d2148504 Author: TB Schardl Date: Fri Jul 12 14:42:38 2019 +0000 [TapirTaskInfo] Track the exceptional continuation of tasks, and use that when updating and modifying exception-handling code for detaches. commit 766a25bd13a23cf9c2f2700edd1e1076b5787ddc Author: TB Schardl Date: Tue Jul 9 11:37:05 2019 +0000 [TapirLoopInfo] Minor cleanup to debug output. commit 9824940868f2149294fb5a65fc3eabeb5a7bbdb1 Author: TB Schardl Date: Tue Jul 9 11:34:57 2019 +0000 [LoopStripMine] Changing calculation of start iteration for epilogue to improve subsequent scalar-evolution analysis. commit 071e7c421d174948952fc9ad061c03a6f00c1bda Author: TB Schardl Date: Tue Jul 9 11:33:23 2019 +0000 [TapirTaskInfo] Speeding up analysis of maybe-parallel tasks by processing each spindle just once per round. Cleaning up debug output. commit b5d4de8ac4fa6639fe35930bcba0e13fc0016c31 Author: TB Schardl Date: Mon Jul 8 16:58:08 2019 +0000 [Kaleidoscope/Tapir] Code cleanup and fixes to nesting of tasks. commit b977e5c7b355d63c7321a945eee4be8a5380b3d2 Author: TB Schardl Date: Mon Jul 8 12:52:43 2019 +0000 [Kaleidoscope/Tapir] Bug fixes for Cilksan usage. 
commit 17997220432949a196be305acdcd57f27432e893 Author: TB Schardl Date: Mon Jul 8 12:51:42 2019 +0000 [Cilksan] Standardizing the use of the sanitize_cilk attribute to indicate functions that should be instrumented for Cilksan. commit 84bd163a75323893f57aff5a888bf9dcd3342ec1 Author: TB Schardl Date: Mon Jul 8 01:42:08 2019 +0000 [CSI,Cilksan] Bug fixes for Kaleidoscope tutorial: fix generation of weak symbols in JIT mode, and ensure that Cilksan instruments each function just once. commit 6ee6f094ec4fcc8ca55c5235c42ce50af98a3256 Author: TB Schardl Date: Mon Jul 8 01:36:43 2019 +0000 [Kaleidoscope] Basic tutorial code for using Tapir to introduce parallel control flow into Kaleidoscope. commit f1a930a36283c25d9d3acb09a73b377b941f3611 Author: TB Schardl Date: Sun Jun 30 11:32:02 2019 +0000 [CSI] Add CSI option to disable promotion of calls to invokes, in order to simplify support for languages with no exceptions. commit dcacd2a1b269da752c539572ec5d863a2705b090 Author: TB Schardl Date: Thu Jun 20 01:38:12 2019 +0000 [LoopStripMine] Bug fix to recalculate the dominator tree in cases where it's too hard to incrementally update the dominator tree. commit 7ec3bdfbe25407fe6637e28f8962c3195f59ecad Author: TB Schardl Date: Thu Jun 20 01:36:59 2019 +0000 [CSI] Disabling weak-symbol linkage for function-ID symbols when in JIT mode. commit c689e4e52fdd2baeb7b9cc6759d180f838e2df66 Author: TB Schardl Date: Thu Jun 20 01:36:20 2019 +0000 [CilkABI, CilkRABI] Code cleanup to address some compiler warnings. commit 82448f41ceb03165a53fddc578bd6e873ca031b4 Author: TB Schardl Date: Fri Jun 28 19:46:24 2019 +0000 [Test/Tapir] Fixed test case in light of draft change to CilkABI. commit c0376628952450fe482bf8a07be43ebcadef1091 Author: TB Schardl Date: Mon Jun 10 19:15:58 2019 +0000 [CilkABI] Draft change to insert exception-handling code for Cilk in landing pad after a detach. 
commit 8f2f0bd89693e7f024f1c3c79bab7ec2d2869c96 Author: TB Schardl Date: Wed Jun 5 20:55:53 2019 +0000 [CircleCI] Updating branch references for the CI. commit 3a507b507d0671f14d1a067a73b24b1de0d60379 Author: TB Schardl Date: Wed Jun 5 20:51:20 2019 +0000 Bug fixes for rebase onto version 8. commit 9d5226658db3aaf4a98e43d21999884e9ba97402 Author: TB Schardl Date: Wed May 1 12:39:50 2019 +0000 [TapirTaskInfo] Bug fix to perform proper forward data-flow analysis on spindles. commit e89e54a2f1dea97d205b2b4054a7e467d53580c9 Author: TB Schardl Date: Thu Apr 18 22:59:15 2019 -0400 [CodeGen] Mark functions with the stealable attribute as having variable-sized objects. This behavior exhibits the closest semantics to what Cilk needs, specifically, when spawning functions with many parameters. commit d14a842179eaae97847589cbf42be0cae990df89 Author: TB Schardl Date: Wed Apr 17 14:59:24 2019 -0400 [SerializeSmallTasks] Bug fix to ensure that analysis of stripmine count is consistent with loop-stripmining pass, plus some code cleanup. commit ad045709e2fa089f6b04a9f0d098a566c264bbba Author: TB Schardl Date: Thu Mar 7 02:18:32 2019 +0000 [CSI] Fix to disable categories of CSI instrumentation via command-line options. commit e11e495266b7bd470883942b7312bf9ae3a19ff7 Author: TB Schardl Date: Wed Apr 10 18:40:23 2019 +0000 [SerializeSmallTasks] Adding simple pass for serializing Tapir tasks whose work is too small to amortize the cost of spawning. Currently this pass only handles small Tapir loops. commit 8ca2500a4bed16682714a4f98272b12bf74c649e Author: TB Schardl Date: Wed Apr 10 18:36:06 2019 +0000 [test/Tapir] Adding unit tests for loop-stripmine pass. commit 72c64d4a1c5802ec78724d33495c83e13c17178e Author: TB Schardl Date: Wed Apr 10 18:32:25 2019 +0000 [LoopStripMine] Code cleanup and reorganization for loop-stripmining pass and functions. 
commit a00c8f15c66935b4552a1eccc87a362305ab4edd Author: TB Schardl Date: Wed Apr 10 18:27:28 2019 +0000 [TaskSimplify] Minor cleanup for checks to run task-simplify pass on a function. commit 647b38f70765edb2a986c7bf1006ac60b329fee3 Author: TB Schardl Date: Fri Apr 5 02:05:20 2019 +0000 [LoopStripMine] Simplify all new loops, and ensure that static allocas are properly moved to the new entry of the stripmined loop. commit 563eac920f3f9d1021ac6f894a08d3fc2e7005f5 Author: TB Schardl Date: Mon Apr 1 13:09:30 2019 +0000 [CilkSanitizer] Bug fix for evaluating common loops between two memory accesses and their underlying memory allocation. commit 833cf39e7c8f0b0fd965133e0811d98d8aa35d23 Author: TB Schardl Date: Mon Apr 1 12:24:43 2019 +0000 [DependenceAnalysis] Add checks for SCEVable pointers when handling general accesses. commit db223e0ea241ff3891ec16fdf5df09d3b2d4dab5 Author: TB Schardl Date: Wed Mar 27 13:59:15 2019 -0400 [TaskSimplify] Bug fix to ensure that, when redundant sync regions are replaced, the replacement sync region dominates all its uses. commit dd36baa62440d8ab551073d84b7a1e8d8de10815 Author: TB Schardl Date: Wed Mar 27 13:57:55 2019 -0400 [LoopStripMinePass] Add option to serialize loops that analysis indicates are not profitable to parallelize. commit dd34eb3a88cdf4c50bc51fa6e23e50868daa3b33 Author: TB Schardl Date: Wed Mar 27 13:57:12 2019 -0400 [LoopStripMine] Add option to incorporate a sync within the new detached task created when a stripmined loop's epilog is allowed to execute in parallel. commit 9fdac6822e3baf822fcb43d4928ccc3b09eaeec0 Author: TB Schardl Date: Wed Mar 27 13:50:51 2019 -0400 [LoopStripMine] Adding TargetLibraryInfo usage to exclude builtin functions from count of expensive functions. commit ee4946238854ba3f7fc1d1990164e6cfc148292a Author: TB Schardl Date: Wed Mar 27 13:42:57 2019 -0400 [LoopStripMine] Bug fix to handle the case of constant trip counts, for which the epilog performs a constant number of iterations. 
commit 5ddd3b93197c23dd240c6f1403aa49bcabb30236 Author: TB Schardl Date: Wed Mar 27 13:31:17 2019 -0400 [CilkSanitizer] Add an option to disable the conversion of calls that might throw to invokes. commit 459e9e99db132aa21dec633dbbafa027430e70b3 Author: TB Schardl Date: Tue Mar 26 16:26:42 2019 +0000 [LoopStripMine] Make the parallel execution of the epilog optional, and add a simple cost analysis to determine when it's beneficial to do this parallel execution. commit 6540b8163d16d3aaf3be2fe10feaff8404fe99dc Author: TB Schardl Date: Tue Mar 26 16:24:46 2019 +0000 [LoopStripMine] Code cleanup, and bug fixes to evaluation of loop size. commit 7072bc0c83f458cc2fb1bbed916aa3fd00f23c82 Author: TB Schardl Date: Tue Mar 26 02:18:33 2019 +0000 [MemoryDependenceAnalysis] Draft implementation of data-race-free assumption in memory-dependence analysis. commit df913ef2f20e762a65dab0e52864ada981bc4255 Author: TB Schardl Date: Tue Mar 26 02:10:01 2019 +0000 [DRFAA] Cleaning up code for prototype data-race-free alias-analysis implementation. commit 7ea36895b259a857de3adc682f86157f24149e3d Author: TB Schardl Date: Tue Mar 26 01:44:32 2019 +0000 [MemorySSA] Ignore syncregion_start intrinsics. commit 6f97d53d6e50221c8f2a8945b1ead8203eece9c9 Author: TB Schardl Date: Tue Mar 26 01:40:56 2019 +0000 [LoopAccessAnalysis] Add support for data-race-free dependence analysis. commit 225774761155b19f93862810f96894d323733f6e Author: TB Schardl Date: Tue Mar 26 02:37:37 2019 +0000 [LoopPass] More consistent maintenance of TaskInfo analysis through loop passes. commit cdae352c76a9d1feda08617420f696dce0c7f24d Author: TB Schardl Date: Tue Mar 26 02:26:37 2019 +0000 [TaskSimplify] Reorganizing code to compute MaybeParallelTasks more efficiently. commit cabb02f419ca542ab56a54d46088fd9a42f55fc8 Author: TB Schardl Date: Mon Mar 25 23:09:47 2019 +0000 [Tapir] Separate the logic of checking a Loop for Tapir-loop structure from the logic of examining Tapir-loop metadata. 
commit 887e457e02bc72016b415bfe0421a5fe653dd4b2 Author: TB Schardl Date: Mon Mar 25 23:19:43 2019 +0000 [PassManager] Incorporate loop stripmining into the old and new pass managers, and disable second run of optimizations after Tapir lowering. commit fcb15e550fddb0981efcae7db89fd2db9823f7ca Author: TB Schardl Date: Mon Mar 25 23:15:19 2019 +0000 [LoopStripMine] Mark serial loops produced from loop stripmining as having been derived from Tapir loops. commit fccb780cc496b10837dcd897d4704f0d0a2952ef Author: TB Schardl Date: Mon Mar 25 23:13:12 2019 +0000 [LoopInfo] Add utility functions to mark loops as having been derived from Tapir loops. commit 322399125b6a4af135d1e9a87743595be2f6bbe8 Author: TB Schardl Date: Mon Mar 25 23:03:10 2019 +0000 [LoopStripMine] Emit pass-analysis information via ORE, plus some code cleanup. commit 05356437da7eeee75af0684be56f0407e959169a Author: TB Schardl Date: Mon Mar 25 23:26:04 2019 +0000 [TailRecursionElimination] Move syncregion_start intrinsics along with allocas to the function entry when eliminating a recursive tail call. commit 996634fd9bb97bf1a893d3683d7b8a1db70e77f9 Author: TB Schardl Date: Mon Mar 25 23:11:04 2019 +0000 [CodeMetrics] Ignore syncregion_start intrinsics when searching for instructions that are not duplicatable. commit 50f30b232de4b6d38eeba0609a61c4f5713caf44 Author: TB Schardl Date: Mon Mar 25 22:58:51 2019 +0000 [TapirTaskInfo] Adding support to output a task to a raw_ostream using the << operator. commit fcdc6444c006ccf3fd463ee24ae8350b09a039af Author: TB Schardl Date: Mon Mar 25 22:54:19 2019 +0000 [LoopStripMine] Exclude counts of builtin functions when evaluating Tapir loops for stripmining. commit 1621d754606df7e956e028a15ba4384e413cb286 Author: TB Schardl Date: Mon Mar 25 22:48:22 2019 +0000 [CodeMetrics] Added a counter for the number of calls to builtin functions. 
commit ca25f354177f350b8542b783d48004468b943c2b Author: TB Schardl Date: Fri Mar 22 02:41:01 2019 +0000 [DRFAA] Draft pass to add alias-analysis metadata based on data-race-free assumption. commit fb560bac778a140e4c6ada160c88648b396f04b3 Author: TB Schardl Date: Fri Mar 22 02:12:07 2019 +0000 [Utils] Fixing up some deficiencies in the Emacs mode, and adding some more keywords. commit 782a2258ddfbfd222cde11e8aa7ab7c101f1264d Author: TB Schardl Date: Fri Mar 22 02:11:15 2019 +0000 [Tapir] Minor code cleanup. commit c1abb5e9b7a16fcb38d560bad38833b95618a022 Author: TB Schardl Date: Fri Mar 22 02:09:00 2019 +0000 [LoopStripMine] Initial commit of a pass to stripmine Tapir loops. The pass seems to work on the Cilk application benchmark suite. The cost analysis in the pass is rudimentary, and some general functionality is missing, e.g., inserting a prolog loop instead of an epilog loop and making the detach of the stripmined loop optional. commit 834e92d776c05268abf7fb6ff296bc0a113d0010 Author: TB Schardl Date: Sun Mar 17 18:15:11 2019 -0400 [CSI] Bug fix for CSI with AOT compilation, after changes to support CSI+JIT. commit 946896c653ed4f27ba9cfa02c6e43b64ce6b3269 Author: TB Schardl Date: Sat Mar 16 23:18:39 2019 -0400 [CSI] Draft modification to populate FED tables with LLVM IR information when Debug information is not available. commit fc434be5d975271dc1f83173b8171dff5a13b85c Author: TB Schardl Date: Sat Mar 16 23:17:50 2019 -0400 [CSI] Bug fixes for instrumenting array allocas and using dependency analysis. commit 8819829371d5ccca0eeeb25753d5f1082710ea06 Author: TB Schardl Date: Fri Mar 15 17:03:05 2019 -0400 [Tapir] Cleanup management of Module throughout the TapirToTarget lowering code. commit e62b64caed7a245096d1af7ad403f32d0c81f770 Author: TB Schardl Date: Fri Mar 15 16:45:45 2019 -0400 [CilkSanitizer] Allow MightHaveDetachedUse to process Function Arguments as well as Instructions. 
commit 70ec7c06c8d2831dbf16a0efafefa018a93c4d13 Author: Daniele Vettorel Date: Fri Mar 15 19:53:21 2019 -0400 [CSI] Support for CSI+JIT. NOTE: Tapir instrumentation is still not working. commit 5c32b6c61a3eb1949bfb60eaf7836e6707516c40 Author: TB Schardl Date: Sat Mar 2 04:36:10 2019 +0000 [CSI] Add aligned `new` and `delete` functions to CSI. commit d3fc79fecf59c2a9c4c5b30642d44d033b956768 Author: TB Schardl Date: Sat Feb 23 15:45:36 2019 +0000 [LoweringUtils] Ensure that helper functions do not inherit the 'always_inline' attribute from their parents. commit 61e918838d669279d4add2f989fa45dafe77753c Author: TB Schardl Date: Sat Feb 23 15:43:53 2019 +0000 [AliasSetTracker] Add syncregion intrinsics to list of 'marker' intrinsics that don't actually affect memory. commit 252582de919f2a7534261836c14c1d65854c9660 Author: TB Schardl Date: Sat Feb 23 15:42:35 2019 +0000 [TapirTaskInfo] Add helper function to check if a spindle belongs to a subtask. commit 18d14444ece6ef6bdc4474d6ec33a14e79c594b9 Author: TB Schardl Date: Sat Feb 23 15:40:47 2019 +0000 [CilkSanitizer] Adjusting order of checks to avoid passing invalid arguments to PointerMayBeCaptured. commit 43198eddc0006e738600e2e4e2c4703a895aa385 Author: TB Schardl Date: Sat Feb 23 14:36:54 2019 +0000 [Tapir] Mark generated helper functions themselves with DoesNotThrow commit 714db76ebad352af3210de590a071ae6c4119e59 Author: TB Schardl Date: Sat Feb 23 04:23:07 2019 +0000 [Tapir] Add DoesNotThrow information to calls inserted in Tapir lowering process. commit 1a7318da2497995c9cf9f4463fe496fc8212c124 Author: Daniele Vettorel Date: Thu Feb 21 16:07:33 2019 -0500 Some fixes in handling sync hooks, disabled an assert that is too strong (duplicated unwind block CSI ID). 
There is still a bug in handling exceptions (setupCalls() introduces it) commit ed5e202ba282dfc76c7fe5390989a69903364549 Author: TB Schardl Date: Thu Jan 31 17:08:54 2019 +0000 [LoopSpawningTI] Fixed a bug where LoopSpawningTI was failing to extract grainsize metadata on Tapir loops. commit d1f46119bd0541444a8ebca586e98ecb0d9c7958 Author: Daniele Vettorel Date: Thu Jan 31 17:25:51 2019 -0500 Fixed CSI pass trying to interpose indirect calls commit 8b83b6eb9ed740295a94c8aea495007647292dc8 Author: Daniele Vettorel Date: Thu Jan 31 17:18:43 2019 -0500 Actually use the options passed to CSI instead of assuming they're always true commit a1c325e86872a90c051c2bebbf5d86d90cd3b3da Author: Daniele Vettorel Date: Thu Jan 31 16:57:53 2019 -0500 Potential fix for indirect calls when instrumenting callsites commit e16e50e28b303217240aaada557d12c72e2d3026 Author: Daniele Vettorel Date: Thu Jan 31 13:23:41 2019 -0500 Removed debug instructions that disabled some options commit 2b91c61642d898abc76b12612679e62f37abd70b Author: TB Schardl Date: Thu Jan 31 15:43:43 2019 +0000 [IndVarSimplify] For Tapir loops, ensure that the loop contains a canonical induction variable that starts at 0, and add such an IV if not. commit fdcbfb74130993d6d8fa945b910537993bf026ce Author: TB Schardl Date: Thu Jan 31 14:44:37 2019 +0000 [CilkSanitizer] Fixed dependency-checking code for unusual exit blocks of loops, specifically, loop blocks terminated by unreachable. This patch addresses issue #84. commit 07892bbf94fafc5c5961ec667fc2038e01c2ccfb Author: TB Schardl Date: Thu Jan 31 14:42:48 2019 +0000 [LoweringUtils] Fixed processing of exception-handling code in nested tasks generated by -O0. This patch addresses issue #85. commit c24c30a9de591b695ec803e50b1a59cafb88851c Author: TB Schardl Date: Thu Jan 31 14:40:49 2019 +0000 [Tapir] Minor code cleanup. commit d7ae6ea9fccbfe68067c248b333d978cdd6ae666 Author: Daniele Vettorel Date: Tue Jan 29 17:03:51 2019 -0500 Fixes to how runtime spawns are tracked. 
Fixes to function interpositioning commit 56d828481fe4f9b6b84012767f1a65851e17682a Author: TB Schardl Date: Sun Feb 24 00:32:48 2019 +0000 [CSI] Squashed commit of changes by @banex19 to support CSI instrumenting allocation functions, compile-time CSI instrumentation, and surgical instrumentation. commit f0074b5d5d3408430f67a24fd96893e7818cab94 Author: TB Schardl Date: Fri Jan 25 03:35:01 2019 +0000 [CSI] Add CSI hooks for allocation functions -- i.e., variants of malloc and new -- and their corresponding deallocation functions -- i.e., variants of free and delete. commit 07a487962c89caff5b769cb89ef038bf3fd4a482 Author: TB Schardl Date: Wed Dec 19 09:22:05 2018 -0500 [CSI] Fix CSI to instrument global constructors and functions in the startup section. commit a29a982497f091ecad148b9a19f2049e00f43acf Author: TB Schardl Date: Wed Dec 19 09:20:28 2018 -0500 [CaptureTracking] Fix CaptureTracking to handle uses in constant expressions. commit 9b55046b77fc4844ee0ac618b9a5b840f8e236c0 Author: TB Schardl Date: Fri Nov 30 20:11:48 2018 +0000 [MemorySSA] Prototype support to use the DRF assumption when analyzing clobbering definitions. commit b3dac0871c533f63eff19e13ebf4d8d92cc7f416 Author: TB Schardl Date: Fri Nov 30 20:04:16 2018 +0000 [DRFAA] Cleaning up code to use TaskInfo method for querying may-happen-in-parallel analysis results. commit 771dde75be922a0925c4384f792c642aef792161 Author: TB Schardl Date: Fri Nov 30 20:03:17 2018 +0000 [AliasAnalysis] Adding support to legacy pass manager for running DRFAA. commit d0b6d17708a60448c4ef65a95ef0cc239987936c Author: TB Schardl Date: Fri Nov 30 20:02:35 2018 +0000 [TapirTaskInfo] Adding logic to TaskInfo to compute and cache may-happen-in-parallel analysis when requested. commit f3eec05df049d2ef0f671290456bc65374536cba Author: TB Schardl Date: Thu Nov 29 23:38:13 2018 +0000 [CilkABI] Fixed bug in lowering Tapir to the Cilk runtime when the destination of the detach-replacement function call contains PHI nodes. 
commit fbf840903f6f5811027443b7f7bc52964ed5715b Author: TB Schardl Date: Thu Nov 29 19:58:59 2018 +0000 [llvm-diff] Fix llvm-diff to recognize and check Tapir instructions. commit b4516128fdebf72e18720046e141e302d9e8b868 Author: TB Schardl Date: Wed Nov 28 01:53:30 2018 +0000 [SROA] Get results of analyses in a deterministic order to ensure consistent regression-test results across compilers. commit 48b64b9833b71afb8c742d773c327be1774d2090 Author: TB Schardl Date: Tue Nov 27 16:29:49 2018 +0000 [DataRaceFreeAliasAnalysis] Add prototype alias analysis that deduces modref info from Tapir parallel constructs assuming the program is DRF. commit 4c47d30af0b192b2e41f06fb63739a587673893e Author: TB Schardl Date: Tue Nov 27 15:19:12 2018 +0000 [TapirTaskInfo] Comment modernization, and removing dependence on Tapir utility routine defined in Transforms/Utils. commit a8806e3ad9f87d8fe5aac514b0c316f266316206 Author: TB Schardl Date: Mon Nov 26 14:30:47 2018 +0000 [CircleCI] Reverting changes to use clang+lld for CI builds. commit 249eef8adfa1a33e61ed52847b123dae4405c493 Author: TB Schardl Date: Mon Nov 26 13:58:15 2018 +0000 [CircleCI] Attempting to fix CircleCI configuration to use clang+lld. commit 33c81321603eb4df889f8cfb8a8c43c7651e21b0 Author: TB Schardl Date: Mon Nov 26 13:34:39 2018 +0000 [CircleCI] Switching to clang+lld for builds on the CI, to try to improve build efficiency. commit 146291b98e6bec1f8ad2ac11d6d072d0482859ed Author: TB Schardl Date: Mon Nov 26 13:32:09 2018 +0000 Removing submodules. Separate subprojects should be checked out or cloned in the standard way. commit 10e22a97ba840cdf23c4a3b4569530aa7be2c380 Author: TB Schardl Date: Mon Nov 26 12:39:56 2018 +0000 [Tapir] Code cleanup to address warnings from compilation using Clang. commit d41f8fe4e894193ed953082f5aef1d84e25ff92b Author: TB Schardl Date: Sat Nov 24 18:37:35 2018 +0000 [Test/TapirTaskAnalysis] Added test case for may-happen-in-parallel analysis with discrimination syncs. 
commit e6a6f9f5b4a187ca7a614b724305e485b134cbc3 Author: TB Schardl Date: Sat Nov 24 18:36:53 2018 +0000 [TapirTaskAnalysis] Consolidating common code from different passes for performing may-happen-in-parallel analysis. commit 60c93ca97f1d4f99df02194f53e04142e49b905c Author: TB Schardl Date: Thu Nov 8 22:20:24 2018 -0500 [CircleCI] Commenting out unsupported 18.10 build. commit 9e55aaaa1eb5c26cfbf7123407414e83bb2a7257 Author: TB Schardl Date: Thu Nov 8 22:12:25 2018 -0500 [CircleCI] Trying to avoid possible bug with the Linux kernel affecting Sanitizer tests. commit 0f3516e81e4d8f73ae2555f3a6b9e29de961457b Author: TB Schardl Date: Wed Nov 7 23:40:18 2018 -0500 [TapirTaskInfo] Avoid warnings about unused variables during release builds. commit 40ed82f5604bc2b39e5165ec78d6a73cda33d80f Author: TB Schardl Date: Wed Nov 7 23:34:13 2018 -0500 [CircleCI] Disabling Ubuntu 14.04 tests, as lld testing with the sanitizers does not appear to work on this OS. commit 4d8eef8fdfc0798166738ea542f8ec7d34fe3828 Author: TB Schardl Date: Wed Nov 7 21:57:04 2018 -0500 [CircleCI] Reverting configuration change to use lld. commit 2ed2ee8d873d1782e81f1bdac11bfb00483ab470 Author: TB Schardl Date: Wed Nov 7 21:31:44 2018 -0500 [CircleCI] Fix tab-vs-space typo. commit b93218c052de720427415ec0e824d3c23f3f5109 Author: TB Schardl Date: Wed Nov 7 21:27:54 2018 -0500 [CircleCI] Updating CircleCI to build on Ubuntu 18.04 and with/using lld. commit 989bffe36a51696baa9c7325f5deb2558525cebc Author: TB Schardl Date: Wed Nov 7 21:14:01 2018 -0500 [Instrumentation] Bug fixes to remove ArgMemOnly, InaccessibleMemOnly, and InaccessibleMemOrArgMemOnly attributes from a function after instrumentation. commit 5ee7268fab9efef0fdf408624ff4223917dc27cb Author: TB Schardl Date: Wed Nov 7 08:40:17 2018 -0500 [Tapir] Strengthen the properties on outlined functions for Tapir tasks and Tapir loops. Specifically, these outlined functions now use private linkage and global unnamed addresses. 
commit 78b4e7428da165055ab7a3ccf970deba83048865 Author: TB Schardl Date: Wed Nov 7 08:28:28 2018 -0500 [LoopSpawningTI] Documentation and minor code cleanup for the LoopSpawningTI pass. commit 837512c93f12e600243ab7bcd1143d35c378dd74 Author: TB Schardl Date: Mon Nov 5 14:15:27 2018 -0500 [SpawnRestructure] Remove dead code. commit 1144f687e5cda9cdd1284cc6f96444d34b3afdbd Author: TB Schardl Date: Mon Nov 5 13:44:48 2018 -0500 [TaskSimplify] Removing unused function. commit 7c4ca07c12125c855e127d9a8aedf9ffb9d3e6e5 Author: George Stelle Date: Mon Apr 30 09:58:51 2018 -0600 Added nworkers call for openmp abi to fix DAC codegen commit 0c8951c9a3f78ed3aee34eeac8b7acd78c6a8479 Author: TB Schardl Date: Sun Nov 4 23:55:42 2018 -0500 [Bazel] Adding WORKSPACE file to allow Tapir-LLVM source to be built into the XLA compiler. commit f5f5336d0999aaa2997f9b987c4810aa2572b441 Author: TB Schardl Date: Sun Nov 4 23:37:02 2018 -0500 [Tapir] Code modernization and cleanup, based on new TaskInfo-based lowering. commit 48f7bb8fb17520aa319a76e4a56b34cbd0f41920 Author: TB Schardl Date: Sun Nov 4 22:26:56 2018 -0500 [Tapir] Documentation on task-info-based lowering routines. commit 20c984a5ae1bf958d77a0cde0c08a8b4908eb312 Author: William M. Leiserson Date: Wed May 23 12:53:44 2018 -0400 Test parallel instructions in OCaml. commit fb0ade3e823413379c807810e7e46429a2b6deac Author: William M. Leiserson Date: Wed May 23 10:47:31 2018 -0400 Fix spelling error. commit 98122099ab6ff338791ee2b9b1449e067f7f9111 Author: William M. Leiserson Date: Tue May 22 14:40:18 2018 -0400 [Tapir] TapirTarget_val macro, and add the llvm.tapir_opt OCaml package to the set of packages. commit a566450438a43ad073d7fab1337de2da6455072c Author: William M. Leiserson Date: Mon May 21 17:17:34 2018 -0400 Add OCaml bindings for parallel instructions. commit 8c6105781972048026386fce8fb15b5c5e669461 Author: William M. Leiserson Date: Mon May 21 16:32:21 2018 -0400 Add C bindings for Tapir instructions. 
commit b3d5e4284044b095a448797ae348c84e0cb3e4d2 Author: William M. Leiserson Date: Mon May 21 15:47:23 2018 -0400 [Tapir] Add OCaml bindings for the Tapir opt passes. commit 6802fc5d2225fddfaf66a6f4e71731aefeb274c4 Author: William M. Leiserson Date: Mon May 21 12:12:28 2018 -0400 [Tapir] Add C bindings for Tapir passes. commit ca9de15b121eee5b960c532704c8167c6592e36a Author: George Stelle Date: Mon Apr 30 14:16:47 2018 -0600 Only save floating point state for x86 family commit 3d8fdf6b837d18a2f1b419499982d3d2340ad880 Author: TB Schardl Date: Sun Nov 4 16:07:13 2018 -0500 Bug fixes for rebase onto release_70 commit ba188b88e5c7e05cd0139a2d3187ad827884ddf0 Author: TB Schardl Date: Thu Nov 1 09:00:59 2018 -0400 Updated compiler-rt submodule commit a62d638b7ce15cf579983330ee6c3d409b066806 Author: TB Schardl Date: Thu Nov 1 04:00:46 2018 +0000 Updated submodules commit 3b77e57de19b11d18d47c7945772673948de1d86 Author: TB Schardl Date: Thu Nov 1 03:43:32 2018 +0000 [Tapir] Minor code cleanup for consistency. commit 2dff15e8ab63fb0bc569c86dd33a0e4ab7b532d1 Author: TB Schardl Date: Wed Oct 31 22:23:51 2018 +0000 [CilkSanitizer] Add support for static race detection to make CilkSanitizer's instrumentation more precise. At a high level, CilkSanitizer now instruments a module in several phases: 1) Examine all instructions -- specifically, memory accesses and call sites -- that could result in a race. 2) Use DependenceAnalysis and AliasAnalysis to perform static race detection to try to prove which of these instructions cannot participate in a race. 3) Propagate information about which functions might contain racing instructions internally to all functions in the module, to determine which callsites must be instrumented and which can be suppressed from performing CilkSan updates. 4) Insert instrumentation around all potentially racing instructions and other necessary IR objects, e.g., Tapir instructions, memory allocation, and function entry and exit. 
commit 0d0aa3ca333b72325b734289f0346a5b180e98b1 Author: TB Schardl Date: Wed Oct 31 22:21:48 2018 +0000 [CilkSanitizer] Add support for instrumenting allocation functions (e.g., malloc and new) and their corresponding free functions (e.g., free, delete). commit a59f16ee201109d4201df79be1ebe67328bc1977 Author: TB Schardl Date: Wed Oct 31 22:04:22 2018 +0000 [CilkSanitizer] Initial changes to support use with new pass manager and incorporate additional analyses. commit d9f8ef88945597756d358c80a37437ce9f9d6612 Author: TB Schardl Date: Wed Oct 31 21:48:43 2018 +0000 [CSI,CilkSanitizer] Add support for running CSI and CilkSanitizer using the new pass manager. commit b6419e20d3003b21618ba069ed2092552e819795 Author: TB Schardl Date: Wed Oct 31 21:40:32 2018 +0000 [CSI] Rework how function CFG's are prepared and how hooks are inserted into the CFG in order to instrument exception-handling code more simply. commit 3f5130ba1b14f4241ed87fac15577d4377b2c61c Author: TB Schardl Date: Wed Oct 31 21:31:07 2018 +0000 [CSI] Bring the API of the CSI alloca hooks in line with other CSI code hooks. commit cfa6099d21c574ba0ee98592776f5e57c5816d9c Author: TB Schardl Date: Wed Oct 31 21:21:51 2018 +0000 [CSI] Code cleanup and modernization. commit 19d73dd5a29b9eec2a2e3880f98b22e61e32d85a Author: TB Schardl Date: Wed Oct 31 19:17:58 2018 +0000 [Tapir/LoweringUtils] Skip successor blocks of detached-rethrow instructions when computing task inputs and outputs. commit a3e44367380d1bb2f997b28ac5193cb7b8d3c8fd Author: TB Schardl Date: Wed Oct 31 17:59:14 2018 +0000 [DependenceAnalysis] Simplify the coefficient information to help predicate tests handle sign-extended and zero-extended values. commit 868cbb12455720e339b9ce683571683284eca8b6 Author: TB Schardl Date: Wed Oct 31 17:57:06 2018 +0000 [DependenceAnalysis] Add support for evaluating predicates in the context of the common loop containing both source and destination. 
commit 27bfc007d0037155c196bc6cd5ca8c7e6cf40fac Author: TB Schardl Date: Tue Oct 30 16:46:34 2018 +0000 [FunctionAttrs] Add analysis to functionattrs to determine which functions only access argument and/or inaccessible memory. commit 2e75d223a2bbc4d32ea94c50a5bc0cbc3e351ccf Author: TB Schardl Date: Tue Oct 30 12:14:56 2018 +0000 [LoopPass] Add TaskInfo as a preserved analysis available by default to all Loop passes in the old pass manager. commit 731fe361cdf4c26e6aa809425d5202c933ddc1d0 Author: TB Schardl Date: Fri Oct 19 20:25:11 2018 +0000 [TapirTaskInfo] Minor code simplification. commit 85c9ee663ac1eb5aac639c2cd9408ea233dc6006 Author: TB Schardl Date: Fri Oct 19 15:24:10 2018 +0000 [DependenceAnalysis] Added basic capability to DependenceAnalysis to evaluate memory accesses other than loads and stores. commit 32ce14b10cd3ad93602ac3345ddd800cee27ce95 Author: TB Schardl Date: Wed Oct 17 18:53:22 2018 +0000 [DetachSSA] Deprecating DetachSSA pass, in favor of task analysis pass. commit b627f56162cdd00b6eb21879787c8d8e585a690c Author: TB Schardl Date: Wed Oct 10 17:51:11 2018 +0000 [Test/NewPM] Updating tests to include TaskSimplifyPass in new-pass-manager pipelines. commit f7eb0492ac790c06b1fe77d827c9136ae9ac23e9 Author: TB Schardl Date: Wed Oct 10 17:50:21 2018 +0000 [Test/Loop] Updating tests to handle task analysis in new pass manager for loop passes. commit 2dd36f1fa4506e33c08e27212bdebcb2a39c4751 Author: TB Schardl Date: Wed Oct 10 17:46:57 2018 +0000 [LoopAnalysis] Modifying loop passes in new pass manager to maintain task analysis. Task analysis is currently maintained by recomputation at the end of the pass if the dominator tree might have changed. In the future, this should be optimized to update task analysis in a more efficient fine-grained manner. commit 6dca217eb87d6ed34ca4aba953b0c8618ef4f554 Author: TB Schardl Date: Wed Oct 10 17:40:57 2018 +0000 [Test/LoopPass] Adding Tapir task analysis to tests of loop passes. 
commit 119deb3e86572bbe4091db3166eb30c55989ce17 Author: TB Schardl Date: Wed Oct 10 17:39:06 2018 +0000 [Test/Tapir] Converting Tapir tests to test new LoopSpawning pass using old and new pass manager. commit ee28d154e88a0a0475869482b8131e118c218d85 Author: TB Schardl Date: Tue Oct 9 16:24:10 2018 +0000 [LoopUnroll] Modifying loop unrolling to use and update task info. commit 9d56d6035c8c665ee4a3342b31d27da161ca5be9 Author: TB Schardl Date: Tue Oct 9 16:20:36 2018 +0000 [Outline] Adding distinguishing name to outlined functions from Tapir lowering, per the suggestion in #78. commit 6b6c95d5a4a1c37e4c402b8aa7ea609958a8fb51 Author: TB Schardl Date: Tue Oct 9 16:16:06 2018 +0000 [LAA] Fixing LoopAccessAnalysis to ignore Tapir instructions. commit 7083d509ab425ea0887a05c73a65537221f8ee81 Author: TB Schardl Date: Tue Oct 9 16:12:15 2018 +0000 [LTO] Adding experimental support for Tapir lowering in LTO and ThinLTO pipelines. commit 4e4c12a53b8b78afcb152a2ed90540f298418b02 Author: TB Schardl Date: Tue Oct 9 15:38:47 2018 +0000 [TapirTaskInfo] Add support for manually recomputing TaskInfo for a function, given an updated dominator tree. commit 916e89bd1d01d4207c4d5bf89d3ccbb5a392657c Author: TB Schardl Date: Tue Oct 9 16:04:18 2018 +0000 [PassBuilder] Adding support for Tapir lowering pipeline to new pass manager. commit 4f4e0da1039946b03b2ebb44e991d879961d097a Author: TB Schardl Date: Tue Oct 9 16:07:48 2018 +0000 [TargetLibraryInfo] Moving Tapir target for lowering into TargetLibraryInfo. commit b40885d682376037cabbe401ff23c048fc48d69d Author: TB Schardl Date: Tue Oct 9 16:02:02 2018 +0000 [LoopAnalysis] Add TapirTaskInfo analysis to loop passes managed by the new pass manager. commit edd0c155a0258134360dc0a7cdfb2a477176d03e Author: TB Schardl Date: Fri Oct 19 11:35:09 2018 -0400 [TapirTaskInfo] Edit to handle unreachable blocks with no associated spindle. 
commit bacd19fb11756b077678c824f73c66d1e66bd946 Author: TB Schardl Date: Fri Oct 19 11:34:10 2018 -0400 [TapirTaskInfo] Code simplification. commit d094ab9374df1c41e14204235fa8eb6be32cb0ce Author: TB Schardl Date: Wed Aug 29 03:27:05 2018 +0000 [Lowering] Fixed a bug that caused Tapir loops to fail to recognize some inputs. commit 64bfdf0a408a06adb4cbc18649124322fb167947 Author: TB Schardl Date: Wed Aug 29 03:22:31 2018 +0000 [TapirUtils] Add ability to remove loop metadata from a cloned Tapir loop. commit 83f421a9f1231aff4e36b0cd1edd73615d6c2ddf Author: TB Schardl Date: Wed Aug 29 03:12:00 2018 +0000 [LoopSpawningTI] Allow the process of preparing a Tapir loop for outlining to fail gracefully. commit 923cf529001a13a32f2c7300b1865ff30a4020d8 Author: TB Schardl Date: Wed Aug 29 01:53:29 2018 +0000 [Outline] Bringing outlining code for Tapir in line with similar functions. commit 8b0420abf849e8151f3e5db5c3fe474046a2efff Author: TB Schardl Date: Tue Aug 28 12:07:29 2018 +0000 Updated compiler-rt submodule commit f6e6db92b089da699a256a0f5e97837f6da71291 Author: TB Schardl Date: Tue Aug 28 03:33:27 2018 +0000 Updated compiler-rt and clang submodules commit 40e02a9c869fa8ed37fc4026c3c149f8f803750b Author: TB Schardl Date: Sat Aug 25 18:45:23 2018 +0000 [TaskSimplify] Add optimization to remove redundant syncs and redundant sync regions. commit ad1133cec6fd7d07a7d3f7fe98e7a8c9aef8ccbe Author: Masakazu Bando Date: Tue Apr 3 01:21:52 2018 +0000 added support for alloca hooks. commit 44da23e1cc4dcbc8009dac0cde63594cee98182d Author: TB Schardl Date: Sat Aug 25 13:49:36 2018 +0000 [Outline] Code cleanup in Tapir outline functions. commit f12577a41c3a0ff367c0be8232b5cf3bb89d3d78 Author: TB Schardl Date: Sat Aug 25 13:48:25 2018 +0000 [Tests] Adding and updating unit tests for TaskInfo-based lowering passes and task-simplify pass. 
commit 61a1458be5c334da9dba4301711ff9a179601855 Author: TB Schardl Date: Sat Aug 25 13:46:15 2018 +0000 [TaskSimplify] Adding pass for simplifying Tapir tasks within a function. This pass uses TapirTaskInfo to perform more complex simplification than SimplifyCFG can manage easily. commit 3033ea37caece40e8d43e79b6eed168a2b1ff503 Author: TB Schardl Date: Sat Aug 25 13:34:23 2018 +0000 [NewPM] Added Tapir lowering passes to new pass manager. commit 3b0c2f9c90a08aa6b93b9429ee1716531155f4ba Author: TB Schardl Date: Sat Aug 25 13:21:23 2018 +0000 [LoopSpawningTI] New LoopSpawning pass, which outlines Tapir loops using TapirTaskInfo analysis. Like the refactored TapirToTarget pass, this pass outlines all Tapir loops, including nested loops, in linear time. The LoopSpawningTI pass assumes that IndVarSimplify has been run on all Tapir loops before execution. commit 1bdcb5fc824336199e21efba7be8354db24c9f6f Author: TB Schardl Date: Sat Aug 25 12:57:33 2018 +0000 [TapirToTarget] Major refactor, primarily to use TapirTaskInfo analysis. This commit includes the following changes. * Refactor TapirToTarget and associated classes to use TapirTaskInfo to outline Tapir tasks. By using TapirTaskInfo, TapirToTarget now outlines all tasks within a function, including nested tasks, in linear time. This refactor also updates the interface between TapirToTarget and the Tapir targets, such that each target can simply update an already outlined task, rather than having to call lowering utilities to perform outlining themselves. * Add an interface to the new pass manager. To simplify this interface, this change modifies the argument to the TapirToTarget pass to be the enum class of Tapir targets, rather than a pointer to a constructed Tapir target. * Fixes memory management for TapirToTarget and Tapir targets to properly clean up objects constructed during lowering. This commit includes a new helper class for gathering information on Tapir loops. 
This commit does not make use of this class, but the changes to incorporate this class were hard to disentangle from the other changes. (My bad, sorry.) The Tapir-loop helper class is used extensively by an updated LoopSpawning pass, which appears in another commit. commit e8942608ab22e29bfbbaa8629b0880d83e991201 Author: TB Schardl Date: Sat Aug 25 12:34:58 2018 +0000 [TapirLoopHints] Remove OptimizationRemarkEmitter from TapirLoopHints. commit 3267b2ffcad693f4c9b2ca75e4cbde2e4f23d691 Author: TB Schardl Date: Sat Aug 25 12:27:07 2018 +0000 [TapirTaskInfo] Updates to iterators, graph traits, and memory management. commit 62f7833b5e33ea87cf757dfdc705a79a8a7da3d2 Author: TB Schardl Date: Sat Aug 25 03:46:31 2018 +0000 [TapirUtils] New utility functions for serializing detaches, based on TaskInfo. commit 784ac698ec74189fad755c504d3de2d8ce733b40 Author: TB Schardl Date: Sat Aug 25 03:42:56 2018 +0000 [SimplifyCFG] Prevent SimplifyCFG from serializing a detach that can throw. commit bfdb560cb0315dbea0cd9289fdd6e1fefde6b040 Author: TB Schardl Date: Sat Aug 25 03:40:44 2018 +0000 [IndVarSimplify] Enable IndVarSimplify to effectively simplify induction variables in Tapir loops. commit 3014ebd35986946c0526892f05bc50212a3540e5 Author: TB Schardl Date: Sat Aug 25 02:33:05 2018 +0000 [DetachInst] Added method on DetachInst to get the landing pad for detaches that can throw. commit 9a2464462785bf1d6d55268016feae003a9358d1 Author: TB Schardl Date: Sat Aug 25 02:12:47 2018 +0000 [Test/TapirTaskInfo] Adding a few unit tests for Tapir task info. commit 45ab1afed8c8bf794e446e596beec34f47633776 Author: TB Schardl Date: Thu Jul 19 03:51:19 2018 +0000 [TapirTaskInfo] Updating passes to use isAllocaParallelPromotable method in TapirTaskInfo. 
commit 19162476bde74bba2659487c65821717d17cbebb Author: TB Schardl Date: Thu Jul 19 03:46:03 2018 +0000 [TapirTaskInfo] Enhancements to compute incoming and outgoing edges between spindles for easy traversal and to analyze allocas in functions that spawn tasks. Prototype function pass that uses TapirTaskInfo to analyze functions. commit 801e90aaa91e71d2381bffe59a65529fb9e11723 Author: TB Schardl Date: Thu Jul 12 20:48:58 2018 +0000 [TapirTaskInfo] More work on TapirTaskInfo analysis, including predecessors and successors of spindles and helper routines to perform fixed-point analysis on spindle structure. commit 8a110adde61468628c72a88ba900182ca4fd0bf9 Author: TB Schardl Date: Thu Jul 12 12:26:24 2018 +0000 [FunctionAttrs] Ignore Tapir instructions when determining whether a function accesses memory. For this analysis, Tapir instructions only access memory accessed by other instructions in the function. commit a2092c3a35afc851ef955705e262a2f76bcf9a24 Author: TB Schardl Date: Thu Jul 12 12:25:00 2018 +0000 [MemorySSA] Adjust MemorySSA to treat sync more like a fence. commit af445de8f3db8dfaf432bbddd868a91902b16dca Author: TB Schardl Date: Tue Jul 3 17:20:27 2018 +0000 [TapirToTarget] WIP Test code for TaskInfo analysis. commit f26294a5c0a60f7dfc00fc12d799fce9b688e0cd Author: TB Schardl Date: Tue Jul 3 17:16:51 2018 +0000 [TaskInfo] Initial commit for WIP TaskInfo analysis. commit 5fd7c0183dd287dc67aebad24048527f320f92ef Author: TB Schardl Date: Tue Jul 3 17:07:45 2018 +0000 [Test/Tapir] Test for lowering static allocations. commit 2e50cb7198f115a33df28f7d2c8b6f5ac244ea86 Author: TB Schardl Date: Tue Jun 26 19:08:00 2018 -0400 [LoopSpawning] Fixed bug where LoopSpawning expanded code in the wrong place when replacing phi nodes. 
commit 9f17fd47ab8d0f74b3eee1403c035e6ccdd43c6e Author: TB Schardl Date: Mon May 7 00:15:31 2018 +0000 Updated Clang submodule commit f4427782fa025d3da39bc756e7f42832460afcc2 Author: TB Schardl Date: Sun May 6 13:18:18 2018 +0000 Updated compiler-rt submodule commit 1353be13c3aa688ca6484c40da154181b3796345 Author: TB Schardl Date: Sun May 6 13:14:29 2018 +0000 Updated clang submodule commit 7d649cdf67c04f326dd26a6f5ebcc7d1760df33d Author: TB Schardl Date: Tue May 1 13:43:15 2018 +0000 [Transforms] Code cleanup to match mainline LLVM. commit d690a6b4803840740f8c63e347a61e987c7f6e2e Author: TB Schardl Date: Thu Mar 22 04:28:15 2018 +0000 [Test/Tapir] Updated Tapir-lowering tests to reflect updates to Tapir-lowering code. commit c105771ea55cff76c0e9461b22bb85d624687128 Author: TB Schardl Date: Thu Mar 22 00:41:04 2018 +0000 [TTI] Add a simple cost model for Tapir operations: syncregion_start intrinsics are free, and detach instructions cost 30x a function call. commit 4a38a469b9365a6c63b822f30793adc0fc8df506 Author: TB Schardl Date: Thu Mar 22 00:38:55 2018 +0000 [CilkABI] Fix Cilk ABI code generation to properly store updated pedigree values. commit b45ac63a44d082898d2f58ad95410d8dc092d4b6 Author: TB Schardl Date: Thu Mar 22 00:38:05 2018 +0000 [Tapir/LoweringUtils] Code cleanup. commit a1f8ee6daeff85e56e0c69d4361022d021569cfa Author: TB Schardl Date: Wed Mar 21 21:24:26 2018 +0000 [CSI] Prevent instrumentation of any function in the .text.startup section. Some of these functions are called from global constructors but do not appear in the global constructor list. To properly avoid instrumenting functions that run before CSI is initialized, we defensively skip instrumentation of these functions. commit dd1a84ab4d4d4fedbab84a5383a97413641c4ee6 Author: TB Schardl Date: Wed Mar 21 21:22:33 2018 +0000 [CilkSanitizer] Removing dead string constants. 
commit 696ab66fd92517a42730393dcc52fe23c1c256da Author: TB Schardl Date: Sun Mar 11 19:30:54 2018 +0000 [DetachSSA] Code modernization. commit f5b42e5f26a3675d2777c745624a7250feb74ec9 Author: TB Schardl Date: Sun Mar 11 19:28:39 2018 +0000 [LoopSpawning] Remove dependence of LoopSpawning on TapirTarget. Added flag to allow use of CilkABI to handle Tapir loops. commit 0d55de162c8a5ee3746380a50ebb4e5efeb08713 Author: TB Schardl Date: Sun Mar 11 19:18:57 2018 +0000 [Tapir] Add code to Tapir lowering passes for inserting and lowering calls to compute Tapir loop grainsizes. commit cecc7465ea6579e496cfa957aeea927f951f2f2a Author: TB Schardl Date: Sun Mar 11 19:01:46 2018 +0000 [Intrinsics] Added intrinsic for computing Tapir loop grainsize. commit 2d5dd83343cb4d5b92683f708c064642d3174678 Author: TB Schardl Date: Sun Mar 11 19:00:54 2018 +0000 [InlineFunction] Check if function inliner would want to modify the exception-handling code for a detach. commit 7c7c7ca7d116062266a8ed4b80513ec78b756505 Author: TB Schardl Date: Sun Mar 11 18:57:02 2018 +0000 [CSI] Add basic functionality to instrument exception-handling code in detached tasks. commit 23fe0b3ffefbdbd60c8ad2d459f63bff567f87d1 Author: TB Schardl Date: Sun Mar 11 18:55:04 2018 +0000 [CilkABI] Updated code for handling Tapir loops using the __cilkrts_cilk_for. commit 085709f420d6f3985511182a97140a0c1562de12 Author: TB Schardl Date: Sun Mar 11 12:17:20 2018 +0000 Updated Clang submodule commit f9e09fc8cf8e80564a33b0201e074e5d13d8f862 Author: TB Schardl Date: Sat Feb 24 22:12:05 2018 +0000 [BitcodeReader] Fixing bug that caused BitcodeReader to fail to process Tapir sync instructions correctly. This change addresses issue #56. commit d1e8b16221b8bbec5ae78b50d4ec309e34694912 Author: TB Schardl Date: Fri Feb 23 06:00:38 2018 +0000 [CSI] Delay getting the dominator tree until after calls might have been transformed into invokes. 
commit 64d35a218588f0fc75392de0ff5d110bfc9898cb Author: TB Schardl Date: Thu Feb 22 20:54:13 2018 +0000 [CircleCI] Run regression tests serially on the CI, to accommodate its computing resources. commit 037c0c8dddf551f4e74eb5cac64118f7b370182a Author: TB Schardl Date: Thu Feb 22 18:47:16 2018 +0000 [CSI] Convert calls to functions that can throw into invokes, and instrument the exceptional return of each invoke. This commit also includes some minor code cleanup. commit 95f6fc323cabad8673baf084721e27604c8f2446 Author: TB Schardl Date: Thu Feb 22 18:44:05 2018 +0000 [CSI] Adding code to set the basic-block properties that identify landing pads and EH pads. commit 44e2a1d3688802fc8e81f890fbc52a8ac0c74647 Author: TB Schardl Date: Thu Feb 22 18:41:23 2018 +0000 [CSI] Adding property to function exits to specify whether the exit rethrows an exception. commit 436ebf78e234f920513012f6843d41a8495aacf2 Author: TB Schardl Date: Sat Feb 17 03:25:59 2018 +0000 [CircleCI] Updating CilkRTS build test to use updated Cilk Hub clone of Cilk Plus RTS. commit 3d6ac8216ac7e960d124263412775f23f06da8a8 Author: TB Schardl Date: Fri Feb 16 17:34:31 2018 +0000 Updated compiler-rt submodule to include recent CMake fixes. commit 2e3eba2075b0bc2a50e6ba8f1f20229e13db4846 Author: TB Schardl Date: Fri Feb 16 17:04:50 2018 +0000 Updated clang submodule to reflect updated detached-rethrow syntax. commit 61067ddafbd0e4534b832da83244a98dd14875f8 Author: TB Schardl Date: Thu Feb 15 14:59:04 2018 +0000 [Tapir] Adding sync regions to detached rethrows. Updating LoopSpawning and Tapir lowering code to handle nested detached tasks that involve exception handling code. Reorganizing code to better differentiate generic utilities for operating on Tapir instructions (TapirUtils) from more specialized utilities for lowering Tapir instructions (LoweringUtils). commit c990e89a9610c683cfe1e46a002019c00def9ad1 Author: TB Schardl Date: Thu Feb 15 14:52:45 2018 +0000 [CSI] Code cleanup. 
commit 6b6acaf2e866409e83d99ffd3dfff13bed5fa1fd Author: TB Schardl Date: Thu Feb 15 14:52:28 2018 +0000 [Tapir] Code cleanup and comment fixes for Tapir targets. commit 399f07a27903c35ec6803a97299db040d5ec9792 Author: TB Schardl Date: Thu Feb 15 14:42:59 2018 +0000 [Tapir] Moving Tapir-loop-hint processing to Utils/TapirUtils. commit b3033fdc9c8866e656efaf99917de4410f2e9356 Author: TB Schardl Date: Mon Feb 12 18:45:33 2018 +0000 Updating test files to reflect changes to master. commit 4c482208ceae69e82c77b39d810da1e85f9b73c9 Author: TB Schardl Date: Sun Feb 11 16:37:59 2018 -0500 [Test/Tapir] Renaming test files to match naming convention. commit 5079737bc1c0a763efe7adef8689a70506c05dd0 Author: TB Schardl Date: Thu Feb 8 12:17:33 2018 -0500 [Test/Tapir] Adding tests to verify that Tapir can compile the Cilk Plus runtime system. commit 543d467517cf1e7af561c8ea30d040b28213a947 Author: TB Schardl Date: Thu Feb 8 06:00:06 2018 +0000 [Tapir] Handle structure-return parameters correctly in outlined helper functions. This commit addresses issue #38. commit 4f3fb4bc9c88ace89e1d998a35b9b7a08669a0e9 Author: William S. Moses Date: Wed Jan 10 14:39:16 2018 -0500 Add Tapir versioning commit c11a64df3cd68d83f0528a03e32e6b954f404b0c Author: TB Schardl Date: Mon Feb 12 18:11:35 2018 +0000 Updated clang submodule commit 52e93a76b365faba845f2e63105f772935a82b3b Author: TB Schardl Date: Mon Feb 12 18:07:47 2018 +0000 [Tapir] Adding support for lowering Tapir's representation of exceptions. This commit also includes assorted cleanup and code modernization of Tapir's lowering code. commit c7bc4b0b1c408b72a0204f7156afd0e94547cbcb Author: TB Schardl Date: Sun Feb 11 21:44:48 2018 +0000 [Tapir] Re-enabled the '-ftapir=none' option for emitting Tapir without lowering. This flag is intended for testing and debugging purposes. 
commit ee7aed3fe4a7bd0141c3b61bd4cfa2e231309f38 Author: TB Schardl Date: Fri Feb 9 16:36:39 2018 +0000 [Tapir] Fixes and cleanup to Tapir lowering to process nested detaches correctly. commit 3be026395cdb441fadfc884c1daaa413fcb820e1 Author: TB Schardl Date: Fri Feb 9 14:31:30 2018 +0000 [Tapir] Updating existing analyses and transformations outside of Tapir lowering to accommodate detach-unwind's and detached_rethrow's. commit 65f14e4f97879b8d45c6eb3ba43be821e93cfd90 Author: TB Schardl Date: Fri Feb 9 14:08:21 2018 +0000 [Tapir] Added a new invokable intrinsic, detached_rethrow. This intrinsic serves to separate exception-handling code in a detached task from exception-handling code in the detacher. The detached_rethrow intrinsic identifies a landing-pad destination in the detacher that will catch an exception thrown by a detached task. Just as the reattach instruction models the ordinary return from a detached task, a detached_rethrow models an exception return, i.e., a resume, from a detached task. Whereas the reattach instruction identifies the normal continuation from its corresponding detach, a detached_rethrow (roughly speaking) identifies the unwind destination of the corresponding detach. commit cbc80e1539dc31a97774ad294a0c4bb69822ab95 Author: TB Schardl Date: Fri Feb 9 13:59:24 2018 +0000 [Detach] Added an optional unwind destination to the detach instruction. Conceptually, this unwind destination identifies a landing-pad continuation where control will return to if the detached task terminates by throwing an exception. commit 512142586a8278540580ed835b4c4bf16bf2fd13 Author: TB Schardl Date: Wed Feb 7 21:56:38 2018 +0000 [Test/Tapir] Adding test for bug where TRE would erroneously eliminate a sync. commit d2980f2f7c3bae0a34170bd9aa776edacb6a0e36 Author: TB Schardl Date: Wed Feb 7 21:55:45 2018 +0000 [Test/Tapir] Adding test for handling the returned parameter attribute when outlining a Tapir helper function. 
commit fdebbf0b56d85d2fe26df1e3d15e6d48a11af267 Author: TB Schardl Date: Wed Feb 7 21:53:33 2018 +0000 [Test/Tapir] Adding test case for bug involving hoisting a setjmp past a conditional branch. commit 8d6c1c54e33746ead74f7cede1980d33af9d2dc6 Author: TB Schardl Date: Wed Feb 7 20:04:05 2018 +0000 [CilkABI] Add opt flag to prevent inlining of Cilk ABI calls. This flag is only useful for debugging. commit d16197b289040ea2763ccc4b2af649f8fac6e662 Author: TB Schardl Date: Mon Feb 5 14:42:31 2018 +0000 [Cilk] Mark functions with continuations that can be stolen in Cilk as "stealable", and ensure that "stealable" functions have opaque modifications to the stack pointer. commit 21ced30f116bdca167fc6d6b139fe30950bb7ea4 Author: TB Schardl Date: Tue Jan 30 22:18:56 2018 -0500 [TRE] Fixed bug where TRE would not replace a removed sync if it failed to eliminate a recursive tail. commit 55e5ff530ee5c7811b3a568b8b65f1fc61cb0cda Author: TB Schardl Date: Tue Jan 30 22:17:48 2018 -0500 [Outline] Remove the returned attribute from function parameters for outlined helper functions, since these helper functions return void. commit a6b4a3a094e95ed1394909a86453819d56ea3720 Author: TB Schardl Date: Fri Jan 19 03:23:00 2018 +0000 [Cilk] Optimizing ABI implementations, using atomic instructions. commit 41127a33ed46c5d36265281919dd3590ac96a1eb Author: TB Schardl Date: Fri Jan 19 03:06:02 2018 +0000 [CilkABI] Code cleanup and modernization. commit 8cba4e227ca41c3bff5ff3b63c89d7263d48f7a9 Author: TB Schardl Date: Fri Jan 19 02:40:33 2018 +0000 [Tapir] Reworking specification of Tapir target to simplify code and support opt command-line flags. commit 1f9426dfdbd8a75efdd7d8df08796bc0d30b5cbb Author: TB Schardl Date: Thu Jan 18 23:26:33 2018 +0000 [CilkSanitizer] Added function attribute for Cilk sanitizer. commit e9be04b4007285822573363fc2c3d929fe9f806c Author: TB Schardl Date: Sat Jan 13 06:11:24 2018 +0000 Updated clang and compiler-rt submodules. 
commit 4476f07d5bb6d97b1fe971ec72ee95c6bab77ae6 Author: TB Schardl Date: Sat Jan 13 05:39:50 2018 +0000 Bug fixes for initial rebase onto version 6. commit e30ae483a294ed2ab02c0a43511ecd4f1cf4dd04 Author: TB Schardl Date: Wed Jan 10 05:19:30 2018 +0000 [CilkRABI] Relaxing the atomic ordering of the fence used in __cilkrts_detach. commit 5972102dc994bcfe73866c5d02cf091b4cf9c47a Author: TB Schardl Date: Mon Jan 8 15:22:35 2018 +0000 [CilkABI] Working around GCC bug in namespace handling. commit 2a80d8dcdb52406828741968150fb29d54a26567 Author: TB Schardl Date: Sun Dec 17 02:48:16 2017 +0000 [CilkRABI] Adding CilkR Tapir target. commit 0e419ca1c5825a77cc4907224c9e65eda1c15abb Author: TB Schardl Date: Sun Dec 17 02:33:35 2017 +0000 [CilkABI] Additional fix to allow Tapir to target different Cilk runtime systems. commit 5fcc34a7829170c69879790685c182474852dbec Author: TB Schardl Date: Sat Dec 16 01:38:01 2017 +0000 [Tapir] Code refactoring to support the option of lowering Tapir to the CilkR runtime system. commit dcaa0204dde6d7e06a7accf9d84babdda4cfadf9 Author: TB Schardl Date: Wed Dec 13 21:30:58 2017 +0000 [CSI] Fixed bug where debug information was not always set on CSI hooks for Tapir constructs. commit 97ad59dc3e7d815c26e5a175cfb21b5a6b78fd07 Author: TB Schardl Date: Thu Dec 7 02:33:38 2017 +0000 [CSI] Code cleanup. commit 19efb0e913d21fab78b8240c465c3756f76da7d4 Author: TB Schardl Date: Tue Dec 5 23:47:04 2017 +0000 [CSI] Bug fix to ensure correct initialization of CSI pass. commit 345b88c2004ef761de130e940b24340158286c80 Author: TB Schardl Date: Wed Sep 27 01:50:09 2017 +0000 [CSI] Adding Tapir instrumentation to CSI. Adding forensic table to CSI that records the number of LLVM IR instructions in each basic block. commit d24410060f91212bb6a01f3391a74e3501bd7075 Author: TB Schardl Date: Sun Dec 3 15:26:46 2017 +0000 [GVN] Bug fix to GVN to handle case where multiple detached tasks share the same continuation block. 
commit 6a51e69bcbaeca4f9f9606fdbf658eafd4ce1f94 Author: TB Schardl Date: Mon Nov 20 19:04:57 2017 +0000 [LoopSpawning] Fixed bug where loop-limit computation involved a smaller type than some of the loop's induction variables. commit 4c3e3bbcbe0a2733f15341374f8edccad1533dbd Author: TB Schardl Date: Sun Oct 22 15:43:50 2017 +0000 [CSI] Avoid instrumenting calls to LLVM intrinsics that don't lower to anything. commit 7c7bd743c56a748f85326afbae64e594d40db81d Author: William Moses Date: Mon Dec 4 21:09:36 2017 -0500 Fix test runner (#27) commit 73af4bb3b080f1406ba4c499483ff762fc23f104 Author: William Moses Date: Mon Nov 27 14:32:37 2017 -0500 Compiler RT docker fixes (#25) * Add CSI Compiler RT * Add nice docker commit 1bbfe72829d6be08d7c20162c59562036b5f5e0a Author: William S. Moses Date: Wed Oct 11 17:25:56 2017 -0400 Fix OpenMP Backend bug commit 6f6542de9314b9609b0a0d419e451a31ec6fbbdb Author: TB Schardl Date: Mon Oct 2 01:47:50 2017 +0000 [LoopSpawning] Adding support for LoopSpawning to extract a grainsize value from the hints associated with a loop. commit 455d301d822f3eff4a80f0359acfcd90a1e08610 Author: William Moses Date: Sat Oct 7 23:41:30 2017 -0400 OpenMP Backend for Tapir (#18) * Initial move towards multi backend * [WIP] OpenMP backend * Finish omp working v1 * Add openmp * Working OpenMP * Remove simplifycfg option * Add fast omp option commit 35428f40f3b1c19db8ee116b37496bf48693dd25 Author: William S. Moses Date: Tue Sep 12 17:10:59 2017 -0400 Rhino LICM commit 68757030505bca4e7408d9e6ea1ff37dfaa76ba5 Author: William S. Moses Date: Tue Sep 5 01:36:38 2017 -0400 Starting Rhino CSE commit 9eb6949092c0c19112faefd55cd2ad8bb635af8f Author: Victor A. Ying Date: Thu Aug 31 10:28:27 2017 -0400 [Outline] Accept null ExitBlocks in findInputsOutputs(). Solving more of the same problem as in issue #14. commit 2c65f4561bad38e6a4738ea2ec7561155c259d7e Author: Victor A. Ying Date: Thu Aug 31 09:58:42 2017 -0400 [Outline] Accept null ExitBlocks in CloneIntoFunction(). 
This fixes issue #14. commit c0a38efa745b122f6a505f9116f88e5da46587d7 Author: William S. Moses Date: Tue Aug 8 22:38:40 2017 +0000 Squashed commit of the following: commit 9eef73e8b7b5dab5d8e04a0fa584fd765e5b1d13 Author: TB Schardl Date: Fri Aug 4 01:43:13 2017 +0000 [TRE] Fix bug with Tapir modification of TRE that was causing unit tests to fail. commit 92b16128f980b6683cb53a324480d7305f4327d4 Author: TB Schardl Date: Thu Aug 3 13:10:01 2017 +0000 [README] Attempting to clean up README file. commit fa242e0f01133707c3a483cfabedf3ee28abba7a Merge: a8e2b795fb3 f55a27066ac Author: TB Schardl Date: Thu Aug 3 12:52:13 2017 +0000 Merge branch 'master' of github.com:wsmoses/Parallel-IR commit a8e2b795fb34c87cd2c884235c3b50be0c17c3e7 Author: TB Schardl Date: Thu Aug 3 12:49:10 2017 +0000 [README] Updated README. commit f55a27066ac03e39e6a01ca30e86bc48df76fa7e Author: William S. Moses Date: Tue Aug 1 20:17:47 2017 +0200 Add CircleCI commit 964b5bea84c59cdc7e27bc07e98f12edc821c4fc Author: TB Schardl Date: Wed Aug 2 21:35:11 2017 +0000 [LoopSpawning] Correctly handle Tapir loops where the loop body uses the variable storing the number of loop iterations. Fixes #13 commit 8d4f443d9c9b78478279d598c4eb9abd79db1e59 Merge: 452aac7e148 ef122d645a8 Author: TB Schardl Date: Wed Aug 2 21:35:22 2017 +0000 Merge branch 'master' of github.com:wsmoses/Parallel-IR commit 452aac7e14852491121f7ca26f24f420414a5245 Author: TB Schardl Date: Wed Aug 2 21:35:11 2017 +0000 [LoopSpawning] Correctly handle Tapir loops where the loop body uses the variable storing the number of loop iterations. Fixes #13 commit ef122d645a83c9ad9ee743329208ee001071a4f2 Author: William S. Moses Date: Tue Aug 1 20:17:47 2017 +0200 Add CircleCI commit 9be75a22ad015c307665d277994651671a15ae60 Author: TB Schardl Date: Mon Jul 10 15:57:49 2017 +0000 [CSI] Bug fixes and refactoring of the CSI instrumentation pass. 
commit 6ce5f2f27b1bc2d92e48420376c2a37d1608f3a1 Author: TB Schardl Date: Mon Jul 10 13:37:39 2017 +0000 [Tapir] Allow Tapir lowering to Cilk to fill in missing definitions of internal Cilk types, including __cilkrts_worker and __cilkrts_pedigree. commit 631e4626d2ba614eaf8a68113c2fdf02f9f8e246 Author: TB Schardl Date: Fri Jun 30 21:33:54 2017 +0000 [DetachSSA] Initial implementation of an analysis pass that tracks the creation and synchronization of detached tasks. This analysis is based on MemorySSA. commit 923a9052c95c43df1405fad56f2cb1ef12a47412 Author: TB Schardl Date: Tue Jun 27 21:54:51 2017 +0000 [Tapir] Adding support for sync regions. A sync region is designated by a token emitted by a call to @llvm.syncregion.start. The detach, reattach, and sync instructions all take this token as a parameter. A sync instruction in a sync region SR only waits on computations detached from detach instructions in the same sync region or in a detached descendant thereof. By convention, a call to @llvm.syncregion.start occurs in an entry block, that is, either the entry block of a function or the entry block of a detached sub-CFG. For Cilk programs, a sync region is started for any function that performs a _Cilk_spawn or _Cilk_sync. A separate sync region is also started for each _Cilk_for in the function. Sync regions address two issues with sync instructions. First, with sync regions, the implicit sync at the end of a _Cilk_for only waits on the parallel iterations of that _Cilk_for, not on any other spawned computation within the function. Second, when a function is inlined, any _Cilk_sync performed by that function will not erroneously wait on detached computations in its caller. This commit includes simple cleanup passes involving sync regions. One form of cleanup removes sync instructions in sync regions that contain no detach instructions. Another form removes empty sync regions, i.e., calls to @llvm.syncregion.start whose produced token is never used. 
Future work will analyze sync regions more carefully and combine them when it is deemed safe. commit 9b55aac80aca2a520ba7627a020af413be18a29f Merge: 9b5abba8e85 eece7bcb178 Author: TB Schardl Date: Sat Jun 3 12:42:01 2017 +0000 Merge branch 'master' of github.com:llvm-mirror/llvm commit 9b5abba8e85b01c08d49885fdc6d871ed0e522e9 Merge: 51a4df5f3e5 6ef5e10ad7e Author: TB Schardl Date: Wed May 31 02:07:52 2017 +0000 Merge branch 'master' of github.com:llvm-mirror/llvm commit 51a4df5f3e536a65c0a926ee7c87eb47c80aec7f Merge: 6f69cdf478c 0559b4fa45c Author: TB Schardl Date: Tue May 30 18:19:52 2017 +0000 Merge branch 'master' of github.com:llvm-mirror/llvm commit 6f69cdf478cc2801c74964e3a233ad46d16245cc Author: William S. Moses Date: Mon May 15 01:15:30 2017 -0400 remove Rhino print commit d719d172fd8967cccb6625ff1ec54e439cdfe989 Merge: d2b4d301879 2db0ffd4753 Author: William S. Moses Date: Mon May 15 01:04:30 2017 -0400 Merge branch '6898' of github.com:wsmoses/Parallel-IR into 6898 commit d2b4d301879c0a75cbbd9d7c49e51581543ff08b Author: William S. Moses Date: Mon May 15 01:04:14 2017 -0400 pushing rhino flag commit 2db0ffd47534ee35deaea877d73d8484cb94c01f Author: Douglas Kogut Date: Mon May 15 00:24:54 2017 -0400 spawn unswitch commit 8f57e0739bf9fc6736472c89f91a533630efd5c3 Merge: 9660ce4abc0 be7eafc7179 Author: William S. Moses Date: Sun May 14 17:36:17 2017 -0400 Merge branch 'master' of github.com:wsmoses/Parallel-IR into 6898 commit 9660ce4abc060598a20b7c5d30a217bdc3af569e Merge: 002fb57bb06 780934e4b6a Author: William S. Moses Date: Sun May 14 17:35:58 2017 -0400 Merge branch 'master' into 6898 commit 002fb57bb069f18319ceab0d287c22166999a766 Merge: 35669cce54f acefa6d5a77 Author: William S. 
Moses Date: Sun May 14 15:32:41 2017 -0400 Merge branch '6898' of github.com:wsmoses/Parallel-IR into 6898 commit acefa6d5a77cad0cb2da8f5c6cfe3af1ca15129e Author: Douglas Kogut Date: Sun May 14 14:58:08 2017 -0400 spawn unswitch commit be7eafc7179b8591b0007a25a2e3aae31cfc7818 Author: TB Schardl Date: Tue May 9 21:34:49 2017 +0000 [Mem2Reg] Updated Mem2Reg to find the entry blocks of the function and all detached sub-CFG's more efficiently. commit 12f929ae136d57fd9e744bc2dac8c072d01e2053 Author: TB Schardl Date: Tue May 9 21:15:58 2017 +0000 [CilkABI] Marked additional loads and stores to CilkRTS stack frames as volatile. Fixed bug in extracting exception-handling exit blocks for detached CFG's. commit 9bf9a4d58c9f3a09164b8a86202bcee2f5abf553 Author: TB Schardl Date: Tue May 9 21:14:33 2017 +0000 [InstCombine] Fixed bug to prevent InstructionCombining pass from sinking operations that read memory across Tapir instructions. commit 719872be7ce9d8cdbc7036c6eb7d3d77ebeff5cf Merge: f63b0fed940 10826f2652f Author: Douglas Kogut Date: Fri Apr 28 20:39:49 2017 -0400 Merge branch '6898' of github.com:wsmoses/Parallel-IR into 6898 commit f63b0fed9406ac9f5f8b54626a9c6ef965cceaba Author: Douglas Kogut Date: Fri Apr 28 20:39:34 2017 -0400 pushing measuring scripts commit 991ca791848c9936677a0b7184a77cf0eaf6734d Author: TB Schardl Date: Wed Apr 26 12:17:07 2017 +0000 [LoopSpawning] Cleaning up code for handling exceptional exits. commit 10826f2652fea87d11ec166954c2d7b02917c21d Author: Jiahao Li Date: Tue Apr 25 23:24:56 2017 -0400 Alters sync elimination pfor microbenchmark. commit 9d5172300fcd2528dc4db210beccfa6cecb7816f Author: Jiahao Li Date: Tue Apr 25 23:07:07 2017 -0400 Makes LoopFusePass work. commit 46720980313325bf80262b8fd447db8e90f1c307 Author: TB Schardl Date: Wed Apr 26 00:10:42 2017 +0000 [LoopSpawning] Bug fix to find all exception-handling exit blocks of a Tapir loop. 
commit 48e7791f51c0a3b0fc27cc280e458892dac30fbd Author: TB Schardl Date: Tue Apr 25 01:30:48 2017 +0000 [Tapir] Preliminary support for C++ exceptions on Linux. commit 4613a6461de60516a6242270e4c6cd7beb1c5bec Author: TB Schardl Date: Tue Apr 25 01:28:09 2017 +0000 [CSI] Updated CSI pass to support separate property types per IR object. commit d5331895cb2d1437b7788469ac72c731b65a949b Author: Jiahao Li Date: Sat Apr 22 15:21:03 2017 -0400 Have makefile for sync_elimination_pfor_mb emit .ll for the sync eliminated version. commit 3b2b3c3429af3f1a173970cef45844639d35361b Author: Jiahao Li Date: Sat Apr 22 15:09:04 2017 -0400 Cleans up makefile for sync_elimination_pfor_mb. commit 21aa2bbee01f1dbc86681a7ed78b7cfd8fd611d5 Author: Bojan Serafimov Date: Sat Apr 22 14:57:32 2017 -0400 Fix compile error commit 0c5e6d15f12288dc29e9f08ff9d011c1204f69ba Author: Jiahao Li Date: Sat Apr 22 14:45:38 2017 -0400 Fixes sync_elimination_pfor_mb micro benchmark. commit a387e9f3e16ab5253eec663bbb56c246e4dbda55 Author: Jiahao Li Date: Sat Apr 22 14:26:06 2017 -0400 Fixes SyncElimination blow up with function calls. commit 44e8409f071578546b572b6dd807a83092867bfa Author: Bojan Serafimov Date: Mon Apr 10 12:06:51 2017 -0400 Fix tests commit adeb3eaaf5af3d9c816db1a704324c9f715a0277 Author: Jiahao Li Date: Mon Apr 10 11:46:36 2017 -0400 Handles instructions with null call sites. 
commit 96f24b65e5a4634c8a78ac0e53dd552fe46d185d Author: Bojan Serafimov Date: Mon Apr 10 10:19:42 2017 -0400 Ignore sync instruction in rosetta commit d874567d6e6cdfc88c0faab3122975046162ec09 Author: Bojan Serafimov Date: Tue Apr 4 19:14:29 2017 -0400 Add nested loop test commit 8f7734960776d31ddcb0cf690da837c3f7ee9229 Author: Bojan Serafimov Date: Fri Mar 17 17:39:58 2017 -0400 Fix bug in FindRosetta commit e0bac90f990423a17e245cd6cb2d9f9f2b387951 Author: Bojan Serafimov Date: Fri Mar 17 17:03:16 2017 -0400 Add test cases commit 7ccc4c9454b80ef03f14a0c03d86fceea2309581 Author: Jiahao Li Date: Fri Mar 17 16:57:54 2017 -0400 Fixes sync elimination test. commit b5f16cfaf2ce8c9311104f356522c527cfe0b8ba Author: Jiahao Li Date: Fri Mar 17 16:51:37 2017 -0400 Removes incomplete sync elimination test. commit 344d075d08c6d23be99373b1b65a94fb6f92701d Author: Jiahao Li Date: Fri Mar 17 16:47:29 2017 -0400 Removes function renaming in sync elimination. commit 4045b1f2bd1d4e1ff6527bdc4349d9938e188463 Author: Jiahao Li Date: Fri Mar 17 16:15:20 2017 -0400 Fixes loop condition error in sync elimination. commit 7eab317e1436d2fc456f0f625ef4888577c53bec Author: Bojan Serafimov Date: Fri Mar 17 16:33:40 2017 -0400 Fix tests commit 2c6412e1a4bb92a5fc86f63803a52ea22c43aa05 Author: Jiahao Li Date: Fri Mar 17 14:54:13 2017 -0400 Implements legality check for sync elimination. commit a57ac4cafdfe845f0c90cc0611705c38f87f1905 Author: Bojan Serafimov Date: Fri Mar 17 16:05:14 2017 -0400 Add basic SyncElimination tests commit a7c6bdec1a3562a9333e06497e362ab5e8e45613 Author: Bojan Serafimov Date: Mon Mar 13 11:09:06 2017 -0400 Implement sync removing commit 271c65cf91c5a2223ebac864cb55d6137d6d00c4 Author: Jiahao Li Date: Thu Mar 9 16:59:16 2017 -0500 Implements Vegas-set finding for SyncElimination pass. commit 72827d0cc4ef8b3fb556bdb4660c6b0891849b4f Author: Jiahao Li Date: Thu Mar 9 15:58:45 2017 -0500 Implements Rosetta-finding part of SyncElimination pass. 
commit df4c672499f76bcbfdf93806755e6f9ff15035f6 Author: Jiahao Li Date: Thu Mar 9 15:08:28 2017 -0500 Cosmetic cleanup. commit 2682b3bf34c4efd7fc86e0af26d3a0b1dffc108f Author: Bojan Serafimov Date: Wed Mar 8 00:52:22 2017 -0500 Add SyncElimination pass commit 3856a31e3af623255498bc878b750e82c90a34b7 Author: Jiahao Li Date: Sat Apr 22 16:27:38 2017 -0400 Enables LoopFuse by default. commit 6017d8b2a125a66cb418d247281433a5665ab249 Author: Jiahao Li Date: Sat Apr 22 16:27:26 2017 -0400 Rebases LoopFuse to compile on the current code base. commit 367d9d916cbaf9d2433d267bf9c70be772fe8af7 Author: Jiahao Li Date: Sat Apr 22 16:04:20 2017 -0400 Replaces LoopAccessAnalysis with LoopAccessLegacyAnalysis in LoopFuse. commit bb0b29851651bc1d122b7aed839a58edb4e656ce Author: Jiahao Li Date: Sat Apr 22 15:40:47 2017 -0400 Applies https://reviews.llvm.org/D17386 for Loop Fusion Pass. commit 3ce522e822ad2a0b047c0cc905cf59b8f4247d26 Author: Douglas Kogut Date: Sat Apr 22 14:11:36 2017 -0400 pushing spawn work commit 0dd0df9b42bac64d82ffe5035f6d4f5d7b2dd2b0 Author: TB Schardl Date: Thu Mar 30 12:40:37 2017 +0000 [PassManager] Re-enabling passes that happen after optimizations when Cilk is not enabled. commit 511ba02c8ccb2bf15a0791007229389352bffef9 Author: TB Schardl Date: Thu Mar 16 14:25:49 2017 +0000 [Tapir] When outlining, propagate available alignment information to the parameters of the outlined function. commit 4722cecdb2cef0b0ab84c08f65ae296bb4c01a2f Merge: 285ff461789 780934e4b6a Author: TB Schardl Date: Fri Mar 10 20:18:23 2017 +0000 Merge branch 'master' of github.com:wsmoses/Parallel-IR commit 285ff4617892da4132f4a0aded992dcc4c5af6d5 Author: TB Schardl Date: Fri Mar 10 20:17:05 2017 +0000 [Tapir] Fix to properly maintain allocas in the entry block of a detached context. These changes ensure that every detached context has an entry block with just one predecessor. 
These changes also move allocas among entry blocks during function inlining and the outlining process for lowering Tapir. These changes also remove syncs associated with parallel loops after outlining. commit 489f0a4673d2b0364556382569e421fed347d301 Author: TB Schardl Date: Fri Mar 10 20:14:03 2017 +0000 [Local] Bug fix to make the GetDetachedCtx routine to properly return the detached BB at the start of a detached context. commit cd7e9f3c2d840182ab82830218703b78c657d1b0 Author: TB Schardl Date: Fri Mar 10 20:11:56 2017 +0000 [SimplifyCFGPass] Code cleanup and comments. commit 35669cce54f33447d1f12423e71536ab31cf02e5 Merge: 1fae2a923fb 52889bc3118 Author: William S. Moses Date: Wed Mar 8 11:33:46 2017 -0500 Merge branch '6898' of github.com:wsmoses/Parallel-IR into 6898 commit 780934e4b6a8054900b774d9405c0dd426bd23be Author: William S. Moses Date: Tue Mar 7 18:08:44 2017 -0500 Parallelize / Shorten compilation commit 4cc8071621e2c159a755a594bdb5dde9fbdfe74d Author: William S. Moses Date: Tue Mar 7 17:37:28 2017 -0500 Fix optimized llvm build commit 26007676a05e6c0445a0971f5bbfb0a2b2e9c47b Author: William S. Moses Date: Tue Mar 7 17:31:40 2017 -0500 Updated binary commit 6917c16e028fb03a608ba2e2f33ce48c68900b92 Author: William S. Moses Date: Tue Mar 7 17:21:27 2017 -0500 Faster cmake and autobuild matrix commit 088941d05808f63865028347f4fcd3cbc849ce08 Author: William S. Moses Date: Tue Mar 7 16:56:44 2017 -0500 Remove old cmake commit c558e05a3917b7be37490cd45b6c2d9fc153adbc Author: William S. Moses Date: Tue Mar 7 16:55:17 2017 -0500 Print directories for debugging script commit 074121e15927e674b16e2656913ecd08d557a422 Author: William S. Moses Date: Tue Mar 7 16:45:52 2017 -0500 Leave directory in autobuild after cmake commit 30a221e0a04ae4dae0575a092800799e7aa7792f Author: William S. Moses Date: Tue Mar 7 16:38:07 2017 -0500 Build without parallel option commit 7a7d719c26e78e049093f1869eb6573e7cb3e529 Author: William S. 
Moses Date: Tue Mar 7 16:32:07 2017 -0500 Build newer cmake from source commit 24f129bf4857357c90f8458c2ce09b60ab112b36 Author: William S. Moses Date: Tue Mar 7 16:24:00 2017 -0500 Correct ppa commit e2bc0fc2d7edc08fb427b6f0a30862c602e57dfb Author: William S. Moses Date: Tue Mar 7 16:21:28 2017 -0500 Change CMake to sourceline commit c6249f0bce0d9906f5d669c6d44d15f5977e09d3 Author: William S. Moses Date: Tue Mar 7 16:16:37 2017 -0500 Attempt newer CMake commit fe47a0078d432ee911504fa05c1af0652122dce7 Author: William S. Moses Date: Tue Mar 7 16:08:27 2017 -0500 Build PClang along with Tapir commit 8ee564cae3bbb672546427bab5137b90ce2fdc17 Author: William S. Moses Date: Tue Mar 7 16:07:36 2017 -0500 Build intel runtime using the Tapir compiler commit 6750684c7007e0e6ea0300498e7196cf68c52176 Author: William S. Moses Date: Tue Mar 7 16:00:50 2017 -0500 Add configure to cilk runtime building commit 3f3b46840218f1629f1183b1ef0772414ca145c2 Author: William S. Moses Date: Tue Mar 7 15:57:18 2017 -0500 Add make to dependency list commit bd6f8df75f130bcf260fc4a3102d73341d21dc1b Author: William S. Moses Date: Tue Mar 7 15:54:50 2017 -0500 Add cilk runtime building commit 6372499258146bf9da15f0153c9e4f4d288578cc Author: William S. Moses Date: Tue Mar 7 15:42:22 2017 -0500 Change autobuild cmake version commit 9fec173620bf1c3c964292485f007a69fc05ca72 Author: William S. Moses Date: Tue Mar 7 15:39:43 2017 -0500 Change autobuild distribution commit 1fae2a923fb632a6eb1dabc4826e3b2533735273 Author: William S. Moses Date: Tue Mar 7 15:35:20 2017 -0500 Relist as package commit 52889bc31182f3faebcfce24918670967b5b96f6 Author: Douglas Kogut Date: Mon Mar 6 12:11:10 2017 -0500 pushing example opt pass commit fe692e250aa8a78435200882ebb89c17f881c4d3 Author: TB Schardl Date: Fri Mar 3 13:25:57 2017 +0000 Ignoring debug build directory. 
commit 69fa592b7e889be513f1004b1f13dd450a1be378 Merge: 3c56ed06c17 df445de9e82 Author: TB Schardl Date: Fri Mar 3 13:20:52 2017 +0000 Merge branch 'master' of github.com:wsmoses/Parallel-IR commit 3c56ed06c17f764e2c1221df60e8ee45199b1577 Merge: 4611d796dea 2d562fe758b Author: TB Schardl Date: Fri Mar 3 13:19:05 2017 +0000 Merge branch 'master' of github.com:llvm-mirror/llvm commit df445de9e8252e5aff8a6d7645128df71b3bd45f Author: William S. Moses Date: Thu Mar 2 00:37:50 2017 -0500 Correct CI build script commit efa60d2d710c5697f6be5737898897cfb56b4509 Author: William S. Moses Date: Wed Mar 1 16:07:01 2017 -0500 Force travis-ci to rebuild commit 66ed989e47c276699462c761b0e4f2b68ef5d951 Author: William S. Moses Date: Tue Feb 28 16:18:35 2017 -0500 Initial attempt at adding Travis autobuilder commit b8a1f3fb7874d52fedb6db8a786695521a846709 Merge: 518873a5b44 a3bd7557fb6 Author: William Moses Date: Tue Feb 28 11:49:18 2017 -0500 Merge pull request #12 from YingVictor/master [LowerToCilk] Fix memory leak. commit a3bd7557fb661ef0980599d430e7cd0a52f7f385 Author: Victor A. Ying Date: Tue Feb 28 11:41:08 2017 -0500 [LowerToCilk] Fix memory leak. SmallVector of NewHelpers needs to be deleted. commit 518873a5b44c8ffc37282cb3887a1518525eca7f Merge: 645daf3405c fb71c4aa6b4 Author: William Moses Date: Sun Feb 26 17:29:34 2017 -0500 Merge pull request #11 from YingVictor/master Two minor fixes commit fb71c4aa6b408ce59e095b3d770ba01ab4eb9f51 Author: Victor A. Ying Date: Sun Feb 26 16:53:55 2017 -0500 [include/llvm-c/Transforms/Tapir.h] Fix function name mentioned in comment. commit 2e658275b9935e536f86aec6b7f911b6c5e374cc Author: Victor A. Ying Date: Sun Feb 26 16:46:18 2017 -0500 Properly remove traces of clang submodule. Removing a git submodule requires more than just deleting the entry in the .gitmodules file, as was done in the previous commit. 
It also requires deleting the special directory entry from the git index, which should be done using some variation of "git rm", such as: git rm --cached path/to/submodule Which is what I did in this commit. commit 645daf3405c01f6e262373a6c849466f09162f44 Author: William S. Moses Date: Fri Feb 24 15:35:50 2017 -0500 Remove clang submodule commit c9830e69c572885f6bfc7a74179a8e7efb6c851e Merge: 3ad6c9cb76e 4611d796dea Author: William S. Moses Date: Fri Feb 24 15:33:45 2017 -0500 Merge branch 'master' of github.com:wsmoses/Parallel-IR commit 3ad6c9cb76eba2c5fbf7a5c8416ac28793d6455e Author: William S. Moses Date: Fri Feb 24 14:10:50 2017 -0500 Update clang to stable commit 4611d796dea964dea884c34cadcef14b256fbe56 Author: TB Schardl Date: Tue Feb 21 19:46:22 2017 +0000 [CodeExtractor] Removed unused function from CodeExtractor. commit 73b2a05f9106a888ae92fbd9d89fd36be310bcce Author: TB Schardl Date: Sun Jan 15 14:19:32 2017 +0000 [LoopSpawning] Restored warnings when LoopSpawning fails to transform a marked loop. commit 710c06b2ffad2727ff751113b90b9905f4a3c845 Author: TB Schardl Date: Sun Jan 15 14:18:54 2017 +0000 [CodeExtractor] Removing old code for dealing with debug symbols. commit ab75cf00f520c07d4dafa58328fa809780ac146b Author: TB Schardl Date: Fri Jan 13 22:25:29 2017 +0000 [LowerToCilk] Renaming Detach2Cilk to LowerToCilk, as part of some code cleanup. commit 2748779e158be086e9fa52300ccd5fcded978044 Author: TB Schardl Date: Wed Jan 11 13:59:02 2017 +0000 Updated associated version of Clang. commit 738a76c83c83017faaeeaf959fb0c45b4586b08f Author: TB Schardl Date: Wed Jan 11 13:31:23 2017 +0000 [test] Adding some simple regression tests for Tapir. commit 5b63394d73f1d65ec6e338ed9ba8063895d8ef4e Author: TB Schardl Date: Mon Jan 9 19:11:44 2017 +0000 [Tapir/Outline] Fix debug build. commit df3dcb657228c40bff3ee7cab30944ed9e116021 Author: TB Schardl Date: Mon Jan 9 02:31:01 2017 +0000 [Tapir/Outline] Minor code cleanup. 
commit facf7c87283b30b139fe75fbd4caacfc32c0fb37 Author: TB Schardl Date: Mon Jan 9 02:29:07 2017 +0000 [Detach2Cilk] Inline __cilk functions into generated helper functions. commit c32adbf10f18c9a52e10de2e046329f67f635699 Author: TB Schardl Date: Sun Jan 8 22:48:22 2017 +0000 [LoopSpawning] Code cleanup for release build. commit 3b460341f6a21344ddbc11100cd75ef079bcd8ee Author: TB Schardl Date: Sun Jan 8 22:41:02 2017 +0000 [Detach2Cilk] Fixed creation of Cilk stack frames for release build. commit 4bcdb952154d0daf4f18384cceda7f72e7b2542d Author: TB Schardl Date: Sun Jan 8 20:42:48 2017 +0000 [SROA] Minor code cleanup. commit 3c73fb9bf4d241c96c31f10c3a89074ffbf30774 Merge: 0d6f0aad70a 18687546b92 Author: TB Schardl Date: Tue Jan 3 19:24:51 2017 +0000 Merge branch 'new_lowering' commit 18687546b9276fcb76c619193ee46b93f05a7001 Author: TB Schardl Date: Tue Jan 3 17:18:12 2017 +0000 [Detach2Cilk] Code cleanup. commit 2a7c78c09452762cc784ac4cf92381340830a90c Author: TB Schardl Date: Tue Jan 3 16:59:48 2017 +0000 [LoopSpawning] Added support for Tapir loops with exit blocks terminated by unreachable. commit a1af329428f71f12decbe8776e2d9b4d9b377c63 Author: TB Schardl Date: Sat Dec 31 17:06:01 2016 +0000 [CSI] Fix formatting of CSI pass. commit 08b3602ddb14e7bbe7fe78faa7a12c4fbd43e431 Author: TB Schardl Date: Sat Dec 31 17:05:07 2016 +0000 [CSI] Add function names to FED tables. 
commit 1672db6417856784850c9aaa5f879c1bb5f6f539 Merge: a22c19d21b9 56516028d8b Author: TB Schardl Date: Sat Dec 31 14:59:27 2016 +0000 Merge branch 'master' of github.com:llvm-mirror/llvm into new_lowering commit a22c19d21b991cd92e7f64103166f66f0f89eabd Merge: 04b71642665 7f580b605b2 Author: TB Schardl Date: Tue Dec 20 14:25:09 2016 +0000 Merge branch 'master' of github.com:llvm-mirror/llvm into new_lowering commit 04b716426657e5cf52c69e6e6953492e1e3b7434 Author: TB Schardl Date: Tue Dec 20 14:09:15 2016 +0000 [LoopSpawning] Switching LoopSpawning back to implementing divide-and-conquer scheduling directly. commit c03b7f076ab44c6e37edb033cf1b16950740fca7 Merge: 0cc6919dafd eaf3712d06e Author: TB Schardl Date: Mon Dec 19 21:47:05 2016 +0000 Merge branch 'master' of github.com:llvm-mirror/llvm into new_lowering commit 0cc6919dafdf326efdfa275f66556ad1a9abfe67 Author: TB Schardl Date: Mon Dec 19 20:34:25 2016 +0000 [Outline] Cleaning up the code. commit 747d1e8211d2c6ce8eeee40a79d3f684e9747e1c Author: TB Schardl Date: Mon Dec 19 20:30:37 2016 +0000 [LICENSE] Updated license to add copyright for changes to implement Tapir. commit 0d6f0aad70ae0b75a4f71567bd098703070c3c56 Author: William S. Moses Date: Sat Dec 17 23:15:13 2016 -0500 add clang submodule commit 463af403bf33e14b759a60377c95ffe3d1f74382 Author: TB Schardl Date: Tue Dec 13 02:28:54 2016 +0000 [LoopSpawning] Keeping two versions of divide-and-conquer loop spawning around. commit fcae33a06441a48081c463f74d12fc5f6b9ce68a Author: TB Schardl Date: Tue Dec 13 02:21:17 2016 +0000 [PassManagerBuilder] Modification to support more faithful reference pipeline for PPoPP. commit 6a8c5d26ad24a6f35ca8afcc17f18ea89f790f09 Author: TB Schardl Date: Sun Dec 11 22:29:25 2016 +0000 [LoopSpawning] Fixed bug in computing loop count for using Cilk ABI call. 
commit b8af887cac2f664ae780631cd14ea2a194ea042c Author: Ubuntu Date: Sun Dec 11 08:19:56 2016 +0000 cilk abi loopspawning commit 217f4eafa2694468cb3817fb65e05b95ddd1d0b3 Author: TB Schardl Date: Sat Dec 10 20:39:12 2016 +0000 [CilkABI] Bug fix to allow proper lowering of when a loop is the entry of a detached CFG. commit 82cb28db1a9877d923da8a038c8f33a9079b6121 Merge: 8a4ac0d5d6e 05bdd2ebfe8 Author: TB Schardl Date: Mon Nov 28 21:20:47 2016 +0000 Merge branch 'master' of github.com:llvm-mirror/llvm into new_lowering commit 8a4ac0d5d6ee455a6000fd60cd37018642a2b5ba Author: TB Schardl Date: Mon Nov 28 15:58:29 2016 +0000 [LoopSpawning] Refactored to be a FunctionPass, instead of a LoopPass. More work is needed for this pass to legally add functions to the current Module. commit 7f96f2c38f8233502a50c6bfd66257be0915ea41 Author: TB Schardl Date: Mon Nov 28 15:55:11 2016 +0000 [LoopSimplify] Modified to ensure that the preheader of a loop is not terminated by a sync. commit f84012859a7fd293377b87a2c0d95d2cbd75aee0 Author: TB Schardl Date: Mon Nov 28 15:53:05 2016 +0000 [Tapir/Outline] Cleaning up commented-out code. commit 2e932359c6f63a76e6a040bdf577ca9f162ddd8f Author: TB Schardl Date: Mon Nov 28 15:52:22 2016 +0000 [BasicBlockUtils] Modified SplitEdge to keep sync instruction in original block. commit 32aeb36a6f76b69247231a1b57a9b66a32627ed1 Author: TB Schardl Date: Mon Nov 28 15:50:19 2016 +0000 [Detach2Cilk] Making Detach2Cilk a ModulePass, instead of a FunctionPass, so it can safely add functions to the module. 
commit 6ab23d5f49ab42f2d3074523570cf72cd7ee6d02 Merge: 56598980fc5 52894d83e1a Author: TB Schardl Date: Sat Nov 26 17:23:45 2016 +0000 Merge branch 'master' of github.com:llvm-mirror/llvm into new_lowering commit e189e6c97da75849d75b512dd5513c0ec5a09af4 Merge: 6952888faaa c3bdfe57eb1 Author: Ubuntu Date: Thu Nov 24 17:07:50 2016 +0000 Bring up to date with most recent llvm commit 56598980fc58d0bd68e2957eb45371eb23245995 Merge: 6a33185a05c 3e65807a6f1 Author: TB Schardl Date: Wed Nov 23 18:31:46 2016 +0000 Merge branch 'master' of github.com:llvm-mirror/llvm into new_lowering commit 6952888faaaf797beb00934eee0c99f85fbfeea5 Merge: e79c0d93864 e372554cd73 Author: TB Schardl Date: Fri Nov 11 21:42:16 2016 +0000 Merge branch 'master' of github.com:wsmoses/Parallel-IR commit e79c0d93864a579bf6b865802e182a7b80d9ea48 Author: TB Schardl Date: Fri Nov 11 21:34:37 2016 +0000 [PassManager] Ensure that extensions to the pass manager that are intended to run last only run once on Tapir programs. commit 6a33185a05c72739458a92e13a103ed4b3ae4b97 Author: TB Schardl Date: Fri Nov 11 21:34:37 2016 +0000 [PassManager] Ensure that extensions to the pass manager that are intended to run last only run once on Tapir programs. commit 6f2c14afe41e2bb9729976b52734d98f3c99bae3 Author: TB Schardl Date: Fri Nov 11 21:18:30 2016 +0000 [LoopSpawning] Ensure that calculation of a Tapir loop limit is inserted at the end of the loop's preheader. commit e372554cd7396b1facc00f6d5df7d51f89553e31 Author: William S. Moses Date: Thu Nov 3 23:57:38 2016 -0400 Remove some debug prints commit 6baad834b9903206be5830e9a5d81cb8c118dc80 Author: William S. Moses Date: Thu Nov 3 23:54:44 2016 -0400 Remove some debug prints commit 782593d7bcd41736b148b6b128890d31f0d49f10 Author: TB Schardl Date: Tue Nov 1 14:40:47 2016 +0000 [LoopSpawning] Cleaning up code and debug output. 
commit f604273ecf927017dc48afdae928477f8708e0d5 Author: TB Schardl Date: Tue Nov 1 14:39:42 2016 +0000 [Detach2Cilk] Should not need to inline detached helper functions anymore, because Detach2Cilk should properly handle debug symbols. commit 20d299f2d2839b1f45b6716970f5a99ee821cec3 Author: TB Schardl Date: Tue Nov 1 14:37:40 2016 +0000 [PassManagerBuilder] Run SimplifyCFG after Detach2Cilk to clean up cruft left by Detach2Cilk. commit 1610d83dd9f26a9f47004634f83b7e5a614f46f6 Author: TB Schardl Date: Tue Nov 1 14:36:49 2016 +0000 [Detach2Cilk] Fix to ensure that Phi nodes in the continuation of a detach are still valid after lowering the detach to Cilk runtime calls. commit ea14d8bd01adccba902cdae883625698319b7d61 Author: TB Schardl Date: Tue Nov 1 04:42:24 2016 +0000 [CilkABI] Converting Detach2Cilk pass to use new Tapir outlining methods, in order to handle debug symbols more correctly. commit 1f30c735f929c5821cf575aeea59ee1b6eef3164 Author: TB Schardl Date: Mon Oct 31 21:56:25 2016 +0000 [LoopSpawning] Fixed bugs to properly erase loops after performing transformation and to handle preheaders terminated by syncs. commit a86651dd973a6f0743b4a360396dba6360fc5bdf Author: TB Schardl Date: Mon Oct 31 21:54:45 2016 +0000 [Outline] Cleaning up CreateHelper Tapir outlining method. commit 31691cd15ae0f76c40420339849f652888294863 Author: TB Schardl Date: Mon Oct 31 15:38:08 2016 +0000 [LoopSpawning] Cleaning up LoopSpawning code, and adding output to loop-spawning reports. commit 51220e44f007bb6b5be02ecbbf2e20840634daba Author: TB Schardl Date: Mon Oct 31 15:34:55 2016 +0000 [Tapir] Renaming TapirOutline to Outline. commit 6950ba60b07973d535c06f288e0ed30b14d43aa9 Author: TB Schardl Date: Sun Oct 30 19:19:15 2016 +0000 [TargetLoweringBase] Dealing with compile warning on TargetLoweringBase. commit 581677b179aa2ed89134c8034ac491fae68595f0 Author: TB Schardl Date: Sun Oct 30 19:18:10 2016 +0000 [LoopSpawning] Replacing Loop2Cilk with LoopSpawning. 
commit 39d404b1998c4c2d3635939c27f85c70e987d70f Author: TB Schardl Date: Sun Oct 30 18:54:23 2016 +0000 [DiagnosticInfo] New method for emitting warning messages for the LoopSpawning pass. commit 3d834b9e67f2779d2acd2bfd65d0b192561597d1 Author: TB Schardl Date: Thu Oct 27 21:27:33 2016 +0000 Updating passes to run around new Loop2Cilk implementation. commit 35ec023f57f3a240f598d2a9822ec29aedcaf48c Author: TB Schardl Date: Thu Oct 27 21:25:43 2016 +0000 Moving Tapir-specific transformations to a separate subdirectory under Transforms. commit 3aae9e2c7b3402a3816f5b31a70a9326674c7a9f Author: TB Schardl Date: Sat Oct 22 14:40:05 2016 +0000 [Cilk] Refactoring components for lowering Tapir to Cilk runtime calls. commit 0a92f963f5978e3f7cd91a1f77a9b3040b4a2baf Merge: 54f16a4669d fe05c97a9eb Author: TB Schardl Date: Sat Oct 22 14:33:05 2016 +0000 Merge branch 'master' of github.com:wsmoses/Parallel-IR commit 54f16a4669deaefc6a92a6f098485ee2d02d608b Author: TB Schardl Date: Sat Oct 22 14:30:27 2016 +0000 [Local] Cleaned up formatting to get rid of tabs. commit a8fade288fdbc1e194b7b0adba5ebdf61f05cb38 Author: TB Schardl Date: Sat Oct 22 14:28:18 2016 +0000 [Local] Fix to SerializeDetachedCFG to preserve debug symbols. commit 5cc10ed3110941799eb681ad00833028ca692193 Author: TB Schardl Date: Sat Oct 22 14:17:40 2016 +0000 [Instrumentation] Adding CSI instrumentation pass, copied from https://github.com/CSI-LLVM/. commit fe05c97a9eb98c01cfaa7a1a5129b0d002e2db70 Author: William S. Moses Date: Sat Oct 22 10:00:23 2016 -0400 Resolve issue 7 commit 4664388bb8c70312e21d321196942924a23955ff Author: TB Schardl Date: Wed Oct 19 16:01:28 2016 +0000 [emacs] Added detach, reattach, and sync as control instructions in LLVM's emacs mode. commit c0e8f4fe8db4bdac7f84bbf2ce6cb8a73a9252bd Author: TB Schardl Date: Mon Oct 17 04:14:35 2016 +0000 [SSAUpdater] Derive the correct value from detached predecessors. 
commit 2abd121b4c25579045347105a56b8383d0cefb9d Author: TB Schardl Date: Fri Oct 14 21:46:24 2016 +0000 [LICM] Fixing compiler crash when LICM attempts to move a store outside of a Tapir loop. commit 28606d0fb2e4e2bcaf37959292c2a89cedaf7a1e Author: TB Schardl Date: Thu Oct 13 02:12:43 2016 +0000 [AliasAnalysis] Minor formatting change. commit e5e04d08d7ddad2e021d0744ef52c52048955a2c Author: TB Schardl Date: Thu Oct 13 02:08:30 2016 +0000 [InlineFunction] Preventing InlineFunction from moving alloca's out of their detached context after inlining. commit 14719bb0513004960e3c8b0571b82981cc2b1239 Merge: 84848c51548 7f4bee18532 Author: William S. Moses Date: Thu Oct 6 13:53:55 2016 -0400 Merge branch 'master' of github.com:wsmoses/Parallel-IR commit 84848c51548b59b6beafa5c90615f36e64500199 Author: William S. Moses Date: Thu Oct 6 13:53:50 2016 -0400 Allow full unrolling of cilk for loops commit 7f4bee185325eebc78533ef450a45e43926da694 Author: TB Schardl Date: Thu Oct 6 16:51:37 2016 +0000 [AliasAnalysis] Force AliasAnalysis to fail fast if it finds a detached CFG that reaches its own Detach instruction. commit a2c6e22dd11c4212dbb64ce15020f677d77ed479 Author: TB Schardl Date: Tue Oct 4 22:44:38 2016 +0000 [Loop2Cilk] Fix splitting of loop preheaders that are terminated by sync instructions. commit 1d1bdcf375abd2e0e83a8500278acc6124bf16f2 Author: William S. Moses Date: Sun Oct 2 23:19:30 2016 -0400 minor modref fix commit 9ca914a946ee787fa8750a0a622d0f901641f2cf Author: William S. Moses Date: Fri Sep 23 16:12:32 2016 -0400 fix line info commit 16395e5ae2ab1cbc17de82c0127680aeccecedc1 Author: William S. Moses Date: Thu Sep 22 09:08:42 2016 -0400 Additional clean up commit af36e03c8282f4c431260dbfe16e3c323c72b82d Author: William S. Moses Date: Wed Sep 21 16:56:01 2016 -0400 clean up unrollinng commit 87d19e853f283cf9fac9c1e71239e34227fad27c Author: William S. 
Moses Date: Wed Sep 21 16:48:27 2016 -0400 resolve move to clang 4 commit 79323f66683946df1702005e3071f7fed23f0c3d Author: William S. Moses Date: Thu Sep 15 15:06:36 2016 -0400 fix tre commit 574835b96b09f8d9b496f17c303b7a3457cd2e1f Author: William S. Moses Date: Thu Sep 15 12:01:49 2016 -0400 Fix mem2reg bug commit 88cccc72240abd17a1dec0b2d238686919db7e81 Author: William S. Moses Date: Tue Sep 13 17:14:44 2016 -0400 fix running bugs commit f449ac224baed049d3a4eecaccaeef7ac0954e36 Author: William S. Moses Date: Mon Sep 12 14:10:31 2016 -0400 fmt commit 1d618f6fc664f473131fa11d3b5ba495e3d1cbbd Author: William S. Moses Date: Mon Sep 12 14:08:22 2016 -0400 fmt commit 05d2fe180fe4980474f8e7317936b312b749e048 Author: William S. Moses Date: Mon Sep 12 14:07:24 2016 -0400 fmt commit cb166968bc4f79b54e24272b59f935e3239109c6 Author: William S. Moses Date: Wed Aug 17 22:11:31 2016 -0400 solid commit 1be62909730984141b5afbec84c48823735c4429 Merge: c3eb1b7594a e65e275cf2f Author: William S. Moses Date: Wed Aug 17 18:01:27 2016 -0400 Merge remote-tracking branch 'llvm/master' commit c3eb1b7594a5953a324015aa08f745e31fb0ec65 Author: William S. Moses Date: Wed Aug 17 18:00:22 2016 -0400 cleanup commit 925a26d33e5aa664ed2a950bfac6f123832d28f1 Author: William S. Moses Date: Wed Aug 17 17:55:49 2016 -0400 cleanup commit 8a4aa28bc1ac48d2073507eb365e2461b206f524 Merge: 9ee354913cb 7177ff558c7 Author: William S. Moses Date: Wed Aug 17 02:54:17 2016 -0400 merge to mainline commit 9ee354913cb1d00c79b0173d87e8259db193d73f Author: William S. Moses Date: Mon Aug 15 01:43:52 2016 -0400 Add race detector commit 9b7715ebfc3bdd80382cbce7ca724868789c9cd6 Author: William S. Moses Date: Wed Aug 10 00:04:31 2016 -0400 cmake fixes commit b66e56629e6ddd6895342d281ed510b011cecff1 Author: Ubuntu Date: Fri Jul 29 21:11:20 2016 +0000 LICM fix commit c1aabfb01f044642dc9fb4317313d408c3cc39fc Author: William S. 
Moses Date: Wed Jul 27 21:22:20 2016 -0400 add merge functions commit 72b025f6f0d254ab7e37e7cabb42e9e27f01ede8 Author: William S. Moses Date: Wed Jul 20 13:40:34 2016 -0400 fix dt commit 39c33184af36efb1af71591940caf1924ace5ac8 Author: William S. Moses Date: Wed Jul 20 13:34:33 2016 -0400 fix dt commit af099d0ad6a6c263f969e2c8b577d8a6c80bd685 Author: William S. Moses Date: Wed Jul 20 13:14:30 2016 -0400 fix dt commit 920d83fc1bed8c82c0f2ccf58379371445206469 Author: William S. Moses Date: Wed Jul 20 12:12:44 2016 -0400 fix ph issue commit b0abbc37c6e836acf46b8703b54a0881fd499b96 Author: William S. Moses Date: Wed Jul 20 11:49:12 2016 -0400 resolve print commit d7aa05a4ebf5866d9fe70dd3733e9e20df4fdd76 Author: William S. Moses Date: Tue Jul 19 18:10:57 2016 -0400 major pbbs bugfix commit f470066edb8b7a8d8db7cef0b9a7b65f8fd8090a Author: William S. Moses Date: Tue Jul 19 14:31:06 2016 -0400 fix ppbs bug commit e1ac630d820ec2a7455392f4ddc9c4c620ea26c2 Author: William S. Moses Date: Mon Jul 18 21:35:07 2016 -0400 mod graint position commit 0e725b855f90f63703d71a8761f717697912b65c Author: William S. Moses Date: Mon Jul 18 21:14:16 2016 -0400 mod graint position commit 83e0982370d9a89d4f0b0b33636511568d8eda40 Author: William S. Moses Date: Mon Jul 18 16:17:40 2016 -0400 cilk abi fixes commit 63738d884d78c5297d1c781da81b6599e9cdeba3 Author: William S. Moses Date: Mon Jul 18 13:07:38 2016 -0400 fix recursive idx commit 45ca520784a38bbc13b0d00597310d931c757e4b Author: William S. Moses Date: Mon Jul 18 02:25:34 2016 -0400 fix issues with d2c extraction commit 0e9c93c9d38a035d1ea88c2fbfbff6d6144cde0f Author: William S. Moses Date: Sun Jul 17 22:21:06 2016 -0400 add reopt commit ec8c23de30635cb0969514bd18068d4e2bd77ec9 Author: William S. Moses Date: Sun Jul 17 22:18:39 2016 -0400 prevent rerunning passes commit 8d6bd63be4a6c8ebf61be02b9d2d8535de3b9484 Author: William S. 
Moses Date: Thu Jul 14 13:19:44 2016 -0700 fix asm errors commit f83bdc1fab9bf732ea0be8b134cea617e4f85500 Author: William S. Moses Date: Tue Jul 12 08:18:01 2016 -0700 fix unreachable merge domtree bug commit 662b5a7e0018b659b08dc9256dfd61f94d756f56 Author: William S. Moses Date: Mon Jul 11 16:04:43 2016 -0400 Resolve issues with bounds detection in loop2cilk commit 4866c5da1c28d2c67dc168edf119cc4adfbc07f3 Author: William S. Moses Date: Thu Jul 7 09:28:14 2016 -0400 minor attr fix commit 1f4c43c41f109f82859a88525a851f00b2e1b5e4 Author: William S. Moses Date: Thu Jun 30 15:05:11 2016 -0400 fix bounds error commit 0caf3f63eb873abb93e06080eb875f0945c5c2df Author: William S. Moses Date: Thu Jun 30 14:13:54 2016 -0400 speedup fix commit 5cf555f901601c76bc416f7ef94dc77b375bcf84 Author: William S. Moses Date: Thu Jun 30 12:41:46 2016 -0400 resolve linker issues commit 25e91bfc5f42f6eb1977cefe90336e85994d65d3 Author: William S. Moses Date: Thu Jun 30 12:37:47 2016 -0400 prevent l2c recursive loops commit 325bce7bb19e0e4828e6f7eba6ba6420a1f59f7a Author: William S. Moses Date: Wed Jun 29 22:41:14 2016 -0400 fix issue with loop parents commit 8e0997cb4b85e14c83783d81a7e3815d64fc6056 Author: William S. Moses Date: Wed Jun 29 21:10:51 2016 -0400 more efficient loops commit f302f9480f94a4e7f816707e5224c85e0bf07218 Author: William S. Moses Date: Wed Jun 29 01:05:05 2016 -0400 l2c computes grain size commit 1dbd257083c5d5e95fa662cc99da0b150aed94e2 Author: William S. Moses Date: Tue Jun 28 16:47:52 2016 -0400 more error info for bad return state commit ec4340b4cee3951abf49ad1636bff07cb77fb80f Author: William S. Moses Date: Mon Jun 27 17:57:49 2016 -0400 fix accidental breakage commit 88ceb1203926d59578e2c0dba02bf3b38f374120 Author: William S. Moses Date: Mon Jun 27 14:39:50 2016 -0400 fix loop2cilk indvar incr adding issue commit 0a1cbbf7dff910f348713a88108169e03dabf3de Author: William S. 
Moses Date: Fri Jun 24 13:43:53 2016 -0400 Better Parallel TRE commit bc96f0b3f141176d1667b1700be945aed7520e9c Author: William S. Moses Date: Fri Jun 24 01:38:46 2016 -0400 Parallel TRE commit 579d39d8efab448cacf9c41aea8197226c64bfe4 Author: William S. Moses Date: Thu Jun 23 13:47:13 2016 -0400 more secure sync detect for loop2cilk commit c06f49770a26c971efe66356b90a0a1ef7f2a301 Author: William S. Moses Date: Wed Jun 22 16:57:07 2016 -0400 Fix alloca issues for detached code commit 150056edc4a2bb03c0bbe94923cfa189ce44f052 Author: William S. Moses Date: Tue Jun 21 19:17:47 2016 -0400 minor opt diff commit 497c3b498bc8ce71ad913dff063853204810f402 Author: William S. Moses Date: Tue Jun 21 15:02:58 2016 -0400 modify pass commit 01e49c3727f69e2da875989b4e61ab10fc058327 Author: William S. Moses Date: Tue Jun 21 01:14:31 2016 -0400 fix loop2cilk recog issue commit 1c52cbf136f247110b7c9e4cac0a5a0d73ad63f7 Author: William S. Moses Date: Tue Jun 21 00:35:03 2016 -0400 remove pre sroa commit 510bfacf5154f48e729c159c95c965acf4eef120 Author: William S. Moses Date: Mon Jun 20 20:36:34 2016 -0400 loop2cilk fixes to indvar commit ef34ac80086a10e3ae04b9fd2ce4d99436eaa69e Author: Ubuntu Date: Mon Jun 20 19:00:07 2016 +0000 Resolve linker errors commit 4387eb25bb6e36f0e5f8d04c9d9d3f710864044a Author: William S. Moses Date: Mon Jun 20 14:47:48 2016 -0400 Loop2cilk new indvar calculation commit d4e44d43b5c6e40883975e87aa2c4c46759a8eb8 Author: William S. Moses Date: Mon Jun 20 04:10:48 2016 -0400 loop2cilk without opts commit 9164742231eb140864e17562dd7e79161685e293 Author: William S. Moses Date: Mon Jun 20 03:48:51 2016 -0400 correct loop bounds calculation commit d0d80c596491f3d8b7b9f2479f996f9345e9f059 Author: William S. Moses Date: Sun Jun 19 00:43:55 2016 -0400 clean up compile commit 26beb619a1384b470ca0e668c1a838ee85b78b75 Author: William S. 
Moses Date: Fri Jun 17 14:37:46 2016 -0400 remove debug message commit 76a163ddffdb916de1bee5fef34298e676266bff Author: Ubuntu Date: Wed Jun 15 20:58:36 2016 +0000 nomem commit 126c754b4f8e553e6b9ff33f899afaaf4182ee04 Author: William S. Moses Date: Wed Jun 15 15:41:57 2016 -0400 fixes and less print commit cd037d2993381148f11954f51ff89c6b5e599086 Author: William S. Moses Date: Tue Jun 14 23:33:28 2016 -0400 restore cilkabi commit 5964e893682feec3a63d17999d32c2125486e879 Author: William S. Moses Date: Tue Jun 14 23:19:52 2016 -0400 fix inline bug commit b5a22ebc589fc25b72f513eb16ccbedc6482e9f2 Author: William S. Moses Date: Tue Jun 14 14:32:41 2016 -0400 cleanup dumps commit 2ab9f07b81a7fb04c33926c2899c4af1753d6175 Author: William S. Moses Date: Tue Jun 14 14:30:04 2016 -0400 cleanup dumps commit 56d8d0f052de051328c2077bcd47e75f34d9f034 Author: William S. Moses Date: Tue Jun 14 12:35:26 2016 -0400 cleanup dumps commit d95ce1575159c12135952b3fa39a092bc77ad298 Author: William S. Moses Date: Tue Jun 14 12:29:38 2016 -0400 addl sroa fixes commit 2754c0b40a4ca26d3201005a1d2796b840bdcce7 Author: William S. Moses Date: Tue Jun 14 12:16:02 2016 -0400 loop2cilk ordering issue for ind var calculation fixed commit bebf5cc0565d9060e78a3caeb880b2ce8f43b36c Author: William S. Moses Date: Tue Jun 14 11:27:20 2016 -0400 Fix SROA for detached allocas commit 222ecb6dfd053282d450cbe9cffc7cea4d98fa5d Author: William S. Moses Date: Tue Jun 14 00:36:00 2016 -0400 minor bugfix commit 446ad1a3bad89a44dd2c361cc0d9417a0a07eb2b Author: William S. Moses Date: Mon Jun 13 21:59:25 2016 -0400 bugfixes commit bc37ee11a97c23b0576d45bcc94e7a597ff30a39 Author: William S. Moses Date: Thu Jun 9 10:43:21 2016 -0400 Fix odd LICM error commit abfc103a0f06248526972ddd6f6057e372d56383 Author: William S. Moses Date: Wed Jun 8 01:04:49 2016 -0400 parallel opt levels and fix codegen pt 1 commit cab96d82f5d94a4a6745983953f43850d3a80f7d Author: William S. 
Moses Date: Fri Jun 3 01:43:13 2016 -0400 fix compile script commit 6284487a349fe982d5d24d2ff45d8ff5c8d25708 Author: William S. Moses Date: Fri Jun 3 01:41:01 2016 -0400 fix l2c commit 3783dfebd1a8d94ab40b958e03ffb99ac54e3f5b Author: William S. Moses Date: Thu Jun 2 23:50:39 2016 -0400 Fix allocation issues commit fc2042d6a1331df9a55148208d27b2c2d4834ef7 Author: William S. Moses Date: Mon May 30 15:20:22 2016 -0400 add unique block debug info commit cd3303d769327d50bcf3a422496190ed349cbaac Author: William S. Moses Date: Mon May 30 15:17:18 2016 -0400 fix exit block detection l2c commit 4865203b50d0ad69531b6459a35d557908db3ffe Author: William S. Moses Date: Mon May 30 15:02:11 2016 -0400 fix sync l2c detection issue commit e95a55ae8775dfe21c0ce10e0ea32332bc3d973a Author: William S. Moses Date: Sun May 29 23:31:59 2016 -0400 allow switch and better cmp block commit b17417485a42308842840748c73c76953302dc30 Author: William S. Moses Date: Sun May 29 22:09:34 2016 -0400 fix issues in multiple phi nodes for l2c commit f64fca467066650bdab351a55ec38943d360fced Author: William S. Moses Date: Sun May 29 17:29:00 2016 -0400 add addl check for loop2cilk commit 8d9ac096f9beda10ff400631aae3336b5cb0982e Author: William S. Moses Date: Sat May 28 22:36:56 2016 -0400 minor script fix commit 748021ae6a76b9d6e2ecb85b3e247455d5e9bdb9 Author: William S. Moses Date: Sat May 28 22:24:41 2016 -0400 lots of minor cilk error fixes commit 0132cc1ce667fd8c21adaf5b3abd5dfadac80c09 Author: William S. Moses Date: Wed May 25 11:52:28 2016 -0400 fix bug in l2c about branching into commit 9f921005730c6c92fbdf19b36714488c72c0975e Author: William S. Moses Date: Tue May 24 23:40:12 2016 -0400 fix bug in loop2cilk commit a9d9cd9529c20022fd5ca0600042065cfee21d8f Author: William S. Moses Date: Sun Apr 10 14:32:22 2016 -0400 resolve block seg commit 7410b7bcfbf610b34a0f42c0966cbdbd2e9b2e97 Author: William S. 
Moses Date: Sun Apr 10 13:55:01 2016 -0400 fixes commit 11a77b870e734e617b00e4b55f09526cf2ac37d4 Author: William S. Moses Date: Thu Apr 7 03:04:30 2016 -0400 add compile commit f2ec969a1965da3224fdffed035b9d39114d2b9a Author: William S. Moses Date: Thu Apr 7 03:04:17 2016 -0400 pre detach merging / loop unroll fixes commit 9c00e9b80d865cf478607a4ddb90ca018ad2978c Author: William S. Moses Date: Thu Apr 7 00:27:15 2016 -0400 sync fix commit 1f3c6dcb9d48ba519fde34c66b657571949428f7 Author: William S. Moses Date: Thu Apr 7 00:12:58 2016 -0400 bug fixes commit 0f1b1cf061ab790622c6498e0df9c5487a8d610c Author: William S. Moses Date: Tue Apr 5 18:44:04 2016 -0400 resolve delete issues commit 86cd5870f9d667ff36b2c10971216e8f6d0977d0 Author: William S. Moses Date: Tue Apr 5 13:10:36 2016 -0400 resolve delete issues commit 06defa794acaf1f13ecdd63d57b38a49e2561492 Merge: 2f7e6ec4fa6 8b47c17a53d Author: William S. Moses Date: Tue Apr 5 11:57:10 2016 -0400 Merge remote-tracking branch 'llvm/release_38' commit 8b47c17a53d683f313eaaa93c4a53de26d8fcba5 Author: Dimitry Andric Date: Tue Apr 5 06:58:21 2016 +0000 Merging r264335: ------------------------------------------------------------------------ r264335 | dim | 2016-03-24 21:39:17 +0100 (Thu, 24 Mar 2016) | 17 lines Add <atomic> to ThreadPool.h, since std::atomic is used Summary: Apparently, when compiling with gcc 5.3.2 for powerpc64, the order of headers is such that it gets an error about std::atomic<> use in ThreadPool.h, since this header is not included explicitly. See also: https://llvm.org/bugs/show_bug.cgi?id=27058 Fix this by including <atomic>. Patch by Bryan Drewery. 
Reviewers: chandlerc, joker.eph Subscribers: bdrewery, llvm-commits Differential Revision: http://reviews.llvm.org/D18460 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@265380 91177308-0d34-0410-b5e6-96231b3b80d8 commit 295c7a62d88d363361198766ce95900441727da9 Author: Renato Golin Date: Sat Apr 2 20:36:55 2016 +0000 Merging r263714: ARM: Revert SVN r253865, 254158, fix windows division git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@265245 91177308-0d34-0410-b5e6-96231b3b80d8 commit 2a2d901e3c55aff48990de5e415c429c4cfeb6d8 Author: Renato Golin Date: Sat Apr 2 20:32:54 2016 +0000 Merging r263123: ARM: follow up improvements for SVN r263118 git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@265244 91177308-0d34-0410-b5e6-96231b3b80d8 commit 97a35e605ab417f11be4ccb532fcc9015ebb2ca8 Author: Renato Golin Date: Sat Apr 2 20:31:15 2016 +0000 Merging r263118: ARM: correct __builtin_longjmp on WoA git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@265243 91177308-0d34-0410-b5e6-96231b3b80d8 commit dec3a22cf5b8f8e6c6d1bf898f3a14bc4c54e0b4 Author: Tom Stellard Date: Mon Mar 28 18:13:48 2016 +0000 Bump version to 3.8.1 git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@264605 91177308-0d34-0410-b5e6-96231b3b80d8 commit 2f7e6ec4fa663dff11ba3dff5f74468e79c042d9 Author: TB Schardl Date: Thu Mar 17 08:15:50 2016 +0000 Cleaning up CilkABI. commit 88a51fc0886146600e14173a0878b6567b29e3bc Author: TB Schardl Date: Thu Mar 17 08:15:05 2016 +0000 Fixing Loop2Cilk CMakeLists entries to fix cmake build. 
commit 0d0d243f395a4192bf4d85817c8ac14f5d9d8b2f Author: TB Schardl Date: Thu Mar 17 08:14:16 2016 +0000 Fixing Loop2Cilk for merge with 'release_38' commit 277ca2c63350507bf3ba5cd075f204e4b356fc5f Merge: 008aa9d2441 ad5750369cc Author: TB Schardl Date: Thu Mar 17 08:09:16 2016 +0000 Merge branch 'release_38' of http://llvm.org/git/llvm into tb-scratch commit 008aa9d24417420734027b5072ea48cc86b428d2 Author: William S. Moses Date: Sat Mar 12 17:32:11 2016 -0500 loop2cilk working happily commit ea5e316db15804df27dcfaf6b790f07c8e7bd2b2 Merge: 9b3fc2538fd 1526147c0ad Author: William S. Moses Date: Thu Mar 10 13:16:18 2016 -0500 Merge branch 'tb-scratch' of ssh://github.com/taekwonbilly/Parallel-IR into tb-scratch commit 9b3fc2538fdd9218bcb1a91b954028652579c6e4 Author: William S. Moses Date: Thu Mar 10 13:15:45 2016 -0500 loop2cilk mods commit ad5750369cc5b19f36c149f7b13151c99c7be47a Author: Hans Wennborg Date: Wed Mar 2 23:38:03 2016 +0000 ReleaseNotes: tidy up git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@262542 91177308-0d34-0410-b5e6-96231b3b80d8 commit 0805780408c97128dc9164d4dbb8604882f5588e Author: Hans Wennborg Date: Wed Mar 2 23:10:55 2016 +0000 Remove 'if you are using a released version' warning git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@262537 91177308-0d34-0410-b5e6-96231b3b80d8 commit f26161e8b05360841a1a3a4a2204ed761d6a2e04 Author: Hans Wennborg Date: Wed Mar 2 18:19:22 2016 +0000 ReleaseNotes: C API policy; by Eric Christopher git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@262496 91177308-0d34-0410-b5e6-96231b3b80d8 commit 27c964e2ae0b573cf1e6551a3da255539db03d3c Author: Hans Wennborg Date: Fri Feb 26 21:37:52 2016 +0000 ReleaseNotes: PowerPC; by Kit Barton git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@262074 91177308-0d34-0410-b5e6-96231b3b80d8 commit bb6f14e3581c78509405a3d415e72821db8a2066 Author: Quentin Colombet Date: Mon Feb 22 22:27:47 2016 +0000 
[AArch64] Fix bug in prolog clobbering live reg when shrink wrapping. This adapts r261349 to the release branch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261586 91177308-0d34-0410-b5e6-96231b3b80d8 commit e970b795a27d16c720bf4e3ff030eea241784eb4 Author: Hans Wennborg Date: Mon Feb 22 21:05:14 2016 +0000 Merging r261441, r261447, and r261546: ------------------------------------------------------------------------ r261441 | nemanjai | 2016-02-20 10:16:25 -0800 (Sat, 20 Feb 2016) | 12 lines Fix for PR 26500 This patch corresponds to review: http://reviews.llvm.org/D17294 It ensures that whatever block we are emitting the prologue/epilogue into, we have the necessary scratch registers. It takes away the hard-coded register numbers for use as scratch registers as registers that are guaranteed to be available in the function prologue/epilogue are not guaranteed to be available within the function body. Since we shrink-wrap, the prologue/epilogue may end up in the function body. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r261447 | nemanjai | 2016-02-20 12:45:37 -0800 (Sat, 20 Feb 2016) | 6 lines Fix the build bot break caused by rL261441. The patch has a necessary call to a function inside an assert. Which is fine when you have asserts turned on. Not so much when they're off. Sorry about the regression. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r261546 | nemanjai | 2016-02-22 10:04:00 -0800 (Mon, 22 Feb 2016) | 6 lines Fix for PR26690 take 2 This is what was meant to be in the initial commit to fix this bug. The parens were missing. This commit also adds a test case for the bug and has undergone full testing on PPC and X86. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261572 91177308-0d34-0410-b5e6-96231b3b80d8 commit f65e46be097186d748836d42c38a6dc7f30e6c3b Author: Hans Wennborg Date: Mon Feb 22 17:51:28 2016 +0000 Merging r261387: ------------------------------------------------------------------------ r261387 | davide | 2016-02-19 16:44:47 -0800 (Fri, 19 Feb 2016) | 8 lines [X86ISelLowering] Fix TLSADDR lowering when shrink-wrapping is enabled. TLSADDR nodes are lowered into actuall calls inside MC. In order to prevent shrink-wrapping from pushing prologue/epilogue past them (which result in TLS variables being accessed before the stack frame is set up), we put markers, so that the stack gets adjusted properly. Thanks to Quentin Colombet for guidance/help on how to fix this problem! ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261542 91177308-0d34-0410-b5e6-96231b3b80d8 commit e3b2bd1e79c9c9d24490b6ddb2341afcf4210691 Author: Hans Wennborg Date: Mon Feb 22 17:47:10 2016 +0000 Merging r261384: ------------------------------------------------------------------------ r261384 | qcolombet | 2016-02-19 16:32:29 -0800 (Fri, 19 Feb 2016) | 4 lines [RegAllocFast] Properly track the physical register definitions on calls. PR26485 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261539 91177308-0d34-0410-b5e6-96231b3b80d8 commit c63a0fe41b81bac1ea6e1a053d2a8939e02edf17 Author: Hans Wennborg Date: Fri Feb 19 21:42:57 2016 +0000 Merging r261368: ------------------------------------------------------------------------ r261368 | hans | 2016-02-19 13:40:12 -0800 (Fri, 19 Feb 2016) | 3 lines Revert r255691 "[LoopVectorizer] Refine loop vectorizer's register usage calculator by ignoring specific instructions." 
It caused PR26509. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261369 91177308-0d34-0410-b5e6-96231b3b80d8 commit 78e9cd40a2ea27cc9300d900a7dccc75940f9eb0 Author: Hans Wennborg Date: Fri Feb 19 21:35:00 2016 +0000 Merging r261360: ------------------------------------------------------------------------ r261360 | dim | 2016-02-19 12:14:11 -0800 (Fri, 19 Feb 2016) | 19 lines Fix incorrect selection of AVX512 sqrt when OptForSize is on Summary: When optimizing for size, sqrt calls can be incorrectly selected as AVX512 VSQRT instructions. This is because X86InstrAVX512.td has a `Requires<[OptForSize]>` in its `avx512_sqrt_scalar` multiclass definition. Even if the target does not support AVX512, the class can apparently still be chosen, leading to an incorrect selection of `vsqrtss`. In PR26625, this lead to an assertion: Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!", because the `vsqrtss` instruction requires an XMM register, which is not available on i686 CPUs. Reviewers: grosbach, resistor, joker.eph Subscribers: spatel, emaste, llvm-commits Differential Revision: http://reviews.llvm.org/D17414 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261367 91177308-0d34-0410-b5e6-96231b3b80d8 commit fdf40bea4fc416643210790fff4345be98d97245 Author: Hans Wennborg Date: Fri Feb 19 21:28:08 2016 +0000 Merging r261365: ------------------------------------------------------------------------ r261365 | hans | 2016-02-19 13:26:31 -0800 (Fri, 19 Feb 2016) | 3 lines Revert r253557 "Alternative to long nops for X86 CPUs, by Andrey Turetsky" Turns out the new nop sequences aren't actually nops on x86_64 (PR26554). 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261366 91177308-0d34-0410-b5e6-96231b3b80d8 commit 413ee9f101de92d75fc11334ffeb6a054d67a18c Author: Renato Golin Date: Fri Feb 19 17:35:27 2016 +0000 Merge r261331: avoid out of bounds loads for interleaved access vectorization git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261341 91177308-0d34-0410-b5e6-96231b3b80d8 commit 124d2bc4dc3298d2b669be23a5b640d985319b65 Author: Hans Wennborg Date: Fri Feb 19 17:13:16 2016 +0000 Merging r261306: ------------------------------------------------------------------------ r261306 | matze | 2016-02-18 20:44:19 -0800 (Thu, 18 Feb 2016) | 1 line LegalizeDAG: Fix ExpandFCOPYSIGN assuming the same type on both inputs ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261334 91177308-0d34-0410-b5e6-96231b3b80d8 commit 6f28d52e9d3f87875732a0f2c1f3b03ef56be2db Author: Hans Wennborg Date: Fri Feb 19 00:08:56 2016 +0000 Merging r261258: ------------------------------------------------------------------------ r261258 | rnk | 2016-02-18 12:57:41 -0800 (Thu, 18 Feb 2016) | 14 lines [IR] Straighten out bundle overload of IRBuilder::CreateCall IRBuilder has two ways of putting bundle operands on calls: the default operand bundle, and an overload of CreateCall that takes an operand bundle list. Previously, this overload used a default argument of None. This made it impossible to distinguish between the case were the caller doesn't care about bundles, and the case where the caller explicitly wants no bundles. We behaved as if they wanted the latter behavior rather than the former, which led to problems with simplifylibcalls and WinEH. This change fixes it by making the parameter non-optional, so we can distinguish these two cases. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261282 91177308-0d34-0410-b5e6-96231b3b80d8 commit 6e961aa243f223ddb704ce708056238d7c1d7e24 Author: Hans Wennborg Date: Wed Feb 17 19:00:40 2016 +0000 Merging r261039: ------------------------------------------------------------------------ r261039 | rnk | 2016-02-16 16:17:33 -0800 (Tue, 16 Feb 2016) | 6 lines [X86] Fix a shrink-wrapping miscompile around __chkstk __chkstk clobbers EAX. If EAX is live across the prologue, then we have to take extra steps to save it. We already had code to do this if EAX was a register parameter. This change adapts it to work when shrink wrapping is used. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261136 91177308-0d34-0410-b5e6-96231b3b80d8 commit ebe537a930b58a5d32fc41ac133309139c92f7bd Author: David Majnemer Date: Wed Feb 17 18:49:28 2016 +0000 Merging r258616: ------------------------------------------------------------------------ r258616 | majnemer | 2016-01-22 22:00:44 -0800 (Fri, 22 Jan 2016) | 3 lines [PruneEH] Don't try to insert a terminator after another terminator LLVM's BasicBlock has a single terminator, it is not valid to have two. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261132 91177308-0d34-0410-b5e6-96231b3b80d8 commit 9f25a0678ed9f06088a09649a040a6bef362e6af Author: David Majnemer Date: Wed Feb 17 18:49:09 2016 +0000 Merging r258611: ------------------------------------------------------------------------ r258611 | majnemer | 2016-01-22 21:41:29 -0800 (Fri, 22 Jan 2016) | 6 lines [PruneEH] FuncletPads must not have undef operands Instead of RAUW with undef, replace the first non-token instruction with unreachable. This fixes PR26263. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261131 91177308-0d34-0410-b5e6-96231b3b80d8 commit 4212ebff28e32dbd26bd93f4fa77190d80357ed4 Author: David Majnemer Date: Wed Feb 17 18:48:45 2016 +0000 Merging r258610: ------------------------------------------------------------------------ r258610 | majnemer | 2016-01-22 21:41:27 -0800 (Fri, 22 Jan 2016) | 3 lines [PruneEH] Unify invoke and call handling in DeleteBasicBlock No functionality change is intended. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261130 91177308-0d34-0410-b5e6-96231b3b80d8 commit ba95fe05372c1934c30e21747480d401c1e5bcec Author: David Majnemer Date: Wed Feb 17 18:48:28 2016 +0000 Merging r258609: ------------------------------------------------------------------------ r258609 | majnemer | 2016-01-22 21:41:22 -0800 (Fri, 22 Jan 2016) | 5 lines [PruneEH] Reuse code from removeUnwindEdge PruneEH had functionality idential to removeUnwindEdge. Consolidate around removeUnwindEdge. No functionality change is intended. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261129 91177308-0d34-0410-b5e6-96231b3b80d8 commit 77c8a562e0c7c47df3bb988e2d230df6a9dcbe1d Author: David Majnemer Date: Wed Feb 17 18:42:17 2016 +0000 Merging r259702: ------------------------------------------------------------------------ r259702 | majnemer | 2016-02-03 13:30:34 -0800 (Wed, 03 Feb 2016) | 7 lines [LoopStrengthReduce] Don't rewrite PHIs with incoming values from CatchSwitches Bail out if we have a PHI on an EHPad that gets a value from a CatchSwitchInst. Because the CatchSwitchInst cannot be split, there is no good place to stick any instructions. This fixes PR26373. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261126 91177308-0d34-0410-b5e6-96231b3b80d8 commit c75c50f45b3d6d1d61ce6b411d12cedaadd71d5b Author: David Majnemer Date: Wed Feb 17 18:41:44 2016 +0000 Merging r260164: ------------------------------------------------------------------------ r260164 | akaylor | 2016-02-08 14:52:51 -0800 (Mon, 08 Feb 2016) | 5 lines [regalloc][WinEH] Do not mark intervals as not spillable if they contain a regmask Differential Revision: http://reviews.llvm.org/D16831 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261125 91177308-0d34-0410-b5e6-96231b3b80d8 commit fde3338c42eb085f169ecc3817c4736075e4a683 Author: David Majnemer Date: Wed Feb 17 18:41:08 2016 +0000 Merging r260733: ------------------------------------------------------------------------ r260733 | akaylor | 2016-02-12 13:10:16 -0800 (Fri, 12 Feb 2016) | 5 lines [WinEH] Prevent EH state numbering from skipping nested cleanup pads that never return Differential Revision: http://reviews.llvm.org/D17208 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261124 91177308-0d34-0410-b5e6-96231b3b80d8 commit 2507c58ca21ee01c359cd5ddf2fe84eea16366ee Author: Hans Wennborg Date: Wed Feb 17 17:57:26 2016 +0000 ReleaseNotes: new Win EH instructions; by David Majnemer git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261116 91177308-0d34-0410-b5e6-96231b3b80d8 commit d77e9352a80c954cf91335c236224e4ca7d9c5f4 Author: Hans Wennborg Date: Wed Feb 17 16:40:51 2016 +0000 Merging r261033: ------------------------------------------------------------------------ r261033 | akaylor | 2016-02-16 15:52:18 -0800 (Tue, 16 Feb 2016) | 5 lines Fix build LLVM with -D LLVM_USE_INTEL_JITEVENTS:BOOL=ON on 
Windows Differential Revision: http://reviews.llvm.org/D16940 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261106 91177308-0d34-0410-b5e6-96231b3b80d8 commit 7609bf251117db67abfe0d5b6622860afc769278 Author: Hans Wennborg Date: Wed Feb 17 00:05:18 2016 +0000 ReleaseNotes: -femultated-tls; by Chih-hung Hsieh git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261035 91177308-0d34-0410-b5e6-96231b3b80d8 commit 07fd930a2be55b0789737cd9769f0d0e42def3a7 Author: Hans Wennborg Date: Tue Feb 16 23:22:17 2016 +0000 Merging r260390: ------------------------------------------------------------------------ r260390 | jyknight | 2016-02-10 09:47:20 -0800 (Wed, 10 Feb 2016) | 12 lines [SPARC] Repair floating-point condition encodings in assembly parser. The encodings for floating point conditions A(lways) and N(ever) were incorrectly specified for the assembly parser, per Sparc manual v8 page 121. This change corrects that mistake. Also, strangely, all of the branch instructions already had MC test cases, except for the broken ones. Added the tests. 
Patch by Chris Dewhurst Differential Revision: http://reviews.llvm.org/D17074 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261031 91177308-0d34-0410-b5e6-96231b3b80d8 commit b7b1a389f7d546dbe6a67aa3bb0e66f689e99c1b Author: Hans Wennborg Date: Tue Feb 16 21:46:52 2016 +0000 Merging r258103: ------------------------------------------------------------------------ r258103 | kli | 2016-01-18 16:04:41 -0800 (Mon, 18 Jan 2016) | 2 lines parseArch() supports more variations of arch names for PowerPC builds ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261015 91177308-0d34-0410-b5e6-96231b3b80d8 commit fff361d60b64ac8ee9fcb523872aa7beea8ab8e1 Author: Hans Wennborg Date: Tue Feb 16 19:37:14 2016 +0000 ReleaseNotes: shrink-wrapping; by Quentin Colombet git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261001 91177308-0d34-0410-b5e6-96231b3b80d8 commit b129a10bb92529289bbb26d2335b12858e54a885 Author: Hans Wennborg Date: Tue Feb 16 19:29:54 2016 +0000 ReleaseNotes: typo git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261000 91177308-0d34-0410-b5e6-96231b3b80d8 commit d3b1222c56e9214e49a3d829e8e60910f8c88903 Author: Hans Wennborg Date: Tue Feb 16 19:27:50 2016 +0000 ReleaseNotes: Hexagon; by Krzysztof Parzyszek git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260999 91177308-0d34-0410-b5e6-96231b3b80d8 commit f1aaed61455e48b6c7444f706a6f997a864a42fa Author: Hans Wennborg Date: Tue Feb 16 19:20:40 2016 +0000 Merging r257864 and r258112: ------------------------------------------------------------------------ r257864 | axw | 2016-01-14 19:33:35 -0800 (Thu, 14 Jan 2016) | 12 lines [docs] Document LLVM_{BUILD,LINK}_LLVM_DYLIB Summary: Document the LLVM_BUILD_LLVM_DYLIB and LLVM_LINK_LLVM_DYLIB CMake options, move 
BUILD_SHARED_LIBS out of frequently-used, and add a note/warning to BUILD_SHARED_LIBS. Reviewers: beanz, delcypher, mjacob Subscribers: mjacob, llvm-commits Differential Revision: http://reviews.llvm.org/D16208 ------------------------------------------------------------------------ ------------------------------------------------------------------------ r258112 | axw | 2016-01-18 21:43:21 -0800 (Mon, 18 Jan 2016) | 8 lines docs: address post-commit review Rewording/expansion of CMake options suggested by Dan Liew. See http://reviews.llvm.org/D16208. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260996 91177308-0d34-0410-b5e6-96231b3b80d8 commit 80cc2ce6475352a29e19824443c2e0a31a37b44d Author: Hans Wennborg Date: Tue Feb 16 19:19:03 2016 +0000 ReleaseNotes: -DLLVM_LINK_LLVM_DYLIB=ON; by Andrew Wilkins git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260995 91177308-0d34-0410-b5e6-96231b3b80d8 commit 1e466cf4f8098acc7025f8d71dd0f64c4754ed63 Author: Hans Wennborg Date: Tue Feb 16 19:07:38 2016 +0000 ReleaseNotes: ORC in Kaleidoscope and C bindings; by Lang Hames git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260991 91177308-0d34-0410-b5e6-96231b3b80d8 commit b508a338d9d922a1ec3fbef698bd9fc6b5217ae0 Author: Hans Wennborg Date: Tue Feb 16 17:38:25 2016 +0000 ReleaseNotes: fix typo, reported by Eugene git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260985 91177308-0d34-0410-b5e6-96231b3b80d8 commit 4f229233ffc588a35e3738d3c358f2cf7a5da1d1 Author: Hans Wennborg Date: Fri Feb 12 19:03:12 2016 +0000 Merging r260703: ------------------------------------------------------------------------ r260703 | hans | 2016-02-12 11:02:39 -0800 (Fri, 12 Feb 2016) | 11 lines [CMake] don't build libLTO when LLVM_ENABLE_PIC is OFF When cmake is run with -DLLVM_ENABLE_PIC=OFF, build fails while linking shared library libLTO.so, 
because its dependencies are built with -fno-PIC. More details here: https://llvm.org/bugs/show_bug.cgi?id=26484. This diff reverts r252652 (git 9fd4377ddb83aee3c049dc8757e7771edbb8ee71), which removed check NOT LLVM_ENABLE_PIC before disabling build for libLTO.so. Patch by Igor Sugak! Differential Revision: http://reviews.llvm.org/D17049 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260704 91177308-0d34-0410-b5e6-96231b3b80d8 commit 7e2ddb94a31d1d085b0228e374799566faa82b8e Author: Peter Collingbourne Date: Fri Feb 12 18:46:48 2016 +0000 ARM: Mention r251322 in release notes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260702 91177308-0d34-0410-b5e6-96231b3b80d8 commit 347f4e82e80af64eca192381112ff6e9e3c7c8c3 Author: Hans Wennborg Date: Fri Feb 12 17:52:29 2016 +0000 Merging r260641: ------------------------------------------------------------------------ r260641 | axw | 2016-02-11 17:42:43 -0800 (Thu, 11 Feb 2016) | 10 lines Avoid linking LLVM component libraries with libLLVM Patch by Jack Howarth. When linking to libLLVM, don't also link to the component libraries that constitute libLLVM. Differential Revision: http://reviews.llvm.org/D16945 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260693 91177308-0d34-0410-b5e6-96231b3b80d8 commit e469b8a4f8daa8d29fe1d1f8ed87b36114dd5726 Author: Hans Wennborg Date: Fri Feb 12 16:18:07 2016 +0000 Merging r260427: ------------------------------------------------------------------------ r260427 | nha | 2016-02-10 12:13:58 -0800 (Wed, 10 Feb 2016) | 16 lines AMDGPU: Release the scavenged offset register during VGPR spill Summary: This fixes a crash where subsequent spills would be unable to scavenge a register. 
In particular, it fixes a crash in piglit's spec@glsl-1.50@execution@geometry@max-input-components (the test still has a shader that fails to compile because of too many SGPR spills, but at least it doesn't crash any more). This is a candidate for the release branch. Reviewers: arsenm, tstellarAMD Subscribers: qcolombet, arsenm Differential Revision: http://reviews.llvm.org/D16558 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260687 91177308-0d34-0410-b5e6-96231b3b80d8 commit ec95d6fe25dcb8b1450c4440da7c7a7e2982b6f2 Author: Renato Golin Date: Fri Feb 12 15:29:34 2016 +0000 [ARM/AArch64] 3.8.0 release notes changes git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260684 91177308-0d34-0410-b5e6-96231b3b80d8 commit 10a5589d08c1de3fcd715ce23697d4e591519595 Author: Dylan McKay Date: Fri Feb 12 06:38:02 2016 +0000 [AVR] Add release notes for 3.8 git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260659 91177308-0d34-0410-b5e6-96231b3b80d8 commit 12009f63c5d16b98334930a2b97d279c6bf82ea0 Author: Hans Wennborg Date: Fri Feb 12 02:32:24 2016 +0000 ReleaseNotes: oh, there already was a section about X86 git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260650 91177308-0d34-0410-b5e6-96231b3b80d8 commit fb52ed812c40eb8c6f1f69575bb231b62b319a95 Author: Hans Wennborg Date: Fri Feb 12 02:29:33 2016 +0000 ReleaseNotes: start off a 'Changes to X86' section git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260648 91177308-0d34-0410-b5e6-96231b3b80d8 commit e293d6c8d134ad352bb69defee17c5c902476933 Author: Hans Wennborg Date: Fri Feb 12 01:56:35 2016 +0000 Release Notes: RegisterScheduler::setDefault removed; by Mehdi Amini git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260643 91177308-0d34-0410-b5e6-96231b3b80d8 commit 7a0ec464f16e761602ac9c4e1f610029c0346745 Author: Hans Wennborg 
Date: Fri Feb 12 01:42:38 2016 +0000 Merging r260587: ------------------------------------------------------------------------ r260587 | pete | 2016-02-11 13:10:40 -0800 (Thu, 11 Feb 2016) | 13 lines Set load alignment on aggregate loads. When optimizing a extractvalue(load), we generate a load from the aggregate type. This load didn't have alignment set and so would get the alignment of the type. This breaks when the type is packed and so the alignment should be lower. For example, loading { int, int } would give us alignment of 4, but the original load from this type may have an alignment of 1 if packed. Reviewed by David Majnemer Differential revision: http://reviews.llvm.org/D17158 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260640 91177308-0d34-0410-b5e6-96231b3b80d8 commit 73a8ae3c0f127d45e391bd8b40be51c2fbc15dd8 Author: Hans Wennborg Date: Fri Feb 12 00:45:55 2016 +0000 ReleaseNotes: drop in-progress warning and svn checkout note git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260634 91177308-0d34-0410-b5e6-96231b3b80d8 commit 2ec5a319cacb9e13bf20bc8b9113d11212f10aae Author: Kai Nacke Date: Thu Feb 11 20:42:16 2016 +0000 Add LDC compiler to list of external OS projects using LLVM 3.8 git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260584 91177308-0d34-0410-b5e6-96231b3b80d8 commit 6ca6b8a0c8560555aed16b880f1499a5a0b4deda Author: Duncan P. N. 
Exon Smith Date: Wed Feb 10 19:20:23 2016 +0000 ReleaseNotes: Document changes to ilist API git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260415 91177308-0d34-0410-b5e6-96231b3b80d8 commit 185bb1287f864701d9b19eef89e7838162e7c793 Author: Hans Wennborg Date: Mon Feb 8 22:15:55 2016 +0000 Merging r259958: ------------------------------------------------------------------------ r259958 | evandro | 2016-02-05 16:01:41 -0800 (Fri, 05 Feb 2016) | 11 lines [AArch64] Add the scheduling model for Exynos-M1 Summary: Add the core scheduling model for the Samsung Exynos-M1 (ARMv8-A). Reviewers: jmolloy, rengolin, christof, MinSeongKIM, t.p.northover Subscribers: aemerson, rengolin, MatzeB Differential Revision: http://reviews.llvm.org/D16644 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260156 91177308-0d34-0410-b5e6-96231b3b80d8 commit 777479f80202057f041683129d4fd9e574ffea79 Author: Hans Wennborg Date: Mon Feb 8 18:31:49 2016 +0000 Merging r259696: ------------------------------------------------------------------------ r259696 | kfischer | 2016-02-03 13:13:33 -0800 (Wed, 03 Feb 2016) | 12 lines [DWARFDebug] Fix another case of overlapping ranges Summary: In r257979, I added code to ensure that we wouldn't merge DebugLocEntries if the pieces they describe overlap. Unfortunately, I failed to cover the case, where there may have multiple active Expressions in the entry, in which case we need to make sure that no two values overlap before we can perform the merge. This fixed PR26148. 
Reviewers: aprantl Differential Revision: http://reviews.llvm.org/D16742 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260121 91177308-0d34-0410-b5e6-96231b3b80d8 commit 7ecd92d75cda45668b6b5fdbcdd2142826514e66 Author: Daniel Sanders Date: Mon Feb 8 14:14:18 2016 +0000 [mips] Add initial release notes for MIPS32. git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260095 91177308-0d34-0410-b5e6-96231b3b80d8 commit ff65de018b6bb5bc4da3e923bbc0f55c5ca8e039 Author: Hans Wennborg Date: Fri Feb 5 22:17:38 2016 +0000 Merging r259381: ------------------------------------------------------------------------ r259381 | uweigand | 2016-02-01 10:31:19 -0800 (Mon, 01 Feb 2016) | 21 lines [SystemZ] Fix wrong-code generation for certain always-false conditions We've found another bug in the code generation logic conditions for a certain class of always-false conditions, those of the form if ((a & 1) < 0) These only reach the back end when compiling without optimization. The bug was introduced by the choice of using TEST UNDER MASK to implement a check for if ((a & MASK) < VAL) as if ((a & MASK) == 0) where VAL is less than the the lowest bit of MASK. This is correct in all cases except for VAL == 0, in which case the original condition is always false, but the replacement isn't. Fixed by excluding that particular case. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259940 91177308-0d34-0410-b5e6-96231b3b80d8 commit 56d368f5a52e60fa29891a6647034fffbba8713b Author: Hans Wennborg Date: Fri Feb 5 16:30:31 2016 +0000 Merging r259886 and r259888: ------------------------------------------------------------------------ r259886 | nemanjai | 2016-02-05 06:50:29 -0800 (Fri, 05 Feb 2016) | 5 lines Fix for PR 26193 This is a simple fix for a PowerPC intrinsic that was incorrectly defined (the return type was incorrect). ------------------------------------------------------------------------ ------------------------------------------------------------------------ r259888 | nemanjai | 2016-02-05 07:03:17 -0800 (Fri, 05 Feb 2016) | 3 lines Add the missing test case for PR26193 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259891 91177308-0d34-0410-b5e6-96231b3b80d8 commit 9be4dc8ab20a009ed5f24610888421ba84f8ec65 Author: Hans Wennborg Date: Fri Feb 5 00:55:39 2016 +0000 Merging r259840 on top of r259178: ------------------------------------------------------------------------ r259178 | echristo | 2016-01-28 23:20:30 -0800 (Thu, 28 Jan 2016) | 1 line Refactor common code for PPC fast isel load immediate selection. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r259840 | nemanjai | 2016-02-04 15:14:42 -0800 (Thu, 04 Feb 2016) | 7 lines Fix for PR 26356 Using the load immediate only when the immediate (whether signed or unsigned) can fit in a 16-bit signed field. Namely, from -32768 to 32767 for signed and 0 to 65535 for unsigned. This patch also ensures that we sign-extend under the right conditions. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259858 91177308-0d34-0410-b5e6-96231b3b80d8 commit 12d60e9e7c149a7d333e277dfbe25a720c88c585 Author: Hans Wennborg Date: Fri Feb 5 00:46:12 2016 +0000 Merging r259798, r259835: ------------------------------------------------------------------------ r259798 | nemanjai | 2016-02-04 08:18:08 -0800 (Thu, 04 Feb 2016) | 9 lines Enable the %s modifier in inline asm template string This patch corresponds to review: http://reviews.llvm.org/D16847 There are some files in glibc that use the output operand modifier even though it was deprecated in GCC. This patch just adds support for it to prevent issues with such files. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r259835 | nemanjai | 2016-02-04 14:36:10 -0800 (Thu, 04 Feb 2016) | 3 lines Provide a test case for rl259798 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259856 91177308-0d34-0410-b5e6-96231b3b80d8 commit 78a7d49140626994c23367b709e7b30b41e5cf70 Author: Hans Wennborg Date: Thu Feb 4 16:59:45 2016 +0000 Merging r259695: ------------------------------------------------------------------------ r259695 | tfiala | 2016-02-03 13:13:23 -0800 (Wed, 03 Feb 2016) | 11 lines Address NDEBUG-related linkage issues for Value::assertModuleIsMaterialized() The IR/Value class had a linkage issue present when LLVM was built as a library, and the LLVM library build time had different settings for NDEBUG than the client of the LLVM library. Clients could get into a state where the LLVM lib expected Value::assertModuleIsMaterialized() to be inline-defined in the header but clients expected that method to be defined in the LLVM library. 
See this llvm-commits thread for more details: http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20160201/329667.html ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259801 91177308-0d34-0410-b5e6-96231b3b80d8 commit 19b86f670bb5005761ecdcbe41423fee7fd200cf Author: Hans Wennborg Date: Thu Feb 4 02:16:36 2016 +0000 Merging r259740: ------------------------------------------------------------------------ r259740 | nemanjai | 2016-02-03 17:58:20 -0800 (Wed, 03 Feb 2016) | 2 lines Test case for PR 26381 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259743 91177308-0d34-0410-b5e6-96231b3b80d8 commit 0a7ec6ced609c340fc4028aa8a65996623dd4181 Author: Hans Wennborg Date: Wed Feb 3 22:00:13 2016 +0000 Merging r259177: ------------------------------------------------------------------------ r259177 | echristo | 2016-01-28 23:20:01 -0800 (Thu, 28 Jan 2016) | 5 lines Since LI/LIS sign extend the constant passed into the instruction we should check that the sign extended constant fits into 16-bits if we want a zero extended value, otherwise go ahead and put it together piecemeal. Fixes PR26356. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259713 91177308-0d34-0410-b5e6-96231b3b80d8 commit 6b78a48f5c068df653f1c12d2ad7832aaa45c7a1 Author: Hans Wennborg Date: Wed Feb 3 21:24:31 2016 +0000 Merging r259649: ------------------------------------------------------------------------ r259649 | jamesm | 2016-02-03 07:05:06 -0800 (Wed, 03 Feb 2016) | 11 lines [DemandedBits] Revert r249687 due to PR26071 This regresses a test in LoopVectorize, so I'll need to go away and think about how to solve this in a way that isn't broken. 
From the writeup in PR26071: What's happening is that ComputeKnownZeroes is telling us that all bits except the LSB are zero. We're then deciding that only the LSB needs to be demanded from the icmp's inputs. This is where we're wrong - we're assuming that after simplification the bits that were known zero will continue to be known zero. But they're not - during trivialization the upper bits get changed (because an XOR isn't shrunk), so the icmp fails. The fault is in demandedbits - its contract does clearly state that a non-demanded bit may either be zero or one. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259699 91177308-0d34-0410-b5e6-96231b3b80d8 commit 18a86c95fc36b5f622e8dc87f71252de37a1ed44 Author: Hans Wennborg Date: Wed Feb 3 21:18:35 2016 +0000 Merging r259645: ------------------------------------------------------------------------ r259645 | nemanjai | 2016-02-03 04:53:38 -0800 (Wed, 03 Feb 2016) | 4 lines Fix for PR 26381 Simple fix - Constant values were not being sign extended in FastIsel. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259698 91177308-0d34-0410-b5e6-96231b3b80d8 commit 1bfe978e5d0ac77f381b0ccef78204f7f3593a01 Author: Hans Wennborg Date: Tue Feb 2 17:41:39 2016 +0000 Merging r259346 (with adjustments for r258867): ------------------------------------------------------------------------ r259346 | ibreger | 2016-02-01 01:57:15 -0800 (Mon, 01 Feb 2016) | 3 lines AVX512: fix mask handling for gather/scatter/prefetch intrinsics. 
Differential Revision: http://reviews.llvm.org/D16755 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259536 91177308-0d34-0410-b5e6-96231b3b80d8 commit f24a5b58cd7ecc4fada221308073b9f13672d6c0 Author: Hans Wennborg Date: Tue Feb 2 17:35:07 2016 +0000 Merging r259342 (with s/p2align 4/align 16) because r258750 is not in 3.8. ------------------------------------------------------------------------ r259342 | ibreger | 2016-01-31 23:56:09 -0800 (Sun, 31 Jan 2016) | 3 lines AVX512 : Fix SETCCE lowering for KNL 32 bit. Differential Revision: http://reviews.llvm.org/D16752 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259533 91177308-0d34-0410-b5e6-96231b3b80d8 commit 5ea3635939d3e30182cd5a9881447890c8b69c42 Author: Hans Wennborg Date: Mon Feb 1 19:18:10 2016 +0000 Merging r259375: ------------------------------------------------------------------------ r259375 | majnemer | 2016-02-01 09:37:56 -0800 (Mon, 01 Feb 2016) | 6 lines [InstCombine] Don't transform (X+INT_MAX)>=(Y+INT_MAX) -> (X<=Y) This miscompile came about because we tried to use a transform which was only appropriate for xor operators when addition was present. This fixes PR26407. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259390 91177308-0d34-0410-b5e6-96231b3b80d8 commit aad888f28ee3e920b6e1a3828398f6c9c256f3d3 Author: Tim Northover Date: Fri Jan 29 22:00:06 2016 +0000 Merging r259228: ------------------------------------------------------------------------ r259228 | tnorthover | 2016-01-29 11:18:46 -0800 (Fri, 29 Jan 2016) | 13 lines ARM: don't mangle DAG constant if it has more than one use The basic optimisation was to convert (mul $LHS, $complex_constant) into roughly "(shl (mul $LHS, $simple_constant), $simple_amt)" when it was expected to be cheaper. The original logic checks that the mul only has one use (since we're mangling $complex_constant), but when used in even more complex addressing modes there may be an outer addition that can pick up the wrong value too. I *think* the ARM addressing-mode problem is actually unreachable at the moment, but that depends on complex assessments of the profitability of pre-increment addressing modes so I've put a real check in there instead of an assertion. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259247 91177308-0d34-0410-b5e6-96231b3b80d8 commit 5ad5d2c5359a4e878c732db59ee7fc6e0a25dc00 Author: Hans Wennborg Date: Fri Jan 29 21:33:02 2016 +0000 Merging r259236: ------------------------------------------------------------------------ r259236 | spatel | 2016-01-29 12:21:02 -0800 (Fri, 29 Jan 2016) | 8 lines [InstCombine] avoid an insertelement transformation that induces the opposite extractelement fold (PR26354) We would infinite loop because we created a shufflevector that was wider than needed and then failed to combine that with the insertelement. 
When subsequently visiting the extractelement from that shuffle, we see that it's unnecessary, delete it, and trigger another visit to the insertelement. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259245 91177308-0d34-0410-b5e6-96231b3b80d8 commit cd30d75375a03a290c6621da13cbab4f10545c56 Author: Tom Stellard Date: Fri Jan 29 16:45:55 2016 +0000 Merging r258922: ------------------------------------------------------------------------ r258922 | marek.olsak | 2016-01-27 06:19:45 -0500 (Wed, 27 Jan 2016) | 12 lines AMDGPU/SI: Stoney has only 16 LDS banks Summary: This is a candidate for stable, along with all patches that add the "stoney" processor. Reviewers: tstellarAMD Subscribers: arsenm Differential Revision: http://reviews.llvm.org/D16485 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259207 91177308-0d34-0410-b5e6-96231b3b80d8 commit a8a522e4217a621114bedcb1cedee056c59a6273 Author: Tom Stellard Date: Fri Jan 29 16:45:52 2016 +0000 Merging r257666: ------------------------------------------------------------------------ r257666 | changpeng.fang | 2016-01-13 15:39:25 -0500 (Wed, 13 Jan 2016) | 2 lines AMDGPU/SI: Update ISA version for FIJI ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259206 91177308-0d34-0410-b5e6-96231b3b80d8 commit c3c52626df3d5b9bd06b160450da8335deb24dc8 Author: Daniel Sanders Date: Thu Jan 28 21:05:40 2016 +0000 Bring back the test-suite export in test-release without bringing back the build failures. Summary: r257791 disabled the test-suite export since the addition of CMakeLists.txt was causing build failures. This patch exports the test-suite again but does so outside the source tree so that it isn't included in the Phase[123] builds. 
Reviewers: hans Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D16679 git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259093 91177308-0d34-0410-b5e6-96231b3b80d8 commit 72901a8afaae6c9f8ea63ba1c9c9d4699c7eec49 Author: Hans Wennborg Date: Thu Jan 28 18:23:25 2016 +0000 Merging r258971: ------------------------------------------------------------------------ r258971 | spatel | 2016-01-27 11:22:45 -0800 (Wed, 27 Jan 2016) | 26 lines [SimplifyCFG] limit recursion depth when speculating instructions (PR26308) This is a fix for: https://llvm.org/bugs/show_bug.cgi?id=26308 With the switch to using the TTI cost model in: http://reviews.llvm.org/rL228826 ...it became possible to hit a zero-cost cycle of instructions (gep -> phi -> gep...), so we need a cap for the recursion in DominatesMergePoint(). A recursion depth parameter was already added for a different reason in: http://reviews.llvm.org/rL255660 ...so we can just set a limit for it. I pulled "10" out of the air and made it an independent parameter that we can play with. It might be higher than it needs to be given the currently low default value of PHINodeFoldingThreshold (2). That's the starting cost value that we enter the recursion with, and most instructions have cost set to TCC_Basic (1), so I don't think we're going to speculate more than 2 instructions with the current parameters. As noted in the review and the TODO comment, we can do better than just limiting recursion depth. 
Differential Revision: http://reviews.llvm.org/D16637 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259066 91177308-0d34-0410-b5e6-96231b3b80d8 commit 131d76722983cb030c392bcb50bba940e98ea0c6 Author: Hans Wennborg Date: Thu Jan 28 18:16:55 2016 +0000 Merging r258471: ------------------------------------------------------------------------ r258471 | pirama | 2016-01-21 17:16:57 -0800 (Thu, 21 Jan 2016) | 14 lines Do not lower VSETCC if operand is an f16 vector Summary: SETCC with f16 vectors has OperationAction set to Expand but still gets lowered to FCM* intrinsics based on its result type. This patch skips lowering of VSETCC if the operand is an f16 vector. v4 and v8 tests included. Reviewers: ab, jmolloy Subscribers: srhines, llvm-commits Differential Revision: http://reviews.llvm.org/D15361 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259064 91177308-0d34-0410-b5e6-96231b3b80d8 commit 82cf8c0ebce3d4cac59da2cc36df0c0cd9730d72 Author: Hans Wennborg Date: Wed Jan 27 00:19:52 2016 +0000 Merging r258891: ------------------------------------------------------------------------ r258891 | hans | 2016-01-26 16:19:05 -0800 (Tue, 26 Jan 2016) | 25 lines test-release.sh: Ignore LC_CTYPE in sed invocation on Darwin Here, sed is used to prepare object files for comparison via cmp. On my Darwin 15.4.0 machine, LC_CTYPE is set to UTF-8 (by default, I believe). Under these circumstances, anything sed is made to read will be treated as UTF-8, prompting it to signal an error if it is not, like so: % sed s/a/b/ <(head -n1 /dev/random) >/dev/null; echo $? sed: RE error: illegal byte sequence 1 % To make sed work as expected, I need to set LC_CTYPE to C: % env LC_CTYPE=C sed s/a/b/ <(head -n1 /dev/random) >/dev/null; echo $? 
0 % Without this change, sed will exit with an error for every single file that it compares between phase 2 and phase 3, thereby making it look as if the differences were far larger than they are. Patch by Elias Pipping! Differential Revision: http://reviews.llvm.org/D16548 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258892 91177308-0d34-0410-b5e6-96231b3b80d8 commit 5eefadb302242035deaf04c5585bb4cd46125deb Author: Tom Stellard Date: Tue Jan 26 23:57:01 2016 +0000 Merging r258386: ------------------------------------------------------------------------ r258386 | thomas.stellard | 2016-01-20 23:28:34 -0500 (Wed, 20 Jan 2016) | 14 lines AMDGPU/SI: Pass whether to use the SI scheduler via Target Attribute Summary: Currently the SI scheduler can be selected via command line option, but it turned out it would be better if it was selectable via a Target Attribute. This patch adds "si-scheduler" attribute to the backend. Reviewers: tstellarAMD, echristo Subscribers: echristo, arsenm Differential Revision: http://reviews.llvm.org/D16192 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258885 91177308-0d34-0410-b5e6-96231b3b80d8 commit 44fb5881d8edf448d6231a5b8df583aecd6bcd42 Author: Sanjoy Das Date: Tue Jan 26 22:29:46 2016 +0000 Merging r258184: ------------------------------------------------------------------------ r258184 | sanjoy | 2016-01-19 12:53:51 -0800 (Tue, 19 Jan 2016) | 20 lines [SCEV] Fix PR26207 In some cases, the max backedge taken count can be more conservative than the exact backedge taken count (for instance, because ScalarEvolution::getRange is not control-flow sensitive whereas computeExitLimitFromICmp can be). 
In these cases, computeExitLimitFromCond (specifically the bit that deals with `and` and `or` instructions) can create an ExitLimit instance with a `SCEVCouldNotCompute` max backedge count expression, but a computable exact backedge count expression. This violates an implicit SCEV assumption: a computable exact BE count should imply a computable max BE count. This change - Makes the above implicit invariant explicit by adding an assert to ExitLimit's constructor - Changes `computeExitLimitFromCond` to be more robust around conservative max backedge counts ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258869 91177308-0d34-0410-b5e6-96231b3b80d8 commit 4d1ef71f362e014aaaaefeb36abe83c24b578e40 Author: Hans Wennborg Date: Tue Jan 26 19:44:49 2016 +0000 Revert accidental changes from r258805 git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258844 91177308-0d34-0410-b5e6-96231b3b80d8 commit 9a498947cdb25737faecfdabcb64848432c49d68 Author: Dimitry Andric Date: Tue Jan 26 19:43:59 2016 +0000 Merging r258436: ------------------------------------------------------------------------ r258436 | dim | 2016-01-21 22:57:49 +0100 (Thu, 21 Jan 2016) | 17 lines Let test-release.sh checkout subprojects directly into the target tree, instead of using symlinks Summary: In the past I have run into several problems with the way `test-release.sh` creates all the subproject directories as siblings, and then uses symlinks to stitch them all together. In some scenarios this leads to clang not being able to find header files, etc. This patch changes the script so it directly exports into the correct target locations for each subproject. 
Reviewers: hans Subscribers: emaste, llvm-commits Differential Revision: http://reviews.llvm.org/D16420 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258842 91177308-0d34-0410-b5e6-96231b3b80d8 commit 4b85564ba4a41465155b9128a68e5e14fea78365 Author: Hans Wennborg Date: Tue Jan 26 19:31:16 2016 +0000 Merging r258729: ------------------------------------------------------------------------ r258729 | matze | 2016-01-25 14:08:25 -0800 (Mon, 25 Jan 2016) | 13 lines X86ISelLowering: Fix cmov(cmov) special lowering bug There's a special case in EmitLoweredSelect() that produces an improved lowering for cmov(cmov) patterns. However this special lowering is currently broken if the inner cmov has multiple users so this patch stops using it in this case. If you wonder why this wasn't fixed by continuing to use the special lowering and inserting a 2nd PHI for the inner cmov: I believe this would incur additional copies/register pressure so the special lowering does not improve upon the normal one anymore in this case. This fixes http://llvm.org/PR26256 (= rdar://24329747) ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258840 91177308-0d34-0410-b5e6-96231b3b80d8 commit db6cb1a90cd0ab35e2dadc97962a5d67742c0bbc Author: James Molloy Date: Tue Jan 26 13:30:49 2016 +0000 Merging r258690: ------------------------------------------------------------------------ r258690 | jamesm | 2016-01-25 14:49:36 +0000 (Mon, 25 Jan 2016) | 7 lines [DemandedBits] Fix computation of demanded bits for ICmps The computation of ICmp demanded bits is independent of the individual operand being evaluated. We simply return a mask consisting of the minimum leading zeroes of both operands. We were incorrectly passing "I" to ComputeKnownBits - this should be "UserI->getOperand(0)". 
In cases where we were evaluating the 1th operand, we were taking the minimum leading zeroes of it and itself. This should fix PR26266. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258805 91177308-0d34-0410-b5e6-96231b3b80d8 commit 836d2ad83c5e955a23f6e3b78418cb250c95c88b Author: Hans Wennborg Date: Mon Jan 25 22:24:50 2016 +0000 Merging r258406: ------------------------------------------------------------------------ r258406 | vedantk | 2016-01-21 09:04:42 -0800 (Thu, 21 Jan 2016) | 16 lines [GCOV] Avoid emitting profile arcs for module and skeleton CUs Do not emit profile arc files and note files for module and skeleton CU's. Our users report seeing unexpected *.gcda and *.gcno files in their projects when using gcov-style profiling with modules or frameworks. The unwanted files come from these modules. This is not very helpful for end-users. Further, we've seen reports of instrumented programs crashing while writing these files out (due to I/O failures). 
rdar://problem/22838296 Reviewed-by: aprantl Differential Revision: http://reviews.llvm.org/D15997 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258731 91177308-0d34-0410-b5e6-96231b3b80d8 commit 16f83af7618a4dfb4ef1891b07adb23cc54e4c86 Author: Hans Wennborg Date: Fri Jan 22 18:37:31 2016 +0000 Merging r258416 and r258428: ------------------------------------------------------------------------ r258416 | spatel | 2016-01-21 10:01:57 -0800 (Thu, 21 Jan 2016) | 2 lines make helper functions static; NFCI ------------------------------------------------------------------------ ------------------------------------------------------------------------ r258428 | spatel | 2016-01-21 12:19:54 -0800 (Thu, 21 Jan 2016) | 15 lines [LibCallSimplifier] don't get fooled by a fake fmin() This is similar to the bug/fix: https://llvm.org/bugs/show_bug.cgi?id=26211 http://reviews.llvm.org/rL258325 The fmin() test case reveals another bug caused by sloppy code duplication. It will crash without this patch because fp128 is a valid floating-point type, but we would think that we had matched a function that used doubles. The new helper function can be used to replace similar checks that are used in several other places in this file. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258512 91177308-0d34-0410-b5e6-96231b3b80d8 commit c89d9654310e0f5b1171888c6573d09c9e66d0c4 Author: Hans Wennborg Date: Fri Jan 22 18:26:38 2016 +0000 Merging r257886: ------------------------------------------------------------------------ r257886 | jamesm | 2016-01-15 02:36:01 -0800 (Fri, 15 Jan 2016) | 3 lines [CodeGenPrepare] Try and appease sanitizers dupRetToEnableTailCallOpts(BB) can invalidate BB. It must run *after* we iterate across BB! 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258510 91177308-0d34-0410-b5e6-96231b3b80d8 commit 961a0e424cc7a63ee57cc8506c8a04cbf6012e1f Author: Hans Wennborg Date: Wed Jan 20 21:49:02 2016 +0000 Merging r258325: ------------------------------------------------------------------------ r258325 | spatel | 2016-01-20 09:41:14 -0800 (Wed, 20 Jan 2016) | 21 lines [LibCallSimplifier] don't get fooled by a fake sqrt() The test case will crash without this patch because the subsequent call to hasUnsafeAlgebra() assumes that the call instruction is an FPMathOperator (ie, returns an FP type). This part of the function signature check was omitted for the sqrt() case, but seems to be in place for all other transforms. Before: http://reviews.llvm.org/rL257400 ...we would have needlessly continued execution in optimizeSqrt(), but the bug was harmless because we'd eventually fail some other check and return without damage. This should fix: https://llvm.org/bugs/show_bug.cgi?id=26211 Differential Revision: http://reviews.llvm.org/D16198 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258353 91177308-0d34-0410-b5e6-96231b3b80d8 commit 3acb8d3c6d4e470172fb244c809bc6fdd7948c29 Author: Hans Wennborg Date: Wed Jan 20 21:30:57 2016 +0000 Merging r257940: ------------------------------------------------------------------------ r257940 | djg | 2016-01-15 13:56:40 -0800 (Fri, 15 Jan 2016) | 10 lines [SelectionDAG] CSE nodes with differing SDNodeFlags In the optimizer (GVN etc.) when eliminating redundant nodes with different flags, the flags are ignored for the purposes of testing for congruence, and then intersected for the purposes of producing a result that supports the union of all the uses. This commit makes SelectionDAG's CSE do the same thing, allowing it to CSE nodes in more cases. 
This fixes PR26063. Differential Revision: http://reviews.llvm.org/D15957 ------------------------------------------------------------------------ Merging r257942: ------------------------------------------------------------------------ r257942 | djg | 2016-01-15 14:07:35 -0800 (Fri, 15 Jan 2016) | 2 lines Remove a now-empty file left behind by r257940. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258351 91177308-0d34-0410-b5e6-96231b3b80d8 commit 3260476414aa2e03566d205f742220a382f4ce07 Author: Hans Wennborg Date: Wed Jan 20 21:14:05 2016 +0000 Merging r258273: ------------------------------------------------------------------------ r258273 | josepht | 2016-01-19 18:15:15 -0800 (Tue, 19 Jan 2016) | 37 lines [Inliner/WinEH] Honor implicit nounwinds Summary: Funclet EH tables require that a given funclet have only one unwind destination for exceptional exits. The verifier will therefore reject e.g. two cleanuprets with different unwind dests for the same cleanup, or two invokes exiting the same funclet but to different unwind dests. Because catchswitch has no 'nounwind' variant, and because IR producers are not *required* to annotate calls which will not unwind as 'nounwind', it is legal to nest a call or an "unwind to caller" catchswitch within a funclet pad that has an unwind destination other than caller; it is undefined behavior for such a call or catchswitch to unwind. Normally when inlining an invoke, calls in the inlined sequence are rewritten to invokes that unwind to the callsite invoke's unwind destination, and "unwind to caller" catchswitches in the inlined sequence are rewritten to unwind to the callsite invoke's unwind destination. 
However, if such a call or "unwind to caller" catchswitch is located in a callee funclet that has another exceptional exit with an unwind destination within the callee, applying the normal transformation would give that callee funclet multiple unwind destinations for its exceptional exits. There would be no way for EH table generation to determine which is the "true" exit, and the verifier would reject the function accordingly. Add logic to the inliner to detect these cases and leave such calls and "unwind to caller" catchswitches as calls and "unwind to caller" catchswitches in the inlined sequence. This fixes PR26147. Reviewers: rnk, andrew.w.kaylor, majnemer Subscribers: alexcrichton, llvm-commits Differential Revision: http://reviews.llvm.org/D16319 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258349 91177308-0d34-0410-b5e6-96231b3b80d8 commit 38e40410b1fa6441db511e760bc6ae263a8bbaee Author: Renato Golin Date: Wed Jan 20 18:01:05 2016 +0000 Merging r258308: [AArch64] Fix two bugs in the .inst directive git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258326 91177308-0d34-0410-b5e6-96231b3b80d8 commit 870ff87d1cd25f9a2dd01d7c75489a63eca377c2 Author: Quentin Colombet Date: Wed Jan 20 01:14:03 2016 +0000 Merging r258221: ------------------------------------------------------------------------ r258221 | qcolombet | 2016-01-19 15:29:03 -0800 (Tue, 19 Jan 2016) | 8 lines [X86] Do not run shrink-wrapping on function with split-stack attribute or HiPE calling convention. The implementation of the related callbacks in the x86 backend for such functions are not ready to deal with a prologue block that is not the entry block of the function. This fixes PR26107, but the longer term solution would be to fix those callbacks. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258269 91177308-0d34-0410-b5e6-96231b3b80d8 commit 90fef5a5b6514f60396e81d7fa20581d05ca659b Author: Quentin Colombet Date: Wed Jan 20 01:09:12 2016 +0000 Merging r258207: ------------------------------------------------------------------------ r258207 | qcolombet | 2016-01-19 14:31:12 -0800 (Tue, 19 Jan 2016) | 1 line [MachineFunction] Constify getter. NFC. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258268 91177308-0d34-0410-b5e6-96231b3b80d8 commit 770ec8cf9ae215e26cb6d946b9d533151fe0558d Author: Hans Wennborg Date: Wed Jan 20 00:48:30 2016 +0000 Merging r257977: ------------------------------------------------------------------------ r257977 | kfischer | 2016-01-15 17:11:33 -0800 (Fri, 15 Jan 2016) | 1 line [DwarfDebug] Move MergeValues to .cpp, NFC ------------------------------------------------------------------------ Merging r257979: ------------------------------------------------------------------------ r257979 | kfischer | 2016-01-15 17:15:32 -0800 (Fri, 15 Jan 2016) | 11 lines [DwarfDebug] Don't merge DebugLocEntries if their pieces overlap Summary: Later in DWARF emission we check that DebugLocEntries have non-overlapping pieces, so we should create any such entries by merging here. Fixes PR26163. Reviewers: aprantl Differential Revision: http://reviews.llvm.org/D16249 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258267 91177308-0d34-0410-b5e6-96231b3b80d8 commit d26a2e75e58f56a289b911c0bf582be4f8f655f1 Author: NAKAMURA Takumi Date: Wed Jan 20 00:32:09 2016 +0000 [r257857] lli: use llvm::utostr() instead of std::to_string(). 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258266 91177308-0d34-0410-b5e6-96231b3b80d8 commit 316ab7147bf233fd6a84977173f99b3fc9a26e0e Author: NAKAMURA Takumi Date: Wed Jan 20 00:28:22 2016 +0000 [r257732] Mark remote-JIT tests as XFAIL, as well as win32, for targeting mingw32. git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258265 91177308-0d34-0410-b5e6-96231b3b80d8 commit f5575ecd57c4ab8cdae1a80fecc01029d14fe4e6 Author: Hans Wennborg Date: Tue Jan 19 20:49:25 2016 +0000 Merging r257875: ------------------------------------------------------------------------ r257875 | jamesm | 2016-01-15 01:20:19 -0800 (Fri, 15 Jan 2016) | 11 lines [InstCombine] Rewrite bswap/bitreverse handling completely. There are several requirements that ended up with this design; 1. Matching bitreversals is too heavyweight for InstCombine and doesn't really need to be done so early. 2. Bitreversals and byteswaps are very related in their matching logic. 3. We want to implement support for matching more advanced bswap/bitreverse patterns like partial bswaps/bitreverses. 4. Bswaps are best matched early in InstCombine. The result of these is that a new utility function is created in Transforms/Utils/Local.h that can be configured to search for bswaps, bitreverses or both. InstCombine uses it to find only bswaps, CGP uses it to find only bitreversals. We can then extend the matching logic in one place only. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258180 91177308-0d34-0410-b5e6-96231b3b80d8 commit e12bf2aba135af15b33cca8a8c0fb80189a16b80 Author: Hans Wennborg Date: Tue Jan 19 19:28:41 2016 +0000 Merging r258168: ------------------------------------------------------------------------ r258168 | hans | 2016-01-19 11:21:58 -0800 (Tue, 19 Jan 2016) | 3 lines test-release.sh: Use CMake also for Darwin This didn't work for 3.7, but hopefully it should work now. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258170 91177308-0d34-0410-b5e6-96231b3b80d8 commit 1618eb04cdfdd3febf77bc67cdac5307e5528b96 Author: Hans Wennborg Date: Tue Jan 19 18:53:02 2016 +0000 Merging r257925, r257929, r257930, and r257997: ------------------------------------------------------------------------ r257925 | mren | 2016-01-15 11:35:42 -0800 (Fri, 15 Jan 2016) | 10 lines CXX_FAST_TLS calling convention: fix issue on X86-64. When we have a single basic block, the explicit copy-back instructions should be inserted right before the terminator. Before this fix, they were wrongly placed at the beginning of the basic block. I will commit fixes to other platforms as well. PR26136 ------------------------------------------------------------------------ ------------------------------------------------------------------------ r257929 | mren | 2016-01-15 12:13:28 -0800 (Fri, 15 Jan 2016) | 10 lines CXX_FAST_TLS calling convention: fix issue on AArch64. When we have a single basic block, the explicit copy-back instructions should be inserted right before the terminator. Before this fix, they were wrongly placed at the beginning of the basic block. I will commit fixes to other platforms as well. 
PR26136 ------------------------------------------------------------------------ ------------------------------------------------------------------------ r257930 | mren | 2016-01-15 12:24:11 -0800 (Fri, 15 Jan 2016) | 8 lines CXX_FAST_TLS calling convention: fix issue on ARM. When we have a single basic block, the explicit copy-back instructions should be inserted right before the terminator. Before this fix, they were wrongly placed at the beginning of the basic block. PR26136 ------------------------------------------------------------------------ ------------------------------------------------------------------------ r257997 | mren | 2016-01-16 08:39:46 -0800 (Sat, 16 Jan 2016) | 12 lines CXX_FAST_TLS calling convention: fix issue on x86-64. %RBP can't be handled explicitly. We generate the following code: pushq %rbp movq %rsp, %rbp ... movq %rbx, (%rbp) ## 8-byte Spill where %rbp will be overwritten by the spilled value. The fix is to let PEI handle %RBP. PR26136 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258162 91177308-0d34-0410-b5e6-96231b3b80d8 commit aa96fb86c3304e81c2f53700223d0e795c302276 Author: Hans Wennborg Date: Tue Jan 19 18:26:37 2016 +0000 Merging r257902 (and r257775) ------------------------------------------------------------------------ r257775 | jyknight | 2016-01-14 08:33:21 -0800 (Thu, 14 Jan 2016) | 3 lines Revert "Stop increasing alignment of externally-visible globals on ELF platforms." This reverts commit r257719, due to PR26144. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r257902 | jyknight | 2016-01-15 08:33:06 -0800 (Fri, 15 Jan 2016) | 17 lines Stop increasing alignment of externally-visible globals on ELF platforms. 
With ELF, the alignment of a global variable in a shared library will get copied into an executable linked against it, if the executable even accesses the variable. So, it's not possible to implicitly increase alignment based on access patterns, or you'll break existing binaries. This happened to affect libc++'s std::cout symbol, for example. See thread: http://thread.gmane.org/gmane.comp.compilers.clang.devel/45311 (This is a re-commit of r257719, without the bug reported in PR26144. I've tweaked the code to not assert-fail in enforceKnownAlignment when computeKnownBits doesn't recurse far enough to find the underlying Alloca/GlobalObject value.) Differential Revision: http://reviews.llvm.org/D16145 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258155 91177308-0d34-0410-b5e6-96231b3b80d8 commit ed504bedd7420790d55e441e35e5449eaa40029e Author: Hans Wennborg Date: Tue Jan 19 17:28:24 2016 +0000 Merging r257905: ------------------------------------------------------------------------ r257905 | hans | 2016-01-15 09:04:45 -0800 (Fri, 15 Jan 2016) | 3 lines test-release.sh: Fix clang-tools-extra symlink for CMake build The CMake and Autoconf builds want the symlink set up differently. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258146 91177308-0d34-0410-b5e6-96231b3b80d8 commit c1316b6adfbb17b961a3bee357e728ca0d4d1c96 Author: Hans Wennborg Date: Thu Jan 14 23:24:17 2016 +0000 Merging r257791: ------------------------------------------------------------------------ r257791 | hans | 2016-01-14 11:21:14 -0800 (Thu, 14 Jan 2016) | 4 lines Exclude test-suite from CMake builds in test-release.sh It's broken. In 3.7 there wasn't a CMake build for test-suite at all, so we're not losing something we had before. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@257836 91177308-0d34-0410-b5e6-96231b3b80d8 commit 25d64abdb39a834541edbafdc686f371dad58a76 Author: Hans Wennborg Date: Thu Jan 14 17:52:28 2016 +0000 Merging r257730: ------------------------------------------------------------------------ r257730 | majnemer | 2016-01-13 17:20:03 -0800 (Wed, 13 Jan 2016) | 11 lines [X86] Don't alter HasOpaqueSPAdjustment after we've relied on it We rely on HasOpaqueSPAdjustment not changing after we've calculated things based on it. Things like whether or not we can use 'rep;movs' to copy bytes around, that sort of thing. If it changes, invariants in the backend will quietly break. This situation arose when we had a call to memcpy *and* a COPY of the FLAGS register where we would attempt to reference local variables using %esi, a register that was clobbered by the 'rep;movs'. This fixes PR26124. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@257779 91177308-0d34-0410-b5e6-96231b3b80d8 commit 7b9eef037dbacab102881f19826fb04cfe69c7e7 Author: Hans Wennborg Date: Thu Jan 14 00:23:32 2016 +0000 ReleaseNotes.rst: a few entries from Rafael git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@257725 91177308-0d34-0410-b5e6-96231b3b80d8 commit 53d8ef00d82460b9c8ce08617d91bbce8313d4a3 Author: Hans Wennborg Date: Wed Jan 13 21:18:59 2016 +0000 Merging r257648: ------------------------------------------------------------------------ r257648 | hans | 2016-01-13 10:59:45 -0800 (Wed, 13 Jan 2016) | 1 line Fix struct/class mismatch for MachineSchedContext ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@257668 91177308-0d34-0410-b5e6-96231b3b80d8 commit 38fcb6f10f0ae867bfe796f26bf1a336bf0dddf0 
Author: Dimitry Andric Date: Wed Jan 13 19:37:51 2016 +0000 Merging r257645: ------------------------------------------------------------------------ r257645 | dim | 2016-01-13 19:29:46 +0100 (Wed, 13 Jan 2016) | 22 lines Avoid undefined behavior in LinkAllPasses.h The LinkAllPasses.h file is included in several main programs, to force a large number of passes to be linked in. However, the ForcePassLinking constructor uses undefined behavior, since it calls member functions on `nullptr`, e.g.: ((llvm::Function*)nullptr)->viewCFGOnly(); llvm::RGPassManager RGM; ((llvm::RegionPass*)nullptr)->runOnRegion((llvm::Region*)nullptr, RGM); When the optimization level is -O2 or higher, the code below the first nullptr dereference is optimized away, and replaced by `ud2` (on x86). Therefore, the calls after that first dereference are never emitted. In my case, I noticed there was no call to `llvm::sys::RunningOnValgrind()`! Replace instances of dereferencing `nullptr` with either objects on the stack, or regular function calls. Differential Revision: http://reviews.llvm.org/D15996 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@257660 91177308-0d34-0410-b5e6-96231b3b80d8 commit 9faaefea9cbef6453486ed825c1ca4305bf68324 Author: Hans Wennborg Date: Wed Jan 13 19:03:44 2016 +0000 Drop 'svn' suffix from version. git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@257651 91177308-0d34-0410-b5e6-96231b3b80d8 commit 5ab5731312b6a8736fbe7fad1cb10f384b3a295e Author: Hans Wennborg Date: Wed Jan 13 17:34:56 2016 +0000 Creating release_38 branch off revision 257626 git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@257630 91177308-0d34-0410-b5e6-96231b3b80d8 commit 1526147c0ad76667de046ef168d5cc5eee381bb7 Author: TB Schardl Date: Tue Jan 12 12:40:37 2016 +0000 Bug fix to include setSuccessor method on reattach instruction. 
commit 2b1b34e00cbc085a4a9a290c65fffaabae9517dc Author: TB Schardl Date: Thu Dec 31 04:05:48 2015 +0000 Add -instrument-cilk support to detach2cilk, cilkabi commit 4328b4468c0e42c1f89f5212e1386c38394edf20 Merge: 062301f913b 8a32dc47d61 Author: TB Schardl Date: Wed Dec 30 01:45:54 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 062301f913b5ac657607f0c758392ac8a18d5c13 Merge: 9893cc49b22 48a798cb4b4 Author: TB Schardl Date: Tue Dec 29 22:23:46 2015 +0000 Merge branch 'tb-scratch' of github.com:taekwonbilly/Parallel-IR into tb-scratch commit 9893cc49b223291071ea6633cd3f5c376acce9dd Author: TB Schardl Date: Tue Dec 29 22:22:01 2015 +0000 SimplifyCFG now removes unnecessary Sync instructions. commit 48a798cb4b473470ad6ceaa6cc3e45dd569d0627 Merge: 54dbddeaec7 8d00ea68834 Author: Billy Moses Date: Wed Nov 11 10:50:51 2015 -0500 for counting commit 54dbddeaec7fa2bcdb3ad906c2cb99232342f00b Merge: 19481e914d1 88d51ce445e Author: Billy Moses Date: Wed Nov 11 10:18:55 2015 -0500 moded commit 8d00ea68834b61ce260b8111beb594cbdc8c78b9 Merge: 2ae39eb69c5 65cad952e45 Author: TB Schardl Date: Fri Nov 6 11:51:30 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 2ae39eb69c54cfb2206514873bca9cb1ac3738b0 Author: TB Schardl Date: Thu Nov 5 14:58:05 2015 +0000 [SimplifyCFG] Fixed bug where empty reattach blocks with multiple predecessors would crash this pass.
commit 7bd0f59e1aa75abe8a238d1ec166d6148722ebdd Merge: 8ae8e06e3cd c135da21a3c Author: TB Schardl Date: Wed Nov 4 02:12:41 2015 +0000 Merge branch 'billy-scratch' of github.com:taekwonbilly/Parallel-IR into tb-scratch commit 8ae8e06e3cdf762ce50de096115ecfac5c998b63 Merge: a9530cd93a2 7e6636cb71f Author: TB Schardl Date: Wed Nov 4 01:26:22 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit c135da21a3cca833224099aeeac85aad0ec5144d Author: Billy Moses Date: Mon Nov 2 23:13:23 2015 -0500 all cleaned up & ready to go commit a9530cd93a293b6e21665883a74b42859061acd8 Merge: 329f5fad3f7 1965754e592 Author: TB Schardl Date: Fri Oct 16 14:24:35 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 329f5fad3f72dd84a3e4cf5818512a6b7e81c657 Merge: e0717ad48cc 600b09339de Author: TB Schardl Date: Fri Oct 16 00:37:12 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit e0717ad48cc7c447b4f1159116b06ff82c4efdd3 Merge: 20e95d87b5e 4b6405d130b Author: TB Schardl Date: Fri Oct 16 00:36:05 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 20e95d87b5e8234390f2b4cc6ef46a5ebea58e0c Merge: 44d4e427c7f bcd41c02dde Author: TB Schardl Date: Tue Oct 13 16:57:43 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit cac7ff23aac4127106c74d7cdaa5b6f11d3d5e00 Merge: ab253e4510c 387b1f61aad Author: Billy Moses Date: Tue Oct 13 12:34:49 2015 -0400 Merge branch 'master' of github.com:taekwonbilly/Parallel-IR into billy-scratch commit ab253e4510c21e111e4c56fda345c19d3b232650 Author: Billy Moses Date: Tue Oct 13 12:34:31 2015 -0400 cache loop2cilk commit 44d4e427c7f008295af785fbad29857952be6d9a Merge: 387b1f61aad 938c3d3164e Author: TB Schardl Date: Tue Oct 13 12:52:44 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 387b1f61aad986ddc9032d82e2e48e9c5e1b064d Merge: 81e2fd12aea 3d58b720c31 Author: TB Schardl Date: 
Fri Oct 2 19:47:27 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 81e2fd12aea84c2ab59cd73cbcad2665a947ce0b Author: TB Schardl Date: Fri Oct 2 19:43:24 2015 +0000 Adding Detach2Cilksan pass to enable Cilksan race detection. commit 7a634e24c5bc7a520e8979646da17c09895f5425 Author: TB Schardl Date: Fri Oct 2 19:42:47 2015 +0000 Some debugging of Detach2Cilk commit cdf14afd5eeb21dedc32c3a62b1f76af95016974 Merge: 25f43658061 36caf0659ff Author: TB Schardl Date: Tue Sep 22 17:53:11 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 25f436580618875268ef313894e05802617bbdf0 Author: TB Schardl Date: Tue Sep 22 04:00:34 2015 +0000 Fixing loop rotation to prevent it from destroying sync instructions. commit 8ec1e7597748edd42654657f992aa4209bd04cf9 Merge: 4fc3d85490a dabf510ba1b Author: TB Schardl Date: Sun Sep 20 19:06:45 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 4fc3d85490a81d6adbd21b5f66646a9f397fe333 Author: TB Schardl Date: Sun Sep 20 19:06:24 2015 +0000 Fixed GVN to handle scalarPRE around detach/reattach and to abort load PRE in the event of an aliased access from a detach or sync. commit dc7cd94ca46ba477e113d2844de893b82b95b081 Author: TB Schardl Date: Sun Sep 20 19:05:06 2015 +0000 Updated AliasAnalysis to analyze detached blocks for aliasing information for detach and sync instructions. commit 421d2351ba4e14ff211a3c6cbe9258ccddf19afa Merge: 54b97afc6bc 29f50e97835 Author: TB Schardl Date: Sun Sep 13 12:11:13 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 54b97afc6bc145d1e28a8a3c94de524d809cddf1 Author: TB Schardl Date: Wed Sep 9 20:25:28 2015 +0000 Making syncs look like fences, in order to fix memory analysis issues. 
commit 4420c17e34959d2a33ba4c9fd9ae5ff6066f797a Merge: e6d3b51ad7d 3c76435341d Author: TB Schardl Date: Wed Sep 9 01:12:29 2015 +0000 Merge branch 'billy-scratch' of github.com:taekwonbilly/Parallel-IR into tb-scratch commit e6d3b51ad7de5aaece38701cbe0b9401f481b13c Merge: eaa3d3ce261 9e01a11e67c Author: TB Schardl Date: Tue Sep 8 21:47:52 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 3c76435341d943764ecafb324971a254c95b39df Author: Billy Moses Date: Tue Sep 8 16:40:32 2015 -0400 Working parallel opt pass commit eaa3d3ce261db5812277ba6cd250ce501f77849c Merge: d9eeab4f9c8 3d88beedefc Author: TB Schardl Date: Tue Sep 8 17:14:47 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit d9eeab4f9c8bd662a771d87e73f61165c12cd14b Merge: f09f6e7a51b 7e316839810 Author: TB Schardl Date: Tue Sep 8 15:49:47 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 5f20c20dcf53f27e56915263e99d810bbf403697 Author: Billy Moses Date: Mon Sep 7 22:05:31 2015 -0400 Semi-working cilk pass commit f09f6e7a51b1b270a48d2f66312ff282f1ad6959 Author: TB Schardl Date: Fri Sep 4 12:13:17 2015 -0400 Fixed build problems with last merge. 
commit 8b666563572297a50f9a17efbd060e8f780f0f04 Merge: abe3f70de04 2354b37ae03 Author: TB Schardl Date: Fri Sep 4 11:40:09 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 41ddcdf5d8e40544ece73167368487f0195b1b5f Merge: fea705e7114 abe3f70de04 Author: Billy Moses Date: Tue Sep 1 23:17:47 2015 -0400 Merge branch 'tb-scratch' of github.com:taekwonbilly/Parallel-IR into billy-scratch commit fea705e71145c13d37dcedf6b260ed38d75b7ad1 Merge: dd9331be0b0 19481e914d1 Author: Billy Moses Date: Tue Sep 1 17:14:52 2015 -0400 Merge branch 'tb-scratch' into billy-scratch commit dd9331be0b0f2c6172666774f3f9d3fb17121154 Author: Billy Moses Date: Tue Sep 1 17:13:27 2015 -0400 Commit detach pass before merge commit abe3f70de0450a6ff4d169e2f8a7c884f38b5b43 Merge: 61fde862bba ac515c40878 Author: TB Schardl Date: Tue Sep 1 16:59:07 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 61fde862bba820f143ea0545dc1804fe53523efc Merge: 19481e914d1 9907691f42a Author: TB Schardl Date: Sun Aug 30 09:37:44 2015 -0400 Merge branch 'billy-scratch' of github.com:taekwonbilly/Parallel-IR into tb-scratch commit 19481e914d1b1c4ee1db106d8f01b986ba4f90ae Merge: fadec4720ee 2b5188b98a3 Author: TB Schardl Date: Sun Aug 30 08:51:34 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit fadec4720ee7b66c5f4a362e2f0e0e8b2c127ce6 Merge: 4fcaa4205d2 43928f79096 Author: TB Schardl Date: Sat Aug 29 12:03:38 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 4fcaa4205d29c0c7c96d5e422f16db53db786e82 Author: TB Schardl Date: Sat Aug 29 12:03:11 2015 -0400 Fixed bug where JumpThreading would attempt to split reattach edges. commit 6342321c427d73af4fafe79c88d60d5945d192e2 Author: TB Schardl Date: Sat Aug 29 12:02:17 2015 -0400 Fixed bug where SCCP did not recognize detach/reattach/sync. 
commit cd5c25c6646f9fa4472be7f4148e938b3db180fc Author: TB Schardl Date: Fri Aug 28 18:12:45 2015 -0400 Removing dead code from SROA. commit 613e58985cd9077134dc120d465bbf4ad7c624b1 Merge: 16929701716 21f084aa722 Author: TB Schardl Date: Fri Aug 28 18:07:45 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 9907691f42ac9a51278d9a4fb20496f1a08531cc Author: Billy Moses Date: Thu Aug 27 14:58:29 2015 -0400 Add temporary hack to enable compiling serial version to executable commit 42a2eef9caf19027aac8829f2e90cc3194e87fe4 Merge: 703f88a7461 2d184c72270 Author: Billy Moses Date: Wed Aug 26 16:57:08 2015 -0400 Merge branch 'tb-scratch' into billy-scratch commit 16929701716110895498f4d5528c740355545472 Merge: 2d184c72270 4abce6e698a Author: TB Schardl Date: Tue Aug 25 14:00:34 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 2d184c7227076f1843ef28ab46c9a6736cb5faea Author: TB Schardl Date: Sun Aug 23 11:49:32 2015 -0400 Relaxed need for commutativity in serial TRE. commit aecdc8f291e3faa379ec24337be337095a685ea0 Author: TB Schardl Date: Sun Aug 23 11:49:03 2015 -0400 Fixed bug in BitcodeWriter with reattach causing opt to crash when emitting bitcode. commit f7f1cce493e65e181225f5d439cfdc1798717e2e Merge: 45d7087de1c 8724a428dfd Author: TB Schardl Date: Sat Aug 22 09:50:44 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 45d7087de1c8bc1360e107a30c937d9b24189f49 Author: TB Schardl Date: Sat Aug 22 09:43:10 2015 -0400 Draft enhancement to accumulator TRE to use identity values. commit 85eda242bd0b50027d4859450206d336e3e585f5 Merge: f135205b97a 0d125ca11e9 Author: TB Schardl Date: Tue Aug 18 11:25:00 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit f135205b97a8352113ff27c8fa8158aade75254a Author: TB Schardl Date: Tue Aug 18 10:40:50 2015 -0400 Adding 'getIdentity()' method to Instruction to enhance serial TRE. 
commit fe40d5f2a3d392c9836968fb0c8ba3df1ebc908c Merge: d3cdbb9137e 378e97e50c4 Author: TB Schardl Date: Mon Aug 17 08:52:52 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit d3cdbb9137e07f806ce69ae7f327749694b7b8b2 Merge: 653d0bbdd47 126b405bec6 Author: TB Schardl Date: Sat Aug 15 11:33:43 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 653d0bbdd47f7c8520941a9ea1ca5ce2d431bda5 Merge: 99611974297 26e17390798 Author: TB Schardl Date: Fri Aug 14 09:25:49 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 99611974297552647706e46eb290be13f1ee6a82 Merge: 4bf70c75ac9 22af77d94f3 Author: TB Schardl Date: Thu Aug 13 12:36:36 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 4bf70c75ac9f1d5eee6b5c2cbfbdb9b5d0de8f3b Merge: 4dec88872b7 a5ccfee2752 Author: TB Schardl Date: Tue Aug 11 13:31:22 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 4dec88872b7e91e3f680a304b98ee3f197f5e9db Merge: e2aac9890d9 abdf937a221 Author: TB Schardl Date: Mon Aug 10 12:53:34 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit e2aac9890d934cff8b1f09d5c31fa6c804b80bb0 Merge: 8b8574d13a1 73b16a70f16 Author: TB Schardl Date: Sat Aug 8 09:12:17 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 8b8574d13a13cab91984d55cb78ebfae7caaf941 Merge: 2ee8648835e 1962b1b6b7e Author: TB Schardl Date: Fri Aug 7 09:04:50 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 2ee8648835e211ba1a93501acb5ece9f3d5d406b Author: TB Schardl Date: Thu Aug 6 08:53:21 2015 -0400 Bug fix on marking Sync instructions as potentially reading or writing memory commit 156cf024ecde0d1a725e32239c3057c71297fcfa Merge: 7d823a9c882 7809bb2e968 Author: TB Schardl Date: Thu Aug 6 08:08:36 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 7d823a9c882be773768c6c38d92cad7da9880b2f Author: TB Schardl Date: 
Thu Aug 6 08:08:02 2015 -0400 Adding SyncInst to set of instructions that may read/write memory. commit 383d9f685189d8294df1f988e7b2c328b2227873 Author: TB Schardl Date: Tue Aug 4 15:15:40 2015 -0400 Fixed typos from previous merge. commit 90a25b1e5633c00cec6a5dd77b998aeb9bfbfc19 Merge: 7907e1dbfd7 a639e155a28 Author: TB Schardl Date: Tue Aug 4 14:30:25 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 7907e1dbfd714cc121978597e0e552b1aa6eb195 Merge: 9819737b739 c71235ab7d7 Author: TB Schardl Date: Fri Jul 31 08:49:30 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 9819737b7396013f3d6dee738b070f11b1a52e8c Merge: 2c1c7bc0320 dc9125e8d13 Author: TB Schardl Date: Wed Jul 29 08:48:13 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 2c1c7bc0320cf3d5a74b2ad8cf91f24fa641da97 Author: TB Schardl Date: Wed Jul 29 08:47:45 2015 -0400 Adding function to SimplifyCFG to elide detach statements whose continuation immediately syncs. commit c950f20aa21eca8300eed7b10f98e4b61109311d Author: TB Schardl Date: Tue Jul 28 10:48:23 2015 -0400 Added optimization to remove trivial reattach blocks. 
commit 86df0ba3770a03a8271a5bba7f1a3708b3f0d153 Merge: 3fbb3bcf4cb bf26b3fcaec Author: TB Schardl Date: Mon Jul 27 08:22:30 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 3fbb3bcf4cbbe96c286774917025664dd8e2de80 Merge: 7bb5864b2ad 52f969b0298 Author: TB Schardl Date: Thu Jul 23 08:57:48 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 7bb5864b2ad318eb969b7f8d78e6d5171a8b9cbc Merge: 9a2143e2643 717d8ad6cf4 Author: TB Schardl Date: Wed Jul 22 08:02:57 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 9a2143e26433557e7f1eac221099bd037e487e80 Merge: c9d4623ac37 c721349466d Author: TB Schardl Date: Tue Jul 21 08:42:46 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit c9d4623ac37b0ba06e727dc71df3ec559a267762 Author: TB Schardl Date: Mon Jul 20 15:56:29 2015 -0400 Don't perform PRE across a detach or reattach, as it requires splitting a critical edge. commit e1df337ce92636114885f3268afaa571e279bcb2 Author: TB Schardl Date: Mon Jul 20 15:55:53 2015 -0400 Detach-reattach pairs create unsplittable critical edges. Add some asserts to check that we don't try to split those edges. commit 48ec13d545fde4c80f86132b330dec9c672c29b3 Author: TB Schardl Date: Mon Jul 20 13:32:44 2015 -0400 Minor edit to instruction combining to avoid pessimization of moving code after a sync. commit 46d9cfe4c634c7229c16623ca17f0b27d3c7ad28 Merge: c99bacd4cec 96d9043a78b Author: TB Schardl Date: Mon Jul 20 10:53:45 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit c99bacd4cecc8f6a9b0f159d957c81ca90a53c06 Author: TB Schardl Date: Mon Jul 20 10:50:26 2015 -0400 Updating existing optimization passes to generate correct code around detach/reattach/sync instructions. Tested on fib and simple race example codes. Some passes, such as redundant instruction combining, are still pessimizations for these parallel codes. 
commit bf96714f54abff14ce58abec408cafb5367ab0fe Merge: c8594201bba 591adee23bf Author: TB Schardl Date: Fri Jul 17 09:14:14 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit c8594201bba51bac12bf581ec5f11aff5e767f9b Merge: c10991b43d5 72400f8d508 Author: TB Schardl Date: Thu Jul 16 08:59:20 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit c10991b43d5dedafd23d7579635da4e111fd598c Merge: 1d47de608d6 4aa2f4514cc Author: TB Schardl Date: Wed Jul 15 08:16:18 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 1d47de608d6e59908f715569137f5e2dac1f339a Merge: 3a70241cdea 815d6131a4d Author: TB Schardl Date: Tue Jul 14 09:19:14 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 3a70241cdea09232c8e26cfe42e56fac598ed8ba Author: TB Schardl Date: Sun Jul 12 08:24:32 2015 -0400 Updated PromoteMemoryToRegister to properly handle reattach, specifically, to avoid promoting alloca's if doing so would require a Phi node to inherit register state through a reattach. commit 51d54d96cc3cdaec661ea2268e8dd6294b22375a Author: TB Schardl Date: Sun Jul 12 08:23:11 2015 -0400 Adjusting reattach to look more like a branch. 
commit d39d1f75be719678706e403c64d1a53f9387ef98 Author: TB Schardl Date: Sun Jul 12 08:22:20 2015 -0400 Updated comments in IRBuilder commit 08f1f890d00a14f4ffccdf7da44b8c7b0e5daa12 Merge: 3fa3c489669 1e3fa768c01 Author: TB Schardl Date: Sat Jul 11 07:59:42 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 3fa3c489669220cef599f61adb52c0b3eba4bc0c Merge: 48100a712f7 e57b60a7f96 Author: TB Schardl Date: Fri Jul 10 08:48:24 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 48100a712f7dddd6578ec0f93fd55ae5ddc033ce Merge: 72a88786c60 86b4ed2fc40 Author: TB Schardl Date: Thu Jul 9 09:01:23 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 703f88a7461d9741c5d0203c02e702f48cda74e7 Merge: 5c355339f57 8e3d42ecb81 Author: Billy Moses Date: Wed Jul 8 21:51:37 2015 -0700 Merge branch 'tb-scratch' of github.com:taekwonbilly/Parallel-IR into billy-scratch commit 5c355339f57181fbf8ce8e665ce4a5e1b18a6a35 Author: Billy Moses Date: Wed Jul 8 21:50:21 2015 -0700 fix merge error commit 72a88786c604e0c99dace11e7ab02b9bea53c7c4 Merge: ab1078ca539 080d7a819f4 Author: TB Schardl Date: Wed Jul 8 07:54:34 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit ab1078ca5394d4a132b9bfef2b45fe9936355c62 Author: TB Schardl Date: Tue Jul 7 23:59:33 2015 -0400 Rework reattach to take a basic block as an argument. Reattach is therefore like a break, while not being a break. 
commit 189cbf6873ffb4880a10098341abdc18447d38d3 Merge: 8e3d42ecb81 7b7c81cd353 Author: TB Schardl Date: Tue Jul 7 13:45:38 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 8e3d42ecb81ed3d9b8a9bc45e17ba151aaba45fc Author: TB Schardl Date: Tue Jul 7 08:58:35 2015 -0400 Initial hack to disallow SimplifyCFG from removing sync instructions commit 738e14f4a4dcb70e7e9e6ff1e0727b05ce14c008 Author: TB Schardl Date: Tue Jul 7 08:57:47 2015 -0400 Fix comments on SyncInst commit bf1508cc4427479a10092210237db6678c1ef6d5 Merge: 19e947bd14f 2822246ecee Author: TB Schardl Date: Tue Jul 7 08:18:24 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 19e947bd14f9b9e718ab634481a0a0d96962b216 Merge: de195a8462b a25ee390b55 Author: TB Schardl Date: Mon Jul 6 08:24:00 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit bb180502346ef66f459596d49bc26c15dc822f88 Merge: c6662084d9e de195a8462b Author: Billy Moses Date: Sun Jul 5 21:36:33 2015 -0700 Merge branch 'tb-scratch' into billy-scratch Conflicts: include/llvm/Bitcode/LLVMBitCodes.h lib/AsmParser/LLParser.cpp lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h lib/IR/Instructions.cpp commit de195a8462b52201728b19904a7ff895e3c2b8a0 Author: TB Schardl Date: Thu Jul 2 15:04:46 2015 -0400 Temporary sync instruction, in order to develop dominance analysis for CFG's with parallel control dependencies. 
commit 738db4461c0b4305c31f9feab72003012c2dcea8 Merge: 02ff4acf5a2 e4e6f29c93d Author: TB Schardl Date: Thu Jul 2 08:52:48 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 02ff4acf5a260ed830edf5f0764c49f3ce5bdfda Merge: 999aed1e3d0 7e6843cbd68 Author: TB Schardl Date: Wed Jul 1 09:43:34 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 999aed1e3d0c0df3d4d3d8f5b4ebbe7181834cba Author: TB Schardl Date: Wed Jul 1 09:42:21 2015 -0400 Teaching SCCP about 'detach' and 'reattach', such that optimization passes can run on codes with these IR instructions. commit d2f3f1e9b8c80feb8621e3897998a24c68365bed Merge: ea299f63c15 37cb5f1c2db Author: TB Schardl Date: Wed Jul 1 08:33:51 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit ea299f63c158dd1b90dcef36729f361c69f54505 Author: TB Schardl Date: Tue Jun 30 08:27:46 2015 -0400 Renamed 'spawn' to 'detach' commit f88a6553ebb86f8d5304a7b8df238b2274d936cd Merge: 3b6df76c9a1 a5106ca54d0 Author: TB Schardl Date: Tue Jun 30 08:03:44 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 3b6df76c9a154c3ae22fe89569dfdac23637d12b Merge: e62bd55cd9d e8f07a7eb39 Author: TB Schardl Date: Mon Jun 29 09:50:59 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit e62bd55cd9d749090f9137363ae55ada11a2eb4e Merge: 4dc79856c77 43e99f618db Author: William Moses Date: Sun Jun 28 16:13:19 2015 -0700 Merge pull request #1 from taekwonbilly/billy-scratch Add reattach instruction commit c6662084d9ecac843815ba39681d6ad2b3c3faaf Author: Billy Moses Date: Sun Jun 28 16:09:53 2015 -0700 allow to compile commit 43e99f618db80683c40b98110a9320fb88f2b75f Author: Billy Moses Date: Sat Jun 27 13:20:11 2015 -0700 add token commit 1a4a51b9510224c583acc08555807713a26277e2 Author: Billy Moses Date: Thu Jun 25 22:35:46 2015 -0700 Reattach commit 5861430d7fe8c36d01e42d5a79765232d3733a55 Author: Billy Moses Date: Thu Jun 25 22:13:27 2015 -0700 
Reattach commit 4dc79856c77887cd506b15fee5793608071c7b0d Author: TB Schardl Date: Thu Jun 25 14:08:59 2015 -0400 Cleanup: remove unnecessary space commit 592fd5576cc26e3a0ba7efe4918b0c8f94c54b0f Merge: 08297c02e75 eebe475623c Author: TB Schardl Date: Thu Jun 25 13:49:16 2015 -0400 Merge remote-tracking branch 'origin/tb-scratch' into pir commit 08297c02e75ec7416751d443a99239d464c90061 Author: TB Schardl Date: Thu Jun 25 13:46:26 2015 -0400 Porting spawn instruction to current LLVM master. Added CreateSpawn to IRBuilder. commit eebe475623c877375a6718b362a76e2bd8843e11 Author: TB Schardl Date: Wed May 27 10:00:32 2015 -0400 cleaning up directory to support compilation on my system within a separate build directory commit 41059692e83eacd80f90f7df15510f97ae7c679d Author: Billy Moses Date: Tue May 26 18:27:48 2015 -0400 fix commit 21846df31a8b5b45b82781e8f8a6eb8c9c8dcb0f Author: Billy Moses Date: Tue May 26 17:55:07 2015 -0400 rm scruff commit d3d85e53fb33660f44a60f9e1c04c133596a7344 Author: Billy Moses Date: Tue May 26 13:13:00 2015 -0400 cleanup commit 8cc15c93dcee39782e92168f85e67fb7db46d069 Merge: 218888afe22 f3fb567248e Author: Billy Moses Date: Tue May 26 13:11:33 2015 -0400 Merge branch 'master' of github.com:taekwonbilly/Parallel-IR fix issue commit 218888afe22e6c297f19a5641809492429be18a7 Author: Billy Moses Date: Tue May 26 13:09:58 2015 -0400 fixed commit f3fb567248ece821dd2cd77008d1be0c385e78b0 Author: Billy Moses Date: Wed May 20 16:45:43 2015 -0400 ud commit 8721e720eeb689bf1e9f3f401a4aa851725cc126 Author: Billy Moses Date: Wed Apr 15 09:57:41 2015 -0400 reset commit b1dd73bcb3e3adc89c78acf620b81a7271f261b3 Author: Billy Moses Date: Mon Apr 13 10:13:29 2015 -0400 Last commit before change syntax commit 5cdcb6294493acf8bf10274c3a4a6f1f70c6de36 Author: Billy Moses Date: Mon Apr 6 12:05:40 2015 -0400 Updated llvm / added Future Type commit 2ce961b4e05eab9cb04b59e73ee1209b74e39524 Author: Billy Moses Date: Wed Apr 1 11:04:13 2015 -0400 update llvm commit 
5a8e342deb6ff3f9535890096b76731028740219 Author: Billy Moses Date: Tue Mar 17 22:55:24 2015 -0400 Added llvm src commit 140e15b2bddcc72a1a07b1dce8b84ae00f371e55 Author: Billy Moses Date: Tue Mar 17 22:11:09 2015 -0400 first commit Bug fixes from previous merge Updated clang submodule Code cleanup to reduce diff against mainline LLVM. Additional code cleanup. Fixes to address several failing LLVM regression tests. Based on the SyncElimination tests, specifically "for2.ll," it appears that SyncElimination removes sync instructions that are not safe to remove. One relevant test has been updated to note this problem and marked "XFAIL." [CodeGen] Reverting an earlier change to SelectionDagISel for Cilk codes. Previously, to fix an “rbp/rsp issue” with Cilk codes, SelectionDagISel was changed to set a flag in functions that expose “returns twice”, in order to make those functions appear to contain variable sized objects. Setting this flag causes LLVM regression test “CodeGen/X86/setjmp-spills” to fail. Setting related flags, such as “HasOpaqueSPAdjustment” through their existing public interface also causes the same regression test to fail. In addition, I don’t see any rbp/rsp issues with Cilk codes when SelectionDagISel is does not set any such flag. For these reasons, I'm removing this previous change to SelectionDagISel. [Tapir] Adding test to verify that LoopSpawning properly handles parallel loops in Tapir whose body reads the loop limit. [PassManager] Reworking Tapir modification to PassManagerBuilder to ensure that Sanitizer instrumentation passes run only once. This change should also help improve Tapir's compatibility with LTO. [Tapir] Updating simple LoopSpawning test. [TSan] Reverting change to TSan instrumentation, which was causing a test to fail. We will need to introduce a new instrumentation pass specifically for CilkSan. Updated clang submodule [CilkSanitizer] Added custom instrumentation pass for CilkSan. 
[ThreadSanitizer] Removing old change to ThreadSanitizer for use in CilkSan. [CilkSanitizer] Added instrumentation of memory intrinsics and atomics. Added simple optimization to elide instrumentation of non-captured pointers in serial functions. [Tapir] Removed requirement to unify returns in all functions for Tapir lowering to Cilk ABI. Updated clang submodule. [CilkSanitizer] Improved analysis for avoiding instrumentation. [CilkSanitizer] Allow CilkSanitizer to handle a larger variety of memory access sizes and to properly ignore memory accesses of illegal sizes. Squashed commit of the following: commit 9eef73e8b7b5dab5d8e04a0fa584fd765e5b1d13 Author: TB Schardl Date: Fri Aug 4 01:43:13 2017 +0000 [TRE] Fix bug with Tapir modification of TRE that was causing unit tests to fail. commit 92b16128f980b6683cb53a324480d7305f4327d4 Author: TB Schardl Date: Thu Aug 3 13:10:01 2017 +0000 [README] Attempting to clean up README file. commit fa242e0f01133707c3a483cfabedf3ee28abba7a Merge: a8e2b795fb3 f55a27066ac Author: TB Schardl Date: Thu Aug 3 12:52:13 2017 +0000 Merge branch 'master' of github.com:wsmoses/Parallel-IR commit a8e2b795fb34c87cd2c884235c3b50be0c17c3e7 Author: TB Schardl Date: Thu Aug 3 12:49:10 2017 +0000 [README] Updated README. commit f55a27066ac03e39e6a01ca30e86bc48df76fa7e Author: William S. Moses Date: Tue Aug 1 20:17:47 2017 +0200 Add CircleCI commit 964b5bea84c59cdc7e27bc07e98f12edc821c4fc Author: TB Schardl Date: Wed Aug 2 21:35:11 2017 +0000 [LoopSpawning] Correctly handle Tapir loops where the loop body uses the variable storing the number of loop iterations. 
Fixes #13 commit 8d4f443d9c9b78478279d598c4eb9abd79db1e59 Merge: 452aac7e148 ef122d645a8 Author: TB Schardl Date: Wed Aug 2 21:35:22 2017 +0000 Merge branch 'master' of github.com:wsmoses/Parallel-IR commit 452aac7e14852491121f7ca26f24f420414a5245 Author: TB Schardl Date: Wed Aug 2 21:35:11 2017 +0000 [LoopSpawning] Correctly handle Tapir loops where the loop body uses the variable storing the number of loop iterations. Fixes #13 commit ef122d645a83c9ad9ee743329208ee001071a4f2 Author: William S. Moses Date: Tue Aug 1 20:17:47 2017 +0200 Add CircleCI commit 9be75a22ad015c307665d277994651671a15ae60 Author: TB Schardl Date: Mon Jul 10 15:57:49 2017 +0000 [CSI] Bug fixes and refactoring of the CSI instrumentation pass. commit 6ce5f2f27b1bc2d92e48420376c2a37d1608f3a1 Author: TB Schardl Date: Mon Jul 10 13:37:39 2017 +0000 [Tapir] Allow Tapir lowering to Cilk to fill in missing definitions of internal Cilk types, including __cilkrts_worker and __cilkrts_pedigree. commit 631e4626d2ba614eaf8a68113c2fdf02f9f8e246 Author: TB Schardl Date: Fri Jun 30 21:33:54 2017 +0000 [DetachSSA] Initial implementation of an analysis pass that tracks the creation and synchronization of detached tasks. This analysis is based on MemorySSA. commit 923a9052c95c43df1405fad56f2cb1ef12a47412 Author: TB Schardl Date: Tue Jun 27 21:54:51 2017 +0000 [Tapir] Adding support for sync regions. A sync region is designated by a token emitted by a call to @llvm.syncregion.start. The detach, reattach, and sync instructions all take this token as a parameter. A sync instruction in a sync region SR only waits on computations detached from detach instructions in the same sync region or in a detached descendant thereof. By convention, a call to @llvm.syncregion.start occurs in an entry block, that is, either the entry block of a function or the entry block of a detached sub-CFG. For Cilk programs, a sync region is started for any function that performs a _Cilk_spawn or _Cilk_sync. 
A separate sync region is also started for each _Cilk_for in the function. Sync regions address two issues with sync instructions. First, with sync regions, the implicit sync at the end of a _Cilk_for only waits on the parallel iterations of that _Cilk_for, not on any other spawned computation within the function. Second, when a function is inlined, any _Cilk_sync performed by that function will not erroneously wait on detached computations in its caller. This commit includes simple cleanup passes involving sync regions. One form of cleanup removes sync instructions in sync regions that contain no detach instructions. Another form removes empty sync regions, i.e., calls to @llvm.syncregion.start whose produced token is never used. Future work will analyze sync regions more carefully and combine them when it is deemed safe. commit 9b55aac80aca2a520ba7627a020af413be18a29f Merge: 9b5abba8e85 eece7bcb178 Author: TB Schardl Date: Sat Jun 3 12:42:01 2017 +0000 Merge branch 'master' of github.com:llvm-mirror/llvm commit 9b5abba8e85b01c08d49885fdc6d871ed0e522e9 Merge: 51a4df5f3e5 6ef5e10ad7e Author: TB Schardl Date: Wed May 31 02:07:52 2017 +0000 Merge branch 'master' of github.com:llvm-mirror/llvm commit 51a4df5f3e536a65c0a926ee7c87eb47c80aec7f Merge: 6f69cdf478c 0559b4fa45c Author: TB Schardl Date: Tue May 30 18:19:52 2017 +0000 Merge branch 'master' of github.com:llvm-mirror/llvm commit 6f69cdf478cc2801c74964e3a233ad46d16245cc Author: William S. Moses Date: Mon May 15 01:15:30 2017 -0400 remove Rhino print commit d719d172fd8967cccb6625ff1ec54e439cdfe989 Merge: d2b4d301879 2db0ffd4753 Author: William S. Moses Date: Mon May 15 01:04:30 2017 -0400 Merge branch '6898' of github.com:wsmoses/Parallel-IR into 6898 commit d2b4d301879c0a75cbbd9d7c49e51581543ff08b Author: William S. 
Moses Date: Mon May 15 01:04:14 2017 -0400 pushing rhino flag commit 2db0ffd47534ee35deaea877d73d8484cb94c01f Author: Douglas Kogut Date: Mon May 15 00:24:54 2017 -0400 spawn unswitch commit 8f57e0739bf9fc6736472c89f91a533630efd5c3 Merge: 9660ce4abc0 be7eafc7179 Author: William S. Moses Date: Sun May 14 17:36:17 2017 -0400 Merge branch 'master' of github.com:wsmoses/Parallel-IR into 6898 commit 9660ce4abc060598a20b7c5d30a217bdc3af569e Merge: 002fb57bb06 780934e4b6a Author: William S. Moses Date: Sun May 14 17:35:58 2017 -0400 Merge branch 'master' into 6898 commit 002fb57bb069f18319ceab0d287c22166999a766 Merge: 35669cce54f acefa6d5a77 Author: William S. Moses Date: Sun May 14 15:32:41 2017 -0400 Merge branch '6898' of github.com:wsmoses/Parallel-IR into 6898 commit acefa6d5a77cad0cb2da8f5c6cfe3af1ca15129e Author: Douglas Kogut Date: Sun May 14 14:58:08 2017 -0400 spawn unswitch commit be7eafc7179b8591b0007a25a2e3aae31cfc7818 Author: TB Schardl Date: Tue May 9 21:34:49 2017 +0000 [Mem2Reg] Updated Mem2Reg to find the entry blocks of the function and all detached sub-CFG's more efficiently. commit 12f929ae136d57fd9e744bc2dac8c072d01e2053 Author: TB Schardl Date: Tue May 9 21:15:58 2017 +0000 [CilkABI] Marked additional loads and stores to CilkRTS stack frames as volatile. Fixed bug in extracting exception-handling exit blocks for detached CFG's. commit 9bf9a4d58c9f3a09164b8a86202bcee2f5abf553 Author: TB Schardl Date: Tue May 9 21:14:33 2017 +0000 [InstCombine] Fixed bug to prevent InstructionCombining pass from sinking operations that read memory across Tapir instructions. 
commit 719872be7ce9d8cdbc7036c6eb7d3d77ebeff5cf Merge: f63b0fed940 10826f2652f Author: Douglas Kogut Date: Fri Apr 28 20:39:49 2017 -0400 Merge branch '6898' of github.com:wsmoses/Parallel-IR into 6898 commit f63b0fed9406ac9f5f8b54626a9c6ef965cceaba Author: Douglas Kogut Date: Fri Apr 28 20:39:34 2017 -0400 pushing measuring scripts commit 991ca791848c9936677a0b7184a77cf0eaf6734d Author: TB Schardl Date: Wed Apr 26 12:17:07 2017 +0000 [LoopSpawning] Cleaning up code for handling exceptional exits. commit 10826f2652fea87d11ec166954c2d7b02917c21d Author: Jiahao Li Date: Tue Apr 25 23:24:56 2017 -0400 Alters sync elimination pfor microbenchmark. commit 9d5172300fcd2528dc4db210beccfa6cecb7816f Author: Jiahao Li Date: Tue Apr 25 23:07:07 2017 -0400 Makes LoopFusePass work. commit 46720980313325bf80262b8fd447db8e90f1c307 Author: TB Schardl Date: Wed Apr 26 00:10:42 2017 +0000 [LoopSpawning] Bug fix to find all exception-handling exit blocks of a Tapir loop. commit 48e7791f51c0a3b0fc27cc280e458892dac30fbd Author: TB Schardl Date: Tue Apr 25 01:30:48 2017 +0000 [Tapir] Preliminary support for C++ exceptions on Linux. commit 4613a6461de60516a6242270e4c6cd7beb1c5bec Author: TB Schardl Date: Tue Apr 25 01:28:09 2017 +0000 [CSI] Updated CSI pass to support separate property types per IR object. commit d5331895cb2d1437b7788469ac72c731b65a949b Author: Jiahao Li Date: Sat Apr 22 15:21:03 2017 -0400 Have makefile for sync_elimination_pfor_mb emit .ll for the sync eliminated version. commit 3b2b3c3429af3f1a173970cef45844639d35361b Author: Jiahao Li Date: Sat Apr 22 15:09:04 2017 -0400 Cleans up makefile for sync_elimination_pfor_mb. commit 21aa2bbee01f1dbc86681a7ed78b7cfd8fd611d5 Author: Bojan Serafimov Date: Sat Apr 22 14:57:32 2017 -0400 Fix compile error commit 0c5e6d15f12288dc29e9f08ff9d011c1204f69ba Author: Jiahao Li Date: Sat Apr 22 14:45:38 2017 -0400 Fixes sync_elimination_pfor_mb micro benchmark. 
commit a387e9f3e16ab5253eec663bbb56c246e4dbda55 Author: Jiahao Li Date: Sat Apr 22 14:26:06 2017 -0400 Fixes SyncElimination blow up with function calls. commit 44e8409f071578546b572b6dd807a83092867bfa Author: Bojan Serafimov Date: Mon Apr 10 12:06:51 2017 -0400 Fix tests commit adeb3eaaf5af3d9c816db1a704324c9f715a0277 Author: Jiahao Li Date: Mon Apr 10 11:46:36 2017 -0400 Handles instructions with null call sites. commit 96f24b65e5a4634c8a78ac0e53dd552fe46d185d Author: Bojan Serafimov Date: Mon Apr 10 10:19:42 2017 -0400 Ignore sync instruction in rosetta commit d874567d6e6cdfc88c0faab3122975046162ec09 Author: Bojan Serafimov Date: Tue Apr 4 19:14:29 2017 -0400 Add nested loop test commit 8f7734960776d31ddcb0cf690da837c3f7ee9229 Author: Bojan Serafimov Date: Fri Mar 17 17:39:58 2017 -0400 Fix bug in FindRosetta commit e0bac90f990423a17e245cd6cb2d9f9f2b387951 Author: Bojan Serafimov Date: Fri Mar 17 17:03:16 2017 -0400 Add test cases commit 7ccc4c9454b80ef03f14a0c03d86fceea2309581 Author: Jiahao Li Date: Fri Mar 17 16:57:54 2017 -0400 Fixes sync elimination test. commit b5f16cfaf2ce8c9311104f356522c527cfe0b8ba Author: Jiahao Li Date: Fri Mar 17 16:51:37 2017 -0400 Removes incomplete sync elimination test. commit 344d075d08c6d23be99373b1b65a94fb6f92701d Author: Jiahao Li Date: Fri Mar 17 16:47:29 2017 -0400 Removes function renaming in sync elimination. commit 4045b1f2bd1d4e1ff6527bdc4349d9938e188463 Author: Jiahao Li Date: Fri Mar 17 16:15:20 2017 -0400 Fixes loop condition error in sync elimination. commit 7eab317e1436d2fc456f0f625ef4888577c53bec Author: Bojan Serafimov Date: Fri Mar 17 16:33:40 2017 -0400 Fix tests commit 2c6412e1a4bb92a5fc86f63803a52ea22c43aa05 Author: Jiahao Li Date: Fri Mar 17 14:54:13 2017 -0400 Implements legality check for sync elimination. 
commit a57ac4cafdfe845f0c90cc0611705c38f87f1905 Author: Bojan Serafimov Date: Fri Mar 17 16:05:14 2017 -0400 Add basic SyncElimination tests commit a7c6bdec1a3562a9333e06497e362ab5e8e45613 Author: Bojan Serafimov Date: Mon Mar 13 11:09:06 2017 -0400 Implement sync removing commit 271c65cf91c5a2223ebac864cb55d6137d6d00c4 Author: Jiahao Li Date: Thu Mar 9 16:59:16 2017 -0500 Implements Vegas-set finding for SyncElimination pass. commit 72827d0cc4ef8b3fb556bdb4660c6b0891849b4f Author: Jiahao Li Date: Thu Mar 9 15:58:45 2017 -0500 Implements Rosetta-finding part of SyncElimination pass. commit df4c672499f76bcbfdf93806755e6f9ff15035f6 Author: Jiahao Li Date: Thu Mar 9 15:08:28 2017 -0500 Cosmetic cleanup. commit 2682b3bf34c4efd7fc86e0af26d3a0b1dffc108f Author: Bojan Serafimov Date: Wed Mar 8 00:52:22 2017 -0500 Add SyncElimination pass commit 3856a31e3af623255498bc878b750e82c90a34b7 Author: Jiahao Li Date: Sat Apr 22 16:27:38 2017 -0400 Enables LoopFuse by default. commit 6017d8b2a125a66cb418d247281433a5665ab249 Author: Jiahao Li Date: Sat Apr 22 16:27:26 2017 -0400 Rebases LoopFuse to compile on the current code base. commit 367d9d916cbaf9d2433d267bf9c70be772fe8af7 Author: Jiahao Li Date: Sat Apr 22 16:04:20 2017 -0400 Replaces LoopAccessAnalysis with LoopAccessLegacyAnalysis in LoopFuse. commit bb0b29851651bc1d122b7aed839a58edb4e656ce Author: Jiahao Li Date: Sat Apr 22 15:40:47 2017 -0400 Applies https://reviews.llvm.org/D17386 for Loop Fusion Pass. commit 3ce522e822ad2a0b047c0cc905cf59b8f4247d26 Author: Douglas Kogut Date: Sat Apr 22 14:11:36 2017 -0400 pushing spawn work commit 0dd0df9b42bac64d82ffe5035f6d4f5d7b2dd2b0 Author: TB Schardl Date: Thu Mar 30 12:40:37 2017 +0000 [PassManager] Re-enabling passes that happen after optimizations when Cilk is not enabled. 
commit 511ba02c8ccb2bf15a0791007229389352bffef9 Author: TB Schardl Date: Thu Mar 16 14:25:49 2017 +0000 [Tapir] When outlining, propagate available alignment information to the parameters of the outlined function. commit 4722cecdb2cef0b0ab84c08f65ae296bb4c01a2f Merge: 285ff461789 780934e4b6a Author: TB Schardl Date: Fri Mar 10 20:18:23 2017 +0000 Merge branch 'master' of github.com:wsmoses/Parallel-IR commit 285ff4617892da4132f4a0aded992dcc4c5af6d5 Author: TB Schardl Date: Fri Mar 10 20:17:05 2017 +0000 [Tapir] Fix to properly maintain allocas in the entry block of a detached context. These changes ensure that every detached context has an entry block with just one predecessor. These changes also move allocas among entry blocks during function inlining and the outlining process for lowering Tapir. These changes also remove syncs associated with parallel loops after outlining. commit 489f0a4673d2b0364556382569e421fed347d301 Author: TB Schardl Date: Fri Mar 10 20:14:03 2017 +0000 [Local] Bug fix to make the GetDetachedCtx routine properly return the detached BB at the start of a detached context. commit cd7e9f3c2d840182ab82830218703b78c657d1b0 Author: TB Schardl Date: Fri Mar 10 20:11:56 2017 +0000 [SimplifyCFGPass] Code cleanup and comments. commit 35669cce54f33447d1f12423e71536ab31cf02e5 Merge: 1fae2a923fb 52889bc3118 Author: William S. Moses Date: Wed Mar 8 11:33:46 2017 -0500 Merge branch '6898' of github.com:wsmoses/Parallel-IR into 6898 commit 780934e4b6a8054900b774d9405c0dd426bd23be Author: William S. Moses Date: Tue Mar 7 18:08:44 2017 -0500 Parallelize / Shorten compilation commit 4cc8071621e2c159a755a594bdb5dde9fbdfe74d Author: William S. Moses Date: Tue Mar 7 17:37:28 2017 -0500 Fix optimized llvm build commit 26007676a05e6c0445a0971f5bbfb0a2b2e9c47b Author: William S. Moses Date: Tue Mar 7 17:31:40 2017 -0500 Updated binary commit 6917c16e028fb03a608ba2e2f33ce48c68900b92 Author: William S. 
Moses Date: Tue Mar 7 17:21:27 2017 -0500 Faster cmake and autobuild matrix commit 088941d05808f63865028347f4fcd3cbc849ce08 Author: William S. Moses Date: Tue Mar 7 16:56:44 2017 -0500 Remove old cmake commit c558e05a3917b7be37490cd45b6c2d9fc153adbc Author: William S. Moses Date: Tue Mar 7 16:55:17 2017 -0500 Print directories for debugging script commit 074121e15927e674b16e2656913ecd08d557a422 Author: William S. Moses Date: Tue Mar 7 16:45:52 2017 -0500 Leave directory in autobuild after cmake commit 30a221e0a04ae4dae0575a092800799e7aa7792f Author: William S. Moses Date: Tue Mar 7 16:38:07 2017 -0500 Build without parallel option commit 7a7d719c26e78e049093f1869eb6573e7cb3e529 Author: William S. Moses Date: Tue Mar 7 16:32:07 2017 -0500 Build newer cmake from source commit 24f129bf4857357c90f8458c2ce09b60ab112b36 Author: William S. Moses Date: Tue Mar 7 16:24:00 2017 -0500 Correct ppa commit e2bc0fc2d7edc08fb427b6f0a30862c602e57dfb Author: William S. Moses Date: Tue Mar 7 16:21:28 2017 -0500 Change CMake to sourceline commit c6249f0bce0d9906f5d669c6d44d15f5977e09d3 Author: William S. Moses Date: Tue Mar 7 16:16:37 2017 -0500 Attempt newer CMake commit fe47a0078d432ee911504fa05c1af0652122dce7 Author: William S. Moses Date: Tue Mar 7 16:08:27 2017 -0500 Build PClang along with Tapir commit 8ee564cae3bbb672546427bab5137b90ce2fdc17 Author: William S. Moses Date: Tue Mar 7 16:07:36 2017 -0500 Build intel runtime using the Tapir compiler commit 6750684c7007e0e6ea0300498e7196cf68c52176 Author: William S. Moses Date: Tue Mar 7 16:00:50 2017 -0500 Add configure to cilk runtime building commit 3f3b46840218f1629f1183b1ef0772414ca145c2 Author: William S. Moses Date: Tue Mar 7 15:57:18 2017 -0500 Add make to dependency list commit bd6f8df75f130bcf260fc4a3102d73341d21dc1b Author: William S. Moses Date: Tue Mar 7 15:54:50 2017 -0500 Add cilk runtime building commit 6372499258146bf9da15f0153c9e4f4d288578cc Author: William S. 
Moses Date: Tue Mar 7 15:42:22 2017 -0500 Change autobuild cmake version commit 9fec173620bf1c3c964292485f007a69fc05ca72 Author: William S. Moses Date: Tue Mar 7 15:39:43 2017 -0500 Change autobuild distribution commit 1fae2a923fb632a6eb1dabc4826e3b2533735273 Author: William S. Moses Date: Tue Mar 7 15:35:20 2017 -0500 Relist as package commit 52889bc31182f3faebcfce24918670967b5b96f6 Author: Douglas Kogut Date: Mon Mar 6 12:11:10 2017 -0500 pushing example opt pass commit fe692e250aa8a78435200882ebb89c17f881c4d3 Author: TB Schardl Date: Fri Mar 3 13:25:57 2017 +0000 Ignoring debug build directory. commit 69fa592b7e889be513f1004b1f13dd450a1be378 Merge: 3c56ed06c17 df445de9e82 Author: TB Schardl Date: Fri Mar 3 13:20:52 2017 +0000 Merge branch 'master' of github.com:wsmoses/Parallel-IR commit 3c56ed06c17f764e2c1221df60e8ee45199b1577 Merge: 4611d796dea 2d562fe758b Author: TB Schardl Date: Fri Mar 3 13:19:05 2017 +0000 Merge branch 'master' of github.com:llvm-mirror/llvm commit df445de9e8252e5aff8a6d7645128df71b3bd45f Author: William S. Moses Date: Thu Mar 2 00:37:50 2017 -0500 Correct CI build script commit efa60d2d710c5697f6be5737898897cfb56b4509 Author: William S. Moses Date: Wed Mar 1 16:07:01 2017 -0500 Force travis-ci to rebuild commit 66ed989e47c276699462c761b0e4f2b68ef5d951 Author: William S. Moses Date: Tue Feb 28 16:18:35 2017 -0500 Initial attempt at adding Travis autobuilder commit b8a1f3fb7874d52fedb6db8a786695521a846709 Merge: 518873a5b44 a3bd7557fb6 Author: William Moses Date: Tue Feb 28 11:49:18 2017 -0500 Merge pull request #12 from YingVictor/master [LowerToCilk] Fix memory leak. commit a3bd7557fb661ef0980599d430e7cd0a52f7f385 Author: Victor A. Ying Date: Tue Feb 28 11:41:08 2017 -0500 [LowerToCilk] Fix memory leak. SmallVector of NewHelpers needs to be deleted. 
commit 518873a5b44c8ffc37282cb3887a1518525eca7f Merge: 645daf3405c fb71c4aa6b4 Author: William Moses Date: Sun Feb 26 17:29:34 2017 -0500 Merge pull request #11 from YingVictor/master Two minor fixes commit fb71c4aa6b408ce59e095b3d770ba01ab4eb9f51 Author: Victor A. Ying Date: Sun Feb 26 16:53:55 2017 -0500 [include/llvm-c/Transforms/Tapir.h] Fix function name mentioned in comment. commit 2e658275b9935e536f86aec6b7f911b6c5e374cc Author: Victor A. Ying Date: Sun Feb 26 16:46:18 2017 -0500 Properly remove traces of clang submodule. Removing a git submodule requires more than just deleting the entry in the .gitmodules file, as was done in the previous commit. It also requires deleting the special directory entry from the git index, which should be done using some variation of "git rm", such as: git rm --cached path/to/submodule Which is what I did in this commit. commit 645daf3405c01f6e262373a6c849466f09162f44 Author: William S. Moses Date: Fri Feb 24 15:35:50 2017 -0500 Remove clang submodule commit c9830e69c572885f6bfc7a74179a8e7efb6c851e Merge: 3ad6c9cb76e 4611d796dea Author: William S. Moses Date: Fri Feb 24 15:33:45 2017 -0500 Merge branch 'master' of github.com:wsmoses/Parallel-IR commit 3ad6c9cb76eba2c5fbf7a5c8416ac28793d6455e Author: William S. Moses Date: Fri Feb 24 14:10:50 2017 -0500 Update clang to stable commit 4611d796dea964dea884c34cadcef14b256fbe56 Author: TB Schardl Date: Tue Feb 21 19:46:22 2017 +0000 [CodeExtractor] Removed unused function from CodeExtractor. commit 73b2a05f9106a888ae92fbd9d89fd36be310bcce Author: TB Schardl Date: Sun Jan 15 14:19:32 2017 +0000 [LoopSpawning] Restored warnings when LoopSpawning fails to transform a marked loop. commit 710c06b2ffad2727ff751113b90b9905f4a3c845 Author: TB Schardl Date: Sun Jan 15 14:18:54 2017 +0000 [CodeExtractor] Removing old code for dealing with debug symbols. 
commit ab75cf00f520c07d4dafa58328fa809780ac146b Author: TB Schardl Date: Fri Jan 13 22:25:29 2017 +0000 [LowerToCilk] Renaming Detach2Cilk to LowerToCilk, as part of some code cleanup. commit 2748779e158be086e9fa52300ccd5fcded978044 Author: TB Schardl Date: Wed Jan 11 13:59:02 2017 +0000 Updated associated version of Clang. commit 738a76c83c83017faaeeaf959fb0c45b4586b08f Author: TB Schardl Date: Wed Jan 11 13:31:23 2017 +0000 [test] Adding some simple regression tests for Tapir. commit 5b63394d73f1d65ec6e338ed9ba8063895d8ef4e Author: TB Schardl Date: Mon Jan 9 19:11:44 2017 +0000 [Tapir/Outline] Fix debug build. commit df3dcb657228c40bff3ee7cab30944ed9e116021 Author: TB Schardl Date: Mon Jan 9 02:31:01 2017 +0000 [Tapir/Outline] Minor code cleanup. commit facf7c87283b30b139fe75fbd4caacfc32c0fb37 Author: TB Schardl Date: Mon Jan 9 02:29:07 2017 +0000 [Detach2Cilk] Inline __cilk functions into generated helper functions. commit c32adbf10f18c9a52e10de2e046329f67f635699 Author: TB Schardl Date: Sun Jan 8 22:48:22 2017 +0000 [LoopSpawning] Code cleanup for release build. commit 3b460341f6a21344ddbc11100cd75ef079bcd8ee Author: TB Schardl Date: Sun Jan 8 22:41:02 2017 +0000 [Detach2Cilk] Fixed creation of Cilk stack frames for release build. commit 4bcdb952154d0daf4f18384cceda7f72e7b2542d Author: TB Schardl Date: Sun Jan 8 20:42:48 2017 +0000 [SROA] Minor code cleanup. commit 3c73fb9bf4d241c96c31f10c3a89074ffbf30774 Merge: 0d6f0aad70a 18687546b92 Author: TB Schardl Date: Tue Jan 3 19:24:51 2017 +0000 Merge branch 'new_lowering' commit 18687546b9276fcb76c619193ee46b93f05a7001 Author: TB Schardl Date: Tue Jan 3 17:18:12 2017 +0000 [Detach2Cilk] Code cleanup. commit 2a7c78c09452762cc784ac4cf92381340830a90c Author: TB Schardl Date: Tue Jan 3 16:59:48 2017 +0000 [LoopSpawning] Added support for Tapir loops with exit blocks terminated by unreachable. 
commit a1af329428f71f12decbe8776e2d9b4d9b377c63 Author: TB Schardl Date: Sat Dec 31 17:06:01 2016 +0000 [CSI] Fix formatting of CSI pass. commit 08b3602ddb14e7bbe7fe78faa7a12c4fbd43e431 Author: TB Schardl Date: Sat Dec 31 17:05:07 2016 +0000 [CSI] Add function names to FED tables. commit 1672db6417856784850c9aaa5f879c1bb5f6f539 Merge: a22c19d21b9 56516028d8b Author: TB Schardl Date: Sat Dec 31 14:59:27 2016 +0000 Merge branch 'master' of github.com:llvm-mirror/llvm into new_lowering commit a22c19d21b991cd92e7f64103166f66f0f89eabd Merge: 04b71642665 7f580b605b2 Author: TB Schardl Date: Tue Dec 20 14:25:09 2016 +0000 Merge branch 'master' of github.com:llvm-mirror/llvm into new_lowering commit 04b716426657e5cf52c69e6e6953492e1e3b7434 Author: TB Schardl Date: Tue Dec 20 14:09:15 2016 +0000 [LoopSpawning] Switching LoopSpawning back to implementing divide-and-conquer scheduling directly. commit c03b7f076ab44c6e37edb033cf1b16950740fca7 Merge: 0cc6919dafd eaf3712d06e Author: TB Schardl Date: Mon Dec 19 21:47:05 2016 +0000 Merge branch 'master' of github.com:llvm-mirror/llvm into new_lowering commit 0cc6919dafdf326efdfa275f66556ad1a9abfe67 Author: TB Schardl Date: Mon Dec 19 20:34:25 2016 +0000 [Outline] Cleaning up the code. commit 747d1e8211d2c6ce8eeee40a79d3f684e9747e1c Author: TB Schardl Date: Mon Dec 19 20:30:37 2016 +0000 [LICENSE] Updated license to add copyright for changes to implement Tapir. commit 0d6f0aad70ae0b75a4f71567bd098703070c3c56 Author: William S. Moses Date: Sat Dec 17 23:15:13 2016 -0500 add clang submodule commit 463af403bf33e14b759a60377c95ffe3d1f74382 Author: TB Schardl Date: Tue Dec 13 02:28:54 2016 +0000 [LoopSpawning] Keeping two versions of divide-and-conquer loop spawning around. commit fcae33a06441a48081c463f74d12fc5f6b9ce68a Author: TB Schardl Date: Tue Dec 13 02:21:17 2016 +0000 [PassManagerBuilder] Modification to support more faithful reference pipeline for PPoPP. 
commit 6a8c5d26ad24a6f35ca8afcc17f18ea89f790f09 Author: TB Schardl Date: Sun Dec 11 22:29:25 2016 +0000 [LoopSpawning] Fixed bug in computing loop count for using Cilk ABI call. commit b8af887cac2f664ae780631cd14ea2a194ea042c Author: Ubuntu Date: Sun Dec 11 08:19:56 2016 +0000 cilk abi loopspawning commit 217f4eafa2694468cb3817fb65e05b95ddd1d0b3 Author: TB Schardl Date: Sat Dec 10 20:39:12 2016 +0000 [CilkABI] Bug fix to allow proper lowering of when a loop is the entry of a detached CFG. commit 82cb28db1a9877d923da8a038c8f33a9079b6121 Merge: 8a4ac0d5d6e 05bdd2ebfe8 Author: TB Schardl Date: Mon Nov 28 21:20:47 2016 +0000 Merge branch 'master' of github.com:llvm-mirror/llvm into new_lowering commit 8a4ac0d5d6ee455a6000fd60cd37018642a2b5ba Author: TB Schardl Date: Mon Nov 28 15:58:29 2016 +0000 [LoopSpawning] Refactored to be a FunctionPass, instead of a LoopPass. More work is needed for this pass to legally add functions to the current Module. commit 7f96f2c38f8233502a50c6bfd66257be0915ea41 Author: TB Schardl Date: Mon Nov 28 15:55:11 2016 +0000 [LoopSimplify] Modified to ensure that the preheader of a loop is not terminated by a sync. commit f84012859a7fd293377b87a2c0d95d2cbd75aee0 Author: TB Schardl Date: Mon Nov 28 15:53:05 2016 +0000 [Tapir/Outline] Cleaning up commented-out code. commit 2e932359c6f63a76e6a040bdf577ca9f162ddd8f Author: TB Schardl Date: Mon Nov 28 15:52:22 2016 +0000 [BasicBlockUtils] Modified SplitEdge to keep sync instruction in original block. commit 32aeb36a6f76b69247231a1b57a9b66a32627ed1 Author: TB Schardl Date: Mon Nov 28 15:50:19 2016 +0000 [Detach2Cilk] Making Detach2Cilk a ModulePass, instead of a FunctionPass, so it can safely add functions to the module. 
commit 6ab23d5f49ab42f2d3074523570cf72cd7ee6d02 Merge: 56598980fc5 52894d83e1a Author: TB Schardl Date: Sat Nov 26 17:23:45 2016 +0000 Merge branch 'master' of github.com:llvm-mirror/llvm into new_lowering commit e189e6c97da75849d75b512dd5513c0ec5a09af4 Merge: 6952888faaa c3bdfe57eb1 Author: Ubuntu Date: Thu Nov 24 17:07:50 2016 +0000 Bring up to date with most recent llvm commit 56598980fc58d0bd68e2957eb45371eb23245995 Merge: 6a33185a05c 3e65807a6f1 Author: TB Schardl Date: Wed Nov 23 18:31:46 2016 +0000 Merge branch 'master' of github.com:llvm-mirror/llvm into new_lowering commit 6952888faaaf797beb00934eee0c99f85fbfeea5 Merge: e79c0d93864 e372554cd73 Author: TB Schardl Date: Fri Nov 11 21:42:16 2016 +0000 Merge branch 'master' of github.com:wsmoses/Parallel-IR commit e79c0d93864a579bf6b865802e182a7b80d9ea48 Author: TB Schardl Date: Fri Nov 11 21:34:37 2016 +0000 [PassManager] Ensure that extensions to the pass manager that are intended to run last only run once on Tapir programs. commit 6a33185a05c72739458a92e13a103ed4b3ae4b97 Author: TB Schardl Date: Fri Nov 11 21:34:37 2016 +0000 [PassManager] Ensure that extensions to the pass manager that are intended to run last only run once on Tapir programs. commit 6f2c14afe41e2bb9729976b52734d98f3c99bae3 Author: TB Schardl Date: Fri Nov 11 21:18:30 2016 +0000 [LoopSpawning] Ensure that calculation of a Tapir loop limit is inserted at the end of the loop's preheader. commit e372554cd7396b1facc00f6d5df7d51f89553e31 Author: William S. Moses Date: Thu Nov 3 23:57:38 2016 -0400 Remove some debug prints commit 6baad834b9903206be5830e9a5d81cb8c118dc80 Author: William S. Moses Date: Thu Nov 3 23:54:44 2016 -0400 Remove some debug prints commit 782593d7bcd41736b148b6b128890d31f0d49f10 Author: TB Schardl Date: Tue Nov 1 14:40:47 2016 +0000 [LoopSpawning] Cleaning up code and debug output. 
commit f604273ecf927017dc48afdae928477f8708e0d5 Author: TB Schardl Date: Tue Nov 1 14:39:42 2016 +0000 [Detach2Cilk] Should not need to inline detached helper functions anymore, because Detach2Cilk should properly handle debug symbols. commit 20d299f2d2839b1f45b6716970f5a99ee821cec3 Author: TB Schardl Date: Tue Nov 1 14:37:40 2016 +0000 [PassManagerBuilder] Run SimplifyCFG after Detach2Cilk to clean up cruft left by Detach2Cilk. commit 1610d83dd9f26a9f47004634f83b7e5a614f46f6 Author: TB Schardl Date: Tue Nov 1 14:36:49 2016 +0000 [Detach2Cilk] Fix to ensure that Phi nodes in the continuation of a detach are still valid after lowering the detach to Cilk runtime calls. commit ea14d8bd01adccba902cdae883625698319b7d61 Author: TB Schardl Date: Tue Nov 1 04:42:24 2016 +0000 [CilkABI] Converting Detach2Cilk pass to use new Tapir outlining methods, in order to handle debug symbols more correctly. commit 1f30c735f929c5821cf575aeea59ee1b6eef3164 Author: TB Schardl Date: Mon Oct 31 21:56:25 2016 +0000 [LoopSpawning] Fixed bugs to properly erase loops after performing transformation and to handle preheaders terminated by syncs. commit a86651dd973a6f0743b4a360396dba6360fc5bdf Author: TB Schardl Date: Mon Oct 31 21:54:45 2016 +0000 [Outline] Cleaning up CreateHelper Tapir outlining method. commit 31691cd15ae0f76c40420339849f652888294863 Author: TB Schardl Date: Mon Oct 31 15:38:08 2016 +0000 [LoopSpawning] Cleaning up LoopSpawning code, and adding output to loop-spawning reports. commit 51220e44f007bb6b5be02ecbbf2e20840634daba Author: TB Schardl Date: Mon Oct 31 15:34:55 2016 +0000 [Tapir] Renaming TapirOutline to Outline. commit 6950ba60b07973d535c06f288e0ed30b14d43aa9 Author: TB Schardl Date: Sun Oct 30 19:19:15 2016 +0000 [TargetLoweringBase] Dealing with compile warning on TargetLoweringBase. commit 581677b179aa2ed89134c8034ac491fae68595f0 Author: TB Schardl Date: Sun Oct 30 19:18:10 2016 +0000 [LoopSpawning] Replacing Loop2Cilk with LoopSpawning. 
commit 39d404b1998c4c2d3635939c27f85c70e987d70f Author: TB Schardl Date: Sun Oct 30 18:54:23 2016 +0000 [DiagnosticInfo] New method for emitting warning messages for the LoopSpawning pass. commit 3d834b9e67f2779d2acd2bfd65d0b192561597d1 Author: TB Schardl Date: Thu Oct 27 21:27:33 2016 +0000 Updating passes to run around new Loop2Cilk implementation. commit 35ec023f57f3a240f598d2a9822ec29aedcaf48c Author: TB Schardl Date: Thu Oct 27 21:25:43 2016 +0000 Moving Tapir-specific transformations to a separate subdirectory under Transforms. commit 3aae9e2c7b3402a3816f5b31a70a9326674c7a9f Author: TB Schardl Date: Sat Oct 22 14:40:05 2016 +0000 [Cilk] Refactoring components for lowering Tapir to Cilk runtime calls. commit 0a92f963f5978e3f7cd91a1f77a9b3040b4a2baf Merge: 54f16a4669d fe05c97a9eb Author: TB Schardl Date: Sat Oct 22 14:33:05 2016 +0000 Merge branch 'master' of github.com:wsmoses/Parallel-IR commit 54f16a4669deaefc6a92a6f098485ee2d02d608b Author: TB Schardl Date: Sat Oct 22 14:30:27 2016 +0000 [Local] Cleaned up formatting to get rid of tabs. commit a8fade288fdbc1e194b7b0adba5ebdf61f05cb38 Author: TB Schardl Date: Sat Oct 22 14:28:18 2016 +0000 [Local] Fix to SerializeDetachedCFG to preserve debug symbols. commit 5cc10ed3110941799eb681ad00833028ca692193 Author: TB Schardl Date: Sat Oct 22 14:17:40 2016 +0000 [Instrumentation] Adding CSI instrumentation pass, copied from https://github.com/CSI-LLVM/. commit fe05c97a9eb98c01cfaa7a1a5129b0d002e2db70 Author: William S. Moses Date: Sat Oct 22 10:00:23 2016 -0400 Resolve issue 7 commit 4664388bb8c70312e21d321196942924a23955ff Author: TB Schardl Date: Wed Oct 19 16:01:28 2016 +0000 [emacs] Added detach, reattach, and sync as control instructions in LLVM's emacs mode. commit c0e8f4fe8db4bdac7f84bbf2ce6cb8a73a9252bd Author: TB Schardl Date: Mon Oct 17 04:14:35 2016 +0000 [SSAUpdater] Derive the correct value from detached predecessors. 
commit 2abd121b4c25579045347105a56b8383d0cefb9d Author: TB Schardl Date: Fri Oct 14 21:46:24 2016 +0000 [LICM] Fixing compiler crash when LICM attempts to move a store outside of a Tapir loop. commit 28606d0fb2e4e2bcaf37959292c2a89cedaf7a1e Author: TB Schardl Date: Thu Oct 13 02:12:43 2016 +0000 [AliasAnalysis] Minor formatting change. commit e5e04d08d7ddad2e021d0744ef52c52048955a2c Author: TB Schardl Date: Thu Oct 13 02:08:30 2016 +0000 [InlineFunction] Preventing InlineFunction from moving alloca's out of their detached context after inlining. commit 14719bb0513004960e3c8b0571b82981cc2b1239 Merge: 84848c51548 7f4bee18532 Author: William S. Moses Date: Thu Oct 6 13:53:55 2016 -0400 Merge branch 'master' of github.com:wsmoses/Parallel-IR commit 84848c51548b59b6beafa5c90615f36e64500199 Author: William S. Moses Date: Thu Oct 6 13:53:50 2016 -0400 Allow full unrolling of cilk for loops commit 7f4bee185325eebc78533ef450a45e43926da694 Author: TB Schardl Date: Thu Oct 6 16:51:37 2016 +0000 [AliasAnalysis] Force AliasAnalysis to fail fast if it finds a detached CFG that reaches its own Detach instruction. commit a2c6e22dd11c4212dbb64ce15020f677d77ed479 Author: TB Schardl Date: Tue Oct 4 22:44:38 2016 +0000 [Loop2Cilk] Fix splitting of loop preheaders that are terminated by sync instructions. commit 1d1bdcf375abd2e0e83a8500278acc6124bf16f2 Author: William S. Moses Date: Sun Oct 2 23:19:30 2016 -0400 minor modref fix commit 9ca914a946ee787fa8750a0a622d0f901641f2cf Author: William S. Moses Date: Fri Sep 23 16:12:32 2016 -0400 fix line info commit 16395e5ae2ab1cbc17de82c0127680aeccecedc1 Author: William S. Moses Date: Thu Sep 22 09:08:42 2016 -0400 Additional clean up commit af36e03c8282f4c431260dbfe16e3c323c72b82d Author: William S. Moses Date: Wed Sep 21 16:56:01 2016 -0400 clean up unrolling commit 87d19e853f283cf9fac9c1e71239e34227fad27c Author: William S. 
Moses Date: Wed Sep 21 16:48:27 2016 -0400 resolve move to clang 4 commit 79323f66683946df1702005e3071f7fed23f0c3d Author: William S. Moses Date: Thu Sep 15 15:06:36 2016 -0400 fix tre commit 574835b96b09f8d9b496f17c303b7a3457cd2e1f Author: William S. Moses Date: Thu Sep 15 12:01:49 2016 -0400 Fix mem2reg bug commit 88cccc72240abd17a1dec0b2d238686919db7e81 Author: William S. Moses Date: Tue Sep 13 17:14:44 2016 -0400 fix running bugs commit f449ac224baed049d3a4eecaccaeef7ac0954e36 Author: William S. Moses Date: Mon Sep 12 14:10:31 2016 -0400 fmt commit 1d618f6fc664f473131fa11d3b5ba495e3d1cbbd Author: William S. Moses Date: Mon Sep 12 14:08:22 2016 -0400 fmt commit 05d2fe180fe4980474f8e7317936b312b749e048 Author: William S. Moses Date: Mon Sep 12 14:07:24 2016 -0400 fmt commit cb166968bc4f79b54e24272b59f935e3239109c6 Author: William S. Moses Date: Wed Aug 17 22:11:31 2016 -0400 solid commit 1be62909730984141b5afbec84c48823735c4429 Merge: c3eb1b7594a e65e275cf2f Author: William S. Moses Date: Wed Aug 17 18:01:27 2016 -0400 Merge remote-tracking branch 'llvm/master' commit c3eb1b7594a5953a324015aa08f745e31fb0ec65 Author: William S. Moses Date: Wed Aug 17 18:00:22 2016 -0400 cleanup commit 925a26d33e5aa664ed2a950bfac6f123832d28f1 Author: William S. Moses Date: Wed Aug 17 17:55:49 2016 -0400 cleanup commit 8a4aa28bc1ac48d2073507eb365e2461b206f524 Merge: 9ee354913cb 7177ff558c7 Author: William S. Moses Date: Wed Aug 17 02:54:17 2016 -0400 merge to mainline commit 9ee354913cb1d00c79b0173d87e8259db193d73f Author: William S. Moses Date: Mon Aug 15 01:43:52 2016 -0400 Add race detector commit 9b7715ebfc3bdd80382cbce7ca724868789c9cd6 Author: William S. Moses Date: Wed Aug 10 00:04:31 2016 -0400 cmake fixes commit b66e56629e6ddd6895342d281ed510b011cecff1 Author: Ubuntu Date: Fri Jul 29 21:11:20 2016 +0000 LICM fix commit c1aabfb01f044642dc9fb4317313d408c3cc39fc Author: William S. 
Moses Date: Wed Jul 27 21:22:20 2016 -0400 add merge functions commit 72b025f6f0d254ab7e37e7cabb42e9e27f01ede8 Author: William S. Moses Date: Wed Jul 20 13:40:34 2016 -0400 fix dt commit 39c33184af36efb1af71591940caf1924ace5ac8 Author: William S. Moses Date: Wed Jul 20 13:34:33 2016 -0400 fix dt commit af099d0ad6a6c263f969e2c8b577d8a6c80bd685 Author: William S. Moses Date: Wed Jul 20 13:14:30 2016 -0400 fix dt commit 920d83fc1bed8c82c0f2ccf58379371445206469 Author: William S. Moses Date: Wed Jul 20 12:12:44 2016 -0400 fix ph issue commit b0abbc37c6e836acf46b8703b54a0881fd499b96 Author: William S. Moses Date: Wed Jul 20 11:49:12 2016 -0400 resolve print commit d7aa05a4ebf5866d9fe70dd3733e9e20df4fdd76 Author: William S. Moses Date: Tue Jul 19 18:10:57 2016 -0400 major pbbs bugfix commit f470066edb8b7a8d8db7cef0b9a7b65f8fd8090a Author: William S. Moses Date: Tue Jul 19 14:31:06 2016 -0400 fix ppbs bug commit e1ac630d820ec2a7455392f4ddc9c4c620ea26c2 Author: William S. Moses Date: Mon Jul 18 21:35:07 2016 -0400 mod graint position commit 0e725b855f90f63703d71a8761f717697912b65c Author: William S. Moses Date: Mon Jul 18 21:14:16 2016 -0400 mod graint position commit 83e0982370d9a89d4f0b0b33636511568d8eda40 Author: William S. Moses Date: Mon Jul 18 16:17:40 2016 -0400 cilk abi fixes commit 63738d884d78c5297d1c781da81b6599e9cdeba3 Author: William S. Moses Date: Mon Jul 18 13:07:38 2016 -0400 fix recursive idx commit 45ca520784a38bbc13b0d00597310d931c757e4b Author: William S. Moses Date: Mon Jul 18 02:25:34 2016 -0400 fix issues with d2c extraction commit 0e9c93c9d38a035d1ea88c2fbfbff6d6144cde0f Author: William S. Moses Date: Sun Jul 17 22:21:06 2016 -0400 add reopt commit ec8c23de30635cb0969514bd18068d4e2bd77ec9 Author: William S. Moses Date: Sun Jul 17 22:18:39 2016 -0400 prevent rerunning passes commit 8d6bd63be4a6c8ebf61be02b9d2d8535de3b9484 Author: William S. 
Moses Date: Thu Jul 14 13:19:44 2016 -0700 fix asm errors commit f83bdc1fab9bf732ea0be8b134cea617e4f85500 Author: William S. Moses Date: Tue Jul 12 08:18:01 2016 -0700 fix unreachable merge domtree bug commit 662b5a7e0018b659b08dc9256dfd61f94d756f56 Author: William S. Moses Date: Mon Jul 11 16:04:43 2016 -0400 Resolve issues with bounds detection in loop2cilk commit 4866c5da1c28d2c67dc168edf119cc4adfbc07f3 Author: William S. Moses Date: Thu Jul 7 09:28:14 2016 -0400 minor attr fix commit 1f4c43c41f109f82859a88525a851f00b2e1b5e4 Author: William S. Moses Date: Thu Jun 30 15:05:11 2016 -0400 fix bounds error commit 0caf3f63eb873abb93e06080eb875f0945c5c2df Author: William S. Moses Date: Thu Jun 30 14:13:54 2016 -0400 speedup fix commit 5cf555f901601c76bc416f7ef94dc77b375bcf84 Author: William S. Moses Date: Thu Jun 30 12:41:46 2016 -0400 resolve linker issues commit 25e91bfc5f42f6eb1977cefe90336e85994d65d3 Author: William S. Moses Date: Thu Jun 30 12:37:47 2016 -0400 prevent l2c recursive loops commit 325bce7bb19e0e4828e6f7eba6ba6420a1f59f7a Author: William S. Moses Date: Wed Jun 29 22:41:14 2016 -0400 fix issue with loop parents commit 8e0997cb4b85e14c83783d81a7e3815d64fc6056 Author: William S. Moses Date: Wed Jun 29 21:10:51 2016 -0400 more efficient loops commit f302f9480f94a4e7f816707e5224c85e0bf07218 Author: William S. Moses Date: Wed Jun 29 01:05:05 2016 -0400 l2c computes grain size commit 1dbd257083c5d5e95fa662cc99da0b150aed94e2 Author: William S. Moses Date: Tue Jun 28 16:47:52 2016 -0400 more error info for bad return state commit ec4340b4cee3951abf49ad1636bff07cb77fb80f Author: William S. Moses Date: Mon Jun 27 17:57:49 2016 -0400 fix accidental breakage commit 88ceb1203926d59578e2c0dba02bf3b38f374120 Author: William S. Moses Date: Mon Jun 27 14:39:50 2016 -0400 fix loop2cilk indvar incr adding issue commit 0a1cbbf7dff910f348713a88108169e03dabf3de Author: William S. 
Moses Date: Fri Jun 24 13:43:53 2016 -0400 Better Parallel TRE commit bc96f0b3f141176d1667b1700be945aed7520e9c Author: William S. Moses Date: Fri Jun 24 01:38:46 2016 -0400 Parallel TRE commit 579d39d8efab448cacf9c41aea8197226c64bfe4 Author: William S. Moses Date: Thu Jun 23 13:47:13 2016 -0400 more secure sync detect for loop2cilk commit c06f49770a26c971efe66356b90a0a1ef7f2a301 Author: William S. Moses Date: Wed Jun 22 16:57:07 2016 -0400 Fix alloca issues for detached code commit 150056edc4a2bb03c0bbe94923cfa189ce44f052 Author: William S. Moses Date: Tue Jun 21 19:17:47 2016 -0400 minor opt diff commit 497c3b498bc8ce71ad913dff063853204810f402 Author: William S. Moses Date: Tue Jun 21 15:02:58 2016 -0400 modify pass commit 01e49c3727f69e2da875989b4e61ab10fc058327 Author: William S. Moses Date: Tue Jun 21 01:14:31 2016 -0400 fix loop2cilk recog issue commit 1c52cbf136f247110b7c9e4cac0a5a0d73ad63f7 Author: William S. Moses Date: Tue Jun 21 00:35:03 2016 -0400 remove pre sroa commit 510bfacf5154f48e729c159c95c965acf4eef120 Author: William S. Moses Date: Mon Jun 20 20:36:34 2016 -0400 loop2cilk fixes to indvar commit ef34ac80086a10e3ae04b9fd2ce4d99436eaa69e Author: Ubuntu Date: Mon Jun 20 19:00:07 2016 +0000 Resolve linker errors commit 4387eb25bb6e36f0e5f8d04c9d9d3f710864044a Author: William S. Moses Date: Mon Jun 20 14:47:48 2016 -0400 Loop2cilk new indvar calculation commit d4e44d43b5c6e40883975e87aa2c4c46759a8eb8 Author: William S. Moses Date: Mon Jun 20 04:10:48 2016 -0400 loop2cilk without opts commit 9164742231eb140864e17562dd7e79161685e293 Author: William S. Moses Date: Mon Jun 20 03:48:51 2016 -0400 correct loop bounds calculation commit d0d80c596491f3d8b7b9f2479f996f9345e9f059 Author: William S. Moses Date: Sun Jun 19 00:43:55 2016 -0400 clean up compile commit 26beb619a1384b470ca0e668c1a838ee85b78b75 Author: William S. 
Moses Date: Fri Jun 17 14:37:46 2016 -0400 remove debug message commit 76a163ddffdb916de1bee5fef34298e676266bff Author: Ubuntu Date: Wed Jun 15 20:58:36 2016 +0000 nomem commit 126c754b4f8e553e6b9ff33f899afaaf4182ee04 Author: William S. Moses Date: Wed Jun 15 15:41:57 2016 -0400 fixes and less print commit cd037d2993381148f11954f51ff89c6b5e599086 Author: William S. Moses Date: Tue Jun 14 23:33:28 2016 -0400 restore cilkabi commit 5964e893682feec3a63d17999d32c2125486e879 Author: William S. Moses Date: Tue Jun 14 23:19:52 2016 -0400 fix inline bug commit b5a22ebc589fc25b72f513eb16ccbedc6482e9f2 Author: William S. Moses Date: Tue Jun 14 14:32:41 2016 -0400 cleanup dumps commit 2ab9f07b81a7fb04c33926c2899c4af1753d6175 Author: William S. Moses Date: Tue Jun 14 14:30:04 2016 -0400 cleanup dumps commit 56d8d0f052de051328c2077bcd47e75f34d9f034 Author: William S. Moses Date: Tue Jun 14 12:35:26 2016 -0400 cleanup dumps commit d95ce1575159c12135952b3fa39a092bc77ad298 Author: William S. Moses Date: Tue Jun 14 12:29:38 2016 -0400 addl sroa fixes commit 2754c0b40a4ca26d3201005a1d2796b840bdcce7 Author: William S. Moses Date: Tue Jun 14 12:16:02 2016 -0400 loop2cilk ordering issue for ind var calculation fixed commit bebf5cc0565d9060e78a3caeb880b2ce8f43b36c Author: William S. Moses Date: Tue Jun 14 11:27:20 2016 -0400 Fix SROA for detached allocas commit 222ecb6dfd053282d450cbe9cffc7cea4d98fa5d Author: William S. Moses Date: Tue Jun 14 00:36:00 2016 -0400 minor bugfix commit 446ad1a3bad89a44dd2c361cc0d9417a0a07eb2b Author: William S. Moses Date: Mon Jun 13 21:59:25 2016 -0400 bugfixes commit bc37ee11a97c23b0576d45bcc94e7a597ff30a39 Author: William S. Moses Date: Thu Jun 9 10:43:21 2016 -0400 Fix odd LICM error commit abfc103a0f06248526972ddd6f6057e372d56383 Author: William S. Moses Date: Wed Jun 8 01:04:49 2016 -0400 parallel opt levels and fix codegen pt 1 commit cab96d82f5d94a4a6745983953f43850d3a80f7d Author: William S. 
Moses Date: Fri Jun 3 01:43:13 2016 -0400 fix compile script commit 6284487a349fe982d5d24d2ff45d8ff5c8d25708 Author: William S. Moses Date: Fri Jun 3 01:41:01 2016 -0400 fix l2c commit 3783dfebd1a8d94ab40b958e03ffb99ac54e3f5b Author: William S. Moses Date: Thu Jun 2 23:50:39 2016 -0400 Fix allocation issues commit fc2042d6a1331df9a55148208d27b2c2d4834ef7 Author: William S. Moses Date: Mon May 30 15:20:22 2016 -0400 add unique block debug info commit cd3303d769327d50bcf3a422496190ed349cbaac Author: William S. Moses Date: Mon May 30 15:17:18 2016 -0400 fix exit block detection l2c commit 4865203b50d0ad69531b6459a35d557908db3ffe Author: William S. Moses Date: Mon May 30 15:02:11 2016 -0400 fix sync l2c detection issue commit e95a55ae8775dfe21c0ce10e0ea32332bc3d973a Author: William S. Moses Date: Sun May 29 23:31:59 2016 -0400 allow switch and better cmp block commit b17417485a42308842840748c73c76953302dc30 Author: William S. Moses Date: Sun May 29 22:09:34 2016 -0400 fix issues in multiple phi nodes for l2c commit f64fca467066650bdab351a55ec38943d360fced Author: William S. Moses Date: Sun May 29 17:29:00 2016 -0400 add addl check for loop2cilk commit 8d9ac096f9beda10ff400631aae3336b5cb0982e Author: William S. Moses Date: Sat May 28 22:36:56 2016 -0400 minor script fix commit 748021ae6a76b9d6e2ecb85b3e247455d5e9bdb9 Author: William S. Moses Date: Sat May 28 22:24:41 2016 -0400 lots of minor cilk error fixes commit 0132cc1ce667fd8c21adaf5b3abd5dfadac80c09 Author: William S. Moses Date: Wed May 25 11:52:28 2016 -0400 fix bug in l2c about branching into commit 9f921005730c6c92fbdf19b36714488c72c0975e Author: William S. Moses Date: Tue May 24 23:40:12 2016 -0400 fix bug in loop2cilk commit a9d9cd9529c20022fd5ca0600042065cfee21d8f Author: William S. Moses Date: Sun Apr 10 14:32:22 2016 -0400 resolve block seg commit 7410b7bcfbf610b34a0f42c0966cbdbd2e9b2e97 Author: William S. 
Moses Date: Sun Apr 10 13:55:01 2016 -0400 fixes commit 11a77b870e734e617b00e4b55f09526cf2ac37d4 Author: William S. Moses Date: Thu Apr 7 03:04:30 2016 -0400 add compile commit f2ec969a1965da3224fdffed035b9d39114d2b9a Author: William S. Moses Date: Thu Apr 7 03:04:17 2016 -0400 pre detach merging / loop unroll fixes commit 9c00e9b80d865cf478607a4ddb90ca018ad2978c Author: William S. Moses Date: Thu Apr 7 00:27:15 2016 -0400 sync fix commit 1f3c6dcb9d48ba519fde34c66b657571949428f7 Author: William S. Moses Date: Thu Apr 7 00:12:58 2016 -0400 bug fixes commit 0f1b1cf061ab790622c6498e0df9c5487a8d610c Author: William S. Moses Date: Tue Apr 5 18:44:04 2016 -0400 resolve delete issues commit 86cd5870f9d667ff36b2c10971216e8f6d0977d0 Author: William S. Moses Date: Tue Apr 5 13:10:36 2016 -0400 resolve delete issues commit 06defa794acaf1f13ecdd63d57b38a49e2561492 Merge: 2f7e6ec4fa6 8b47c17a53d Author: William S. Moses Date: Tue Apr 5 11:57:10 2016 -0400 Merge remote-tracking branch 'llvm/release_38' commit 8b47c17a53d683f313eaaa93c4a53de26d8fcba5 Author: Dimitry Andric Date: Tue Apr 5 06:58:21 2016 +0000 Merging r264335: ------------------------------------------------------------------------ r264335 | dim | 2016-03-24 21:39:17 +0100 (Thu, 24 Mar 2016) | 17 lines Add <atomic> to ThreadPool.h, since std::atomic is used Summary: Apparently, when compiling with gcc 5.3.2 for powerpc64, the order of headers is such that it gets an error about std::atomic<> use in ThreadPool.h, since this header is not included explicitly. See also: https://llvm.org/bugs/show_bug.cgi?id=27058 Fix this by including <atomic>. Patch by Bryan Drewery. 
Reviewers: chandlerc, joker.eph Subscribers: bdrewery, llvm-commits Differential Revision: http://reviews.llvm.org/D18460 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@265380 91177308-0d34-0410-b5e6-96231b3b80d8 commit 295c7a62d88d363361198766ce95900441727da9 Author: Renato Golin Date: Sat Apr 2 20:36:55 2016 +0000 Merging r263714: ARM: Revert SVN r253865, 254158, fix windows division git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@265245 91177308-0d34-0410-b5e6-96231b3b80d8 commit 2a2d901e3c55aff48990de5e415c429c4cfeb6d8 Author: Renato Golin Date: Sat Apr 2 20:32:54 2016 +0000 Merging r263123: ARM: follow up improvements for SVN r263118 git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@265244 91177308-0d34-0410-b5e6-96231b3b80d8 commit 97a35e605ab417f11be4ccb532fcc9015ebb2ca8 Author: Renato Golin Date: Sat Apr 2 20:31:15 2016 +0000 Merging r263118: ARM: correct __builtin_longjmp on WoA git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@265243 91177308-0d34-0410-b5e6-96231b3b80d8 commit dec3a22cf5b8f8e6c6d1bf898f3a14bc4c54e0b4 Author: Tom Stellard Date: Mon Mar 28 18:13:48 2016 +0000 Bump version to 3.8.1 git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@264605 91177308-0d34-0410-b5e6-96231b3b80d8 commit 2f7e6ec4fa663dff11ba3dff5f74468e79c042d9 Author: TB Schardl Date: Thu Mar 17 08:15:50 2016 +0000 Cleaning up CilkABI. commit 88a51fc0886146600e14173a0878b6567b29e3bc Author: TB Schardl Date: Thu Mar 17 08:15:05 2016 +0000 Fixing Loop2Cilk CMakeLists entries to fix cmake build. 
commit 0d0d243f395a4192bf4d85817c8ac14f5d9d8b2f Author: TB Schardl Date: Thu Mar 17 08:14:16 2016 +0000 Fixing Loop2Cilk for merge with 'release_38' commit 277ca2c63350507bf3ba5cd075f204e4b356fc5f Merge: 008aa9d2441 ad5750369cc Author: TB Schardl Date: Thu Mar 17 08:09:16 2016 +0000 Merge branch 'release_38' of http://llvm.org/git/llvm into tb-scratch commit 008aa9d24417420734027b5072ea48cc86b428d2 Author: William S. Moses Date: Sat Mar 12 17:32:11 2016 -0500 loop2cilk working happily commit ea5e316db15804df27dcfaf6b790f07c8e7bd2b2 Merge: 9b3fc2538fd 1526147c0ad Author: William S. Moses Date: Thu Mar 10 13:16:18 2016 -0500 Merge branch 'tb-scratch' of ssh://github.com/taekwonbilly/Parallel-IR into tb-scratch commit 9b3fc2538fdd9218bcb1a91b954028652579c6e4 Author: William S. Moses Date: Thu Mar 10 13:15:45 2016 -0500 loop2cilk mods commit ad5750369cc5b19f36c149f7b13151c99c7be47a Author: Hans Wennborg Date: Wed Mar 2 23:38:03 2016 +0000 ReleaseNotes: tidy up git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@262542 91177308-0d34-0410-b5e6-96231b3b80d8 commit 0805780408c97128dc9164d4dbb8604882f5588e Author: Hans Wennborg Date: Wed Mar 2 23:10:55 2016 +0000 Remove 'if you are using a released version' warning git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@262537 91177308-0d34-0410-b5e6-96231b3b80d8 commit f26161e8b05360841a1a3a4a2204ed761d6a2e04 Author: Hans Wennborg Date: Wed Mar 2 18:19:22 2016 +0000 ReleaseNotes: C API policy; by Eric Christopher git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@262496 91177308-0d34-0410-b5e6-96231b3b80d8 commit 27c964e2ae0b573cf1e6551a3da255539db03d3c Author: Hans Wennborg Date: Fri Feb 26 21:37:52 2016 +0000 ReleaseNotes: PowerPC; by Kit Barton git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@262074 91177308-0d34-0410-b5e6-96231b3b80d8 commit bb6f14e3581c78509405a3d415e72821db8a2066 Author: Quentin Colombet Date: Mon Feb 22 22:27:47 2016 +0000 
[AArch64] Fix bug in prolog clobbering live reg when shrink wrapping. This adapts r261349 to the release branch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261586 91177308-0d34-0410-b5e6-96231b3b80d8 commit e970b795a27d16c720bf4e3ff030eea241784eb4 Author: Hans Wennborg Date: Mon Feb 22 21:05:14 2016 +0000 Merging r261441, r261447, and r261546: ------------------------------------------------------------------------ r261441 | nemanjai | 2016-02-20 10:16:25 -0800 (Sat, 20 Feb 2016) | 12 lines Fix for PR 26500 This patch corresponds to review: http://reviews.llvm.org/D17294 It ensures that whatever block we are emitting the prologue/epilogue into, we have the necessary scratch registers. It takes away the hard-coded register numbers for use as scratch registers as registers that are guaranteed to be available in the function prologue/epilogue are not guaranteed to be available within the function body. Since we shrink-wrap, the prologue/epilogue may end up in the function body. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r261447 | nemanjai | 2016-02-20 12:45:37 -0800 (Sat, 20 Feb 2016) | 6 lines Fix the build bot break caused by rL261441. The patch has a necessary call to a function inside an assert. Which is fine when you have asserts turned on. Not so much when they're off. Sorry about the regression. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r261546 | nemanjai | 2016-02-22 10:04:00 -0800 (Mon, 22 Feb 2016) | 6 lines Fix for PR26690 take 2 This is what was meant to be in the initial commit to fix this bug. The parens were missing. This commit also adds a test case for the bug and has undergone full testing on PPC and X86. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261572 91177308-0d34-0410-b5e6-96231b3b80d8 commit f65e46be097186d748836d42c38a6dc7f30e6c3b Author: Hans Wennborg Date: Mon Feb 22 17:51:28 2016 +0000 Merging r261387: ------------------------------------------------------------------------ r261387 | davide | 2016-02-19 16:44:47 -0800 (Fri, 19 Feb 2016) | 8 lines [X86ISelLowering] Fix TLSADDR lowering when shrink-wrapping is enabled. TLSADDR nodes are lowered into actuall calls inside MC. In order to prevent shrink-wrapping from pushing prologue/epilogue past them (which result in TLS variables being accessed before the stack frame is set up), we put markers, so that the stack gets adjusted properly. Thanks to Quentin Colombet for guidance/help on how to fix this problem! ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261542 91177308-0d34-0410-b5e6-96231b3b80d8 commit e3b2bd1e79c9c9d24490b6ddb2341afcf4210691 Author: Hans Wennborg Date: Mon Feb 22 17:47:10 2016 +0000 Merging r261384: ------------------------------------------------------------------------ r261384 | qcolombet | 2016-02-19 16:32:29 -0800 (Fri, 19 Feb 2016) | 4 lines [RegAllocFast] Properly track the physical register definitions on calls. PR26485 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261539 91177308-0d34-0410-b5e6-96231b3b80d8 commit c63a0fe41b81bac1ea6e1a053d2a8939e02edf17 Author: Hans Wennborg Date: Fri Feb 19 21:42:57 2016 +0000 Merging r261368: ------------------------------------------------------------------------ r261368 | hans | 2016-02-19 13:40:12 -0800 (Fri, 19 Feb 2016) | 3 lines Revert r255691 "[LoopVectorizer] Refine loop vectorizer's register usage calculator by ignoring specific instructions." 
It caused PR26509. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261369 91177308-0d34-0410-b5e6-96231b3b80d8 commit 78e9cd40a2ea27cc9300d900a7dccc75940f9eb0 Author: Hans Wennborg Date: Fri Feb 19 21:35:00 2016 +0000 Merging r261360: ------------------------------------------------------------------------ r261360 | dim | 2016-02-19 12:14:11 -0800 (Fri, 19 Feb 2016) | 19 lines Fix incorrect selection of AVX512 sqrt when OptForSize is on Summary: When optimizing for size, sqrt calls can be incorrectly selected as AVX512 VSQRT instructions. This is because X86InstrAVX512.td has a `Requires<[OptForSize]>` in its `avx512_sqrt_scalar` multiclass definition. Even if the target does not support AVX512, the class can apparently still be chosen, leading to an incorrect selection of `vsqrtss`. In PR26625, this lead to an assertion: Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!", because the `vsqrtss` instruction requires an XMM register, which is not available on i686 CPUs. Reviewers: grosbach, resistor, joker.eph Subscribers: spatel, emaste, llvm-commits Differential Revision: http://reviews.llvm.org/D17414 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261367 91177308-0d34-0410-b5e6-96231b3b80d8 commit fdf40bea4fc416643210790fff4345be98d97245 Author: Hans Wennborg Date: Fri Feb 19 21:28:08 2016 +0000 Merging r261365: ------------------------------------------------------------------------ r261365 | hans | 2016-02-19 13:26:31 -0800 (Fri, 19 Feb 2016) | 3 lines Revert r253557 "Alternative to long nops for X86 CPUs, by Andrey Turetsky" Turns out the new nop sequences aren't actually nops on x86_64 (PR26554). 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261366 91177308-0d34-0410-b5e6-96231b3b80d8 commit 413ee9f101de92d75fc11334ffeb6a054d67a18c Author: Renato Golin Date: Fri Feb 19 17:35:27 2016 +0000 Merge r261331: avoid out of bounds loads for interleaved access vectorization git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261341 91177308-0d34-0410-b5e6-96231b3b80d8 commit 124d2bc4dc3298d2b669be23a5b640d985319b65 Author: Hans Wennborg Date: Fri Feb 19 17:13:16 2016 +0000 Merging r261306: ------------------------------------------------------------------------ r261306 | matze | 2016-02-18 20:44:19 -0800 (Thu, 18 Feb 2016) | 1 line LegalizeDAG: Fix ExpandFCOPYSIGN assuming the same type on both inputs ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261334 91177308-0d34-0410-b5e6-96231b3b80d8 commit 6f28d52e9d3f87875732a0f2c1f3b03ef56be2db Author: Hans Wennborg Date: Fri Feb 19 00:08:56 2016 +0000 Merging r261258: ------------------------------------------------------------------------ r261258 | rnk | 2016-02-18 12:57:41 -0800 (Thu, 18 Feb 2016) | 14 lines [IR] Straighten out bundle overload of IRBuilder::CreateCall IRBuilder has two ways of putting bundle operands on calls: the default operand bundle, and an overload of CreateCall that takes an operand bundle list. Previously, this overload used a default argument of None. This made it impossible to distinguish between the case were the caller doesn't care about bundles, and the case where the caller explicitly wants no bundles. We behaved as if they wanted the latter behavior rather than the former, which led to problems with simplifylibcalls and WinEH. This change fixes it by making the parameter non-optional, so we can distinguish these two cases. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261282 91177308-0d34-0410-b5e6-96231b3b80d8 commit 6e961aa243f223ddb704ce708056238d7c1d7e24 Author: Hans Wennborg Date: Wed Feb 17 19:00:40 2016 +0000 Merging r261039: ------------------------------------------------------------------------ r261039 | rnk | 2016-02-16 16:17:33 -0800 (Tue, 16 Feb 2016) | 6 lines [X86] Fix a shrink-wrapping miscompile around __chkstk __chkstk clobbers EAX. If EAX is live across the prologue, then we have to take extra steps to save it. We already had code to do this if EAX was a register parameter. This change adapts it to work when shrink wrapping is used. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261136 91177308-0d34-0410-b5e6-96231b3b80d8 commit ebe537a930b58a5d32fc41ac133309139c92f7bd Author: David Majnemer Date: Wed Feb 17 18:49:28 2016 +0000 Merging r258616: ------------------------------------------------------------------------ r258616 | majnemer | 2016-01-22 22:00:44 -0800 (Fri, 22 Jan 2016) | 3 lines [PruneEH] Don't try to insert a terminator after another terminator LLVM's BasicBlock has a single terminator, it is not valid to have two. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261132 91177308-0d34-0410-b5e6-96231b3b80d8 commit 9f25a0678ed9f06088a09649a040a6bef362e6af Author: David Majnemer Date: Wed Feb 17 18:49:09 2016 +0000 Merging r258611: ------------------------------------------------------------------------ r258611 | majnemer | 2016-01-22 21:41:29 -0800 (Fri, 22 Jan 2016) | 6 lines [PruneEH] FuncletPads must not have undef operands Instead of RAUW with undef, replace the first non-token instruction with unreachable. This fixes PR26263. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261131 91177308-0d34-0410-b5e6-96231b3b80d8 commit 4212ebff28e32dbd26bd93f4fa77190d80357ed4 Author: David Majnemer Date: Wed Feb 17 18:48:45 2016 +0000 Merging r258610: ------------------------------------------------------------------------ r258610 | majnemer | 2016-01-22 21:41:27 -0800 (Fri, 22 Jan 2016) | 3 lines [PruneEH] Unify invoke and call handling in DeleteBasicBlock No functionality change is intended. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261130 91177308-0d34-0410-b5e6-96231b3b80d8 commit ba95fe05372c1934c30e21747480d401c1e5bcec Author: David Majnemer Date: Wed Feb 17 18:48:28 2016 +0000 Merging r258609: ------------------------------------------------------------------------ r258609 | majnemer | 2016-01-22 21:41:22 -0800 (Fri, 22 Jan 2016) | 5 lines [PruneEH] Reuse code from removeUnwindEdge PruneEH had functionality idential to removeUnwindEdge. Consolidate around removeUnwindEdge. No functionality change is intended. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261129 91177308-0d34-0410-b5e6-96231b3b80d8 commit 77c8a562e0c7c47df3bb988e2d230df6a9dcbe1d Author: David Majnemer Date: Wed Feb 17 18:42:17 2016 +0000 Merging r259702: ------------------------------------------------------------------------ r259702 | majnemer | 2016-02-03 13:30:34 -0800 (Wed, 03 Feb 2016) | 7 lines [LoopStrengthReduce] Don't rewrite PHIs with incoming values from CatchSwitches Bail out if we have a PHI on an EHPad that gets a value from a CatchSwitchInst. Because the CatchSwitchInst cannot be split, there is no good place to stick any instructions. This fixes PR26373. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261126 91177308-0d34-0410-b5e6-96231b3b80d8 commit c75c50f45b3d6d1d61ce6b411d12cedaadd71d5b Author: David Majnemer Date: Wed Feb 17 18:41:44 2016 +0000 Merging r260164: ------------------------------------------------------------------------ r260164 | akaylor | 2016-02-08 14:52:51 -0800 (Mon, 08 Feb 2016) | 5 lines [regalloc][WinEH] Do not mark intervals as not spillable if they contain a regmask Differential Revision: http://reviews.llvm.org/D16831 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261125 91177308-0d34-0410-b5e6-96231b3b80d8 commit fde3338c42eb085f169ecc3817c4736075e4a683 Author: David Majnemer Date: Wed Feb 17 18:41:08 2016 +0000 Merging r260733: ------------------------------------------------------------------------ r260733 | akaylor | 2016-02-12 13:10:16 -0800 (Fri, 12 Feb 2016) | 5 lines [WinEH] Prevent EH state numbering from skipping nested cleanup pads that never return Differential Revision: http://reviews.llvm.org/D17208 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261124 91177308-0d34-0410-b5e6-96231b3b80d8 commit 2507c58ca21ee01c359cd5ddf2fe84eea16366ee Author: Hans Wennborg Date: Wed Feb 17 17:57:26 2016 +0000 ReleaseNotes: new Win EH instructions; by David Majnemer git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261116 91177308-0d34-0410-b5e6-96231b3b80d8 commit d77e9352a80c954cf91335c236224e4ca7d9c5f4 Author: Hans Wennborg Date: Wed Feb 17 16:40:51 2016 +0000 Merging r261033: ------------------------------------------------------------------------ r261033 | akaylor | 2016-02-16 15:52:18 -0800 (Tue, 16 Feb 2016) | 5 lines Fix build LLVM with -D LLVM_USE_INTEL_JITEVENTS:BOOL=ON on 
Windows Differential Revision: http://reviews.llvm.org/D16940 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261106 91177308-0d34-0410-b5e6-96231b3b80d8 commit 7609bf251117db67abfe0d5b6622860afc769278 Author: Hans Wennborg Date: Wed Feb 17 00:05:18 2016 +0000 ReleaseNotes: -femulated-tls; by Chih-hung Hsieh git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261035 91177308-0d34-0410-b5e6-96231b3b80d8 commit 07fd930a2be55b0789737cd9769f0d0e42def3a7 Author: Hans Wennborg Date: Tue Feb 16 23:22:17 2016 +0000 Merging r260390: ------------------------------------------------------------------------ r260390 | jyknight | 2016-02-10 09:47:20 -0800 (Wed, 10 Feb 2016) | 12 lines [SPARC] Repair floating-point condition encodings in assembly parser. The encodings for floating point conditions A(lways) and N(ever) were incorrectly specified for the assembly parser, per Sparc manual v8 page 121. This change corrects that mistake. Also, strangely, all of the branch instructions already had MC test cases, except for the broken ones. Added the tests. 
Patch by Chris Dewhurst Differential Revision: http://reviews.llvm.org/D17074 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261031 91177308-0d34-0410-b5e6-96231b3b80d8 commit b7b1a389f7d546dbe6a67aa3bb0e66f689e99c1b Author: Hans Wennborg Date: Tue Feb 16 21:46:52 2016 +0000 Merging r258103: ------------------------------------------------------------------------ r258103 | kli | 2016-01-18 16:04:41 -0800 (Mon, 18 Jan 2016) | 2 lines parseArch() supports more variations of arch names for PowerPC builds ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261015 91177308-0d34-0410-b5e6-96231b3b80d8 commit fff361d60b64ac8ee9fcb523872aa7beea8ab8e1 Author: Hans Wennborg Date: Tue Feb 16 19:37:14 2016 +0000 ReleaseNotes: shrink-wrapping; by Quentin Colombet git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261001 91177308-0d34-0410-b5e6-96231b3b80d8 commit b129a10bb92529289bbb26d2335b12858e54a885 Author: Hans Wennborg Date: Tue Feb 16 19:29:54 2016 +0000 ReleaseNotes: typo git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@261000 91177308-0d34-0410-b5e6-96231b3b80d8 commit d3b1222c56e9214e49a3d829e8e60910f8c88903 Author: Hans Wennborg Date: Tue Feb 16 19:27:50 2016 +0000 ReleaseNotes: Hexagon; by Krzysztof Parzyszek git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260999 91177308-0d34-0410-b5e6-96231b3b80d8 commit f1aaed61455e48b6c7444f706a6f997a864a42fa Author: Hans Wennborg Date: Tue Feb 16 19:20:40 2016 +0000 Merging r257864 and r258112: ------------------------------------------------------------------------ r257864 | axw | 2016-01-14 19:33:35 -0800 (Thu, 14 Jan 2016) | 12 lines [docs] Document LLVM_{BUILD,LINK}_LLVM_DYLIB Summary: Document the LLVM_BUILD_LLVM_DYLIB and LLVM_LINK_LLVM_DYLIB CMake options, move 
BUILD_SHARED_LIBS out of frequently-used, and add a note/warning to BUILD_SHARED_LIBS. Reviewers: beanz, delcypher, mjacob Subscribers: mjacob, llvm-commits Differential Revision: http://reviews.llvm.org/D16208 ------------------------------------------------------------------------ ------------------------------------------------------------------------ r258112 | axw | 2016-01-18 21:43:21 -0800 (Mon, 18 Jan 2016) | 8 lines docs: address post-commit review Rewording/expansion of CMake options suggested by Dan Liew. See http://reviews.llvm.org/D16208. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260996 91177308-0d34-0410-b5e6-96231b3b80d8 commit 80cc2ce6475352a29e19824443c2e0a31a37b44d Author: Hans Wennborg Date: Tue Feb 16 19:19:03 2016 +0000 ReleaseNotes: -DLLVM_LINK_LLVM_DYLIB=ON; by Andrew Wilkins git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260995 91177308-0d34-0410-b5e6-96231b3b80d8 commit 1e466cf4f8098acc7025f8d71dd0f64c4754ed63 Author: Hans Wennborg Date: Tue Feb 16 19:07:38 2016 +0000 ReleaseNotes: ORC in Kaleidoscope and C bindings; by Lang Hames git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260991 91177308-0d34-0410-b5e6-96231b3b80d8 commit b508a338d9d922a1ec3fbef698bd9fc6b5217ae0 Author: Hans Wennborg Date: Tue Feb 16 17:38:25 2016 +0000 ReleaseNotes: fix typo, reported by Eugene git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260985 91177308-0d34-0410-b5e6-96231b3b80d8 commit 4f229233ffc588a35e3738d3c358f2cf7a5da1d1 Author: Hans Wennborg Date: Fri Feb 12 19:03:12 2016 +0000 Merging r260703: ------------------------------------------------------------------------ r260703 | hans | 2016-02-12 11:02:39 -0800 (Fri, 12 Feb 2016) | 11 lines [CMake] don't build libLTO when LLVM_ENABLE_PIC is OFF When cmake is run with -DLLVM_ENABLE_PIC=OFF, build fails while linking shared library libLTO.so, 
because its dependencies are built with -fno-PIC. More details here: https://llvm.org/bugs/show_bug.cgi?id=26484. This diff reverts r252652 (git 9fd4377ddb83aee3c049dc8757e7771edbb8ee71), which removed check NOT LLVM_ENABLE_PIC before disabling build for libLTO.so. Patch by Igor Sugak! Differential Revision: http://reviews.llvm.org/D17049 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260704 91177308-0d34-0410-b5e6-96231b3b80d8 commit 7e2ddb94a31d1d085b0228e374799566faa82b8e Author: Peter Collingbourne Date: Fri Feb 12 18:46:48 2016 +0000 ARM: Mention r251322 in release notes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260702 91177308-0d34-0410-b5e6-96231b3b80d8 commit 347f4e82e80af64eca192381112ff6e9e3c7c8c3 Author: Hans Wennborg Date: Fri Feb 12 17:52:29 2016 +0000 Merging r260641: ------------------------------------------------------------------------ r260641 | axw | 2016-02-11 17:42:43 -0800 (Thu, 11 Feb 2016) | 10 lines Avoid linking LLVM component libraries with libLLVM Patch by Jack Howarth. When linking to libLLVM, don't also link to the component libraries that constitute libLLVM. Differential Revision: http://reviews.llvm.org/D16945 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260693 91177308-0d34-0410-b5e6-96231b3b80d8 commit e469b8a4f8daa8d29fe1d1f8ed87b36114dd5726 Author: Hans Wennborg Date: Fri Feb 12 16:18:07 2016 +0000 Merging r260427: ------------------------------------------------------------------------ r260427 | nha | 2016-02-10 12:13:58 -0800 (Wed, 10 Feb 2016) | 16 lines AMDGPU: Release the scavenged offset register during VGPR spill Summary: This fixes a crash where subsequent spills would be unable to scavenge a register. 
In particular, it fixes a crash in piglit's spec@glsl-1.50@execution@geometry@max-input-components (the test still has a shader that fails to compile because of too many SGPR spills, but at least it doesn't crash any more). This is a candidate for the release branch. Reviewers: arsenm, tstellarAMD Subscribers: qcolombet, arsenm Differential Revision: http://reviews.llvm.org/D16558 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260687 91177308-0d34-0410-b5e6-96231b3b80d8 commit ec95d6fe25dcb8b1450c4440da7c7a7e2982b6f2 Author: Renato Golin Date: Fri Feb 12 15:29:34 2016 +0000 [ARM/AArch64] 3.8.0 release notes changes git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260684 91177308-0d34-0410-b5e6-96231b3b80d8 commit 10a5589d08c1de3fcd715ce23697d4e591519595 Author: Dylan McKay Date: Fri Feb 12 06:38:02 2016 +0000 [AVR] Add release notes for 3.8 git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260659 91177308-0d34-0410-b5e6-96231b3b80d8 commit 12009f63c5d16b98334930a2b97d279c6bf82ea0 Author: Hans Wennborg Date: Fri Feb 12 02:32:24 2016 +0000 ReleaseNotes: oh, there already was a section about X86 git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260650 91177308-0d34-0410-b5e6-96231b3b80d8 commit fb52ed812c40eb8c6f1f69575bb231b62b319a95 Author: Hans Wennborg Date: Fri Feb 12 02:29:33 2016 +0000 ReleaseNotes: start off a 'Changes to X86' section git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260648 91177308-0d34-0410-b5e6-96231b3b80d8 commit e293d6c8d134ad352bb69defee17c5c902476933 Author: Hans Wennborg Date: Fri Feb 12 01:56:35 2016 +0000 Release Notes: RegisterScheduler::setDefault removed; by Mehdi Amini git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260643 91177308-0d34-0410-b5e6-96231b3b80d8 commit 7a0ec464f16e761602ac9c4e1f610029c0346745 Author: Hans Wennborg 
Date: Fri Feb 12 01:42:38 2016 +0000 Merging r260587: ------------------------------------------------------------------------ r260587 | pete | 2016-02-11 13:10:40 -0800 (Thu, 11 Feb 2016) | 13 lines Set load alignment on aggregate loads. When optimizing a extractvalue(load), we generate a load from the aggregate type. This load didn't have alignment set and so would get the alignment of the type. This breaks when the type is packed and so the alignment should be lower. For example, loading { int, int } would give us alignment of 4, but the original load from this type may have an alignment of 1 if packed. Reviewed by David Majnemer Differential revision: http://reviews.llvm.org/D17158 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260640 91177308-0d34-0410-b5e6-96231b3b80d8 commit 73a8ae3c0f127d45e391bd8b40be51c2fbc15dd8 Author: Hans Wennborg Date: Fri Feb 12 00:45:55 2016 +0000 ReleaseNotes: drop in-progress warning and svn checkout note git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260634 91177308-0d34-0410-b5e6-96231b3b80d8 commit 2ec5a319cacb9e13bf20bc8b9113d11212f10aae Author: Kai Nacke Date: Thu Feb 11 20:42:16 2016 +0000 Add LDC compiler to list of external OS projects using LLVM 3.8 git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260584 91177308-0d34-0410-b5e6-96231b3b80d8 commit 6ca6b8a0c8560555aed16b880f1499a5a0b4deda Author: Duncan P. N. 
Exon Smith Date: Wed Feb 10 19:20:23 2016 +0000 ReleaseNotes: Document changes to ilist API git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260415 91177308-0d34-0410-b5e6-96231b3b80d8 commit 185bb1287f864701d9b19eef89e7838162e7c793 Author: Hans Wennborg Date: Mon Feb 8 22:15:55 2016 +0000 Merging r259958: ------------------------------------------------------------------------ r259958 | evandro | 2016-02-05 16:01:41 -0800 (Fri, 05 Feb 2016) | 11 lines [AArch64] Add the scheduling model for Exynos-M1 Summary: Add the core scheduling model for the Samsung Exynos-M1 (ARMv8-A). Reviewers: jmolloy, rengolin, christof, MinSeongKIM, t.p.northover Subscribers: aemerson, rengolin, MatzeB Differential Revision: http://reviews.llvm.org/D16644 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260156 91177308-0d34-0410-b5e6-96231b3b80d8 commit 777479f80202057f041683129d4fd9e574ffea79 Author: Hans Wennborg Date: Mon Feb 8 18:31:49 2016 +0000 Merging r259696: ------------------------------------------------------------------------ r259696 | kfischer | 2016-02-03 13:13:33 -0800 (Wed, 03 Feb 2016) | 12 lines [DWARFDebug] Fix another case of overlapping ranges Summary: In r257979, I added code to ensure that we wouldn't merge DebugLocEntries if the pieces they describe overlap. Unfortunately, I failed to cover the case, where there may be multiple active Expressions in the entry, in which case we need to make sure that no two values overlap before we can perform the merge. This fixes PR26148.
Reviewers: aprantl Differential Revision: http://reviews.llvm.org/D16742 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260121 91177308-0d34-0410-b5e6-96231b3b80d8 commit 7ecd92d75cda45668b6b5fdbcdd2142826514e66 Author: Daniel Sanders Date: Mon Feb 8 14:14:18 2016 +0000 [mips] Add initial release notes for MIPS32. git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@260095 91177308-0d34-0410-b5e6-96231b3b80d8 commit ff65de018b6bb5bc4da3e923bbc0f55c5ca8e039 Author: Hans Wennborg Date: Fri Feb 5 22:17:38 2016 +0000 Merging r259381: ------------------------------------------------------------------------ r259381 | uweigand | 2016-02-01 10:31:19 -0800 (Mon, 01 Feb 2016) | 21 lines [SystemZ] Fix wrong-code generation for certain always-false conditions We've found another bug in the code generation logic conditions for a certain class of always-false conditions, those of the form if ((a & 1) < 0) These only reach the back end when compiling without optimization. The bug was introduced by the choice of using TEST UNDER MASK to implement a check for if ((a & MASK) < VAL) as if ((a & MASK) == 0) where VAL is less than the lowest bit of MASK. This is correct in all cases except for VAL == 0, in which case the original condition is always false, but the replacement isn't. Fixed by excluding that particular case.
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259940 91177308-0d34-0410-b5e6-96231b3b80d8 commit 56d368f5a52e60fa29891a6647034fffbba8713b Author: Hans Wennborg Date: Fri Feb 5 16:30:31 2016 +0000 Merging r259886 and r259888: ------------------------------------------------------------------------ r259886 | nemanjai | 2016-02-05 06:50:29 -0800 (Fri, 05 Feb 2016) | 5 lines Fix for PR 26193 This is a simple fix for a PowerPC intrinsic that was incorrectly defined (the return type was incorrect). ------------------------------------------------------------------------ ------------------------------------------------------------------------ r259888 | nemanjai | 2016-02-05 07:03:17 -0800 (Fri, 05 Feb 2016) | 3 lines Add the missing test case for PR26193 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259891 91177308-0d34-0410-b5e6-96231b3b80d8 commit 9be4dc8ab20a009ed5f24610888421ba84f8ec65 Author: Hans Wennborg Date: Fri Feb 5 00:55:39 2016 +0000 Merging r259840 on top of r259178: ------------------------------------------------------------------------ r259178 | echristo | 2016-01-28 23:20:30 -0800 (Thu, 28 Jan 2016) | 1 line Refactor common code for PPC fast isel load immediate selection. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r259840 | nemanjai | 2016-02-04 15:14:42 -0800 (Thu, 04 Feb 2016) | 7 lines Fix for PR 26356 Using the load immediate only when the immediate (whether signed or unsigned) can fit in a 16-bit signed field. Namely, from -32768 to 32767 for signed and 0 to 65535 for unsigned. This patch also ensures that we sign-extend under the right conditions. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259858 91177308-0d34-0410-b5e6-96231b3b80d8 commit 12d60e9e7c149a7d333e277dfbe25a720c88c585 Author: Hans Wennborg Date: Fri Feb 5 00:46:12 2016 +0000 Merging r259798, r259835: ------------------------------------------------------------------------ r259798 | nemanjai | 2016-02-04 08:18:08 -0800 (Thu, 04 Feb 2016) | 9 lines Enable the %s modifier in inline asm template string This patch corresponds to review: http://reviews.llvm.org/D16847 There are some files in glibc that use the output operand modifier even though it was deprecated in GCC. This patch just adds support for it to prevent issues with such files. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r259835 | nemanjai | 2016-02-04 14:36:10 -0800 (Thu, 04 Feb 2016) | 3 lines Provide a test case for r259798 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259856 91177308-0d34-0410-b5e6-96231b3b80d8 commit 78a7d49140626994c23367b709e7b30b41e5cf70 Author: Hans Wennborg Date: Thu Feb 4 16:59:45 2016 +0000 Merging r259695: ------------------------------------------------------------------------ r259695 | tfiala | 2016-02-03 13:13:23 -0800 (Wed, 03 Feb 2016) | 11 lines Address NDEBUG-related linkage issues for Value::assertModuleIsMaterialized() The IR/Value class had a linkage issue present when LLVM was built as a library, and the LLVM library build time had different settings for NDEBUG than the client of the LLVM library. Clients could get into a state where the LLVM lib expected Value::assertModuleIsMaterialized() to be inline-defined in the header but clients expected that method to be defined in the LLVM library.
See this llvm-commits thread for more details: http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20160201/329667.html ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259801 91177308-0d34-0410-b5e6-96231b3b80d8 commit 19b86f670bb5005761ecdcbe41423fee7fd200cf Author: Hans Wennborg Date: Thu Feb 4 02:16:36 2016 +0000 Merging r259740: ------------------------------------------------------------------------ r259740 | nemanjai | 2016-02-03 17:58:20 -0800 (Wed, 03 Feb 2016) | 2 lines Test case for PR 26381 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259743 91177308-0d34-0410-b5e6-96231b3b80d8 commit 0a7ec6ced609c340fc4028aa8a65996623dd4181 Author: Hans Wennborg Date: Wed Feb 3 22:00:13 2016 +0000 Merging r259177: ------------------------------------------------------------------------ r259177 | echristo | 2016-01-28 23:20:01 -0800 (Thu, 28 Jan 2016) | 5 lines Since LI/LIS sign extend the constant passed into the instruction we should check that the sign extended constant fits into 16-bits if we want a zero extended value, otherwise go ahead and put it together piecemeal. Fixes PR26356. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259713 91177308-0d34-0410-b5e6-96231b3b80d8 commit 6b78a48f5c068df653f1c12d2ad7832aaa45c7a1 Author: Hans Wennborg Date: Wed Feb 3 21:24:31 2016 +0000 Merging r259649: ------------------------------------------------------------------------ r259649 | jamesm | 2016-02-03 07:05:06 -0800 (Wed, 03 Feb 2016) | 11 lines [DemandedBits] Revert r249687 due to PR26071 This regresses a test in LoopVectorize, so I'll need to go away and think about how to solve this in a way that isn't broken. 
From the writeup in PR26071: What's happening is that ComputeKnownZeroes is telling us that all bits except the LSB are zero. We're then deciding that only the LSB needs to be demanded from the icmp's inputs. This is where we're wrong - we're assuming that after simplification the bits that were known zero will continue to be known zero. But they're not - during trivialization the upper bits get changed (because an XOR isn't shrunk), so the icmp fails. The fault is in demandedbits - its contract does clearly state that a non-demanded bit may either be zero or one. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259699 91177308-0d34-0410-b5e6-96231b3b80d8 commit 18a86c95fc36b5f622e8dc87f71252de37a1ed44 Author: Hans Wennborg Date: Wed Feb 3 21:18:35 2016 +0000 Merging r259645: ------------------------------------------------------------------------ r259645 | nemanjai | 2016-02-03 04:53:38 -0800 (Wed, 03 Feb 2016) | 4 lines Fix for PR 26381 Simple fix - Constant values were not being sign extended in FastIsel. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259698 91177308-0d34-0410-b5e6-96231b3b80d8 commit 1bfe978e5d0ac77f381b0ccef78204f7f3593a01 Author: Hans Wennborg Date: Tue Feb 2 17:41:39 2016 +0000 Merging r259346 (with adjustments for r258867): ------------------------------------------------------------------------ r259346 | ibreger | 2016-02-01 01:57:15 -0800 (Mon, 01 Feb 2016) | 3 lines AVX512: fix mask handling for gather/scatter/prefetch intrinsics. 
Differential Revision: http://reviews.llvm.org/D16755 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259536 91177308-0d34-0410-b5e6-96231b3b80d8 commit f24a5b58cd7ecc4fada221308073b9f13672d6c0 Author: Hans Wennborg Date: Tue Feb 2 17:35:07 2016 +0000 Merging r259342 (with s/p2align 4/align 16) because r258750 is not in 3.8. ------------------------------------------------------------------------ r259342 | ibreger | 2016-01-31 23:56:09 -0800 (Sun, 31 Jan 2016) | 3 lines AVX512 : Fix SETCCE lowering for KNL 32 bit. Differential Revision: http://reviews.llvm.org/D16752 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259533 91177308-0d34-0410-b5e6-96231b3b80d8 commit 5ea3635939d3e30182cd5a9881447890c8b69c42 Author: Hans Wennborg Date: Mon Feb 1 19:18:10 2016 +0000 Merging r259375: ------------------------------------------------------------------------ r259375 | majnemer | 2016-02-01 09:37:56 -0800 (Mon, 01 Feb 2016) | 6 lines [InstCombine] Don't transform (X+INT_MAX)>=(Y+INT_MAX) -> (X<=Y) This miscompile came about because we tried to use a transform which was only appropriate for xor operators when addition was present. This fixes PR26407. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259390 91177308-0d34-0410-b5e6-96231b3b80d8 commit aad888f28ee3e920b6e1a3828398f6c9c256f3d3 Author: Tim Northover Date: Fri Jan 29 22:00:06 2016 +0000 Merging r259228: ------------------------------------------------------------------------ r259228 | tnorthover | 2016-01-29 11:18:46 -0800 (Fri, 29 Jan 2016) | 13 lines ARM: don't mangle DAG constant if it has more than one use The basic optimisation was to convert (mul $LHS, $complex_constant) into roughly "(shl (mul $LHS, $simple_constant), $simple_amt)" when it was expected to be cheaper. The original logic checks that the mul only has one use (since we're mangling $complex_constant), but when used in even more complex addressing modes there may be an outer addition that can pick up the wrong value too. I *think* the ARM addressing-mode problem is actually unreachable at the moment, but that depends on complex assessments of the profitability of pre-increment addressing modes so I've put a real check in there instead of an assertion. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259247 91177308-0d34-0410-b5e6-96231b3b80d8 commit 5ad5d2c5359a4e878c732db59ee7fc6e0a25dc00 Author: Hans Wennborg Date: Fri Jan 29 21:33:02 2016 +0000 Merging r259236: ------------------------------------------------------------------------ r259236 | spatel | 2016-01-29 12:21:02 -0800 (Fri, 29 Jan 2016) | 8 lines [InstCombine] avoid an insertelement transformation that induces the opposite extractelement fold (PR26354) We would infinite loop because we created a shufflevector that was wider than needed and then failed to combine that with the insertelement. 
When subsequently visiting the extractelement from that shuffle, we see that it's unnecessary, delete it, and trigger another visit to the insertelement. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259245 91177308-0d34-0410-b5e6-96231b3b80d8 commit cd30d75375a03a290c6621da13cbab4f10545c56 Author: Tom Stellard Date: Fri Jan 29 16:45:55 2016 +0000 Merging r258922: ------------------------------------------------------------------------ r258922 | marek.olsak | 2016-01-27 06:19:45 -0500 (Wed, 27 Jan 2016) | 12 lines AMDGPU/SI: Stoney has only 16 LDS banks Summary: This is a candidate for stable, along with all patches that add the "stoney" processor. Reviewers: tstellarAMD Subscribers: arsenm Differential Revision: http://reviews.llvm.org/D16485 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259207 91177308-0d34-0410-b5e6-96231b3b80d8 commit a8a522e4217a621114bedcb1cedee056c59a6273 Author: Tom Stellard Date: Fri Jan 29 16:45:52 2016 +0000 Merging r257666: ------------------------------------------------------------------------ r257666 | changpeng.fang | 2016-01-13 15:39:25 -0500 (Wed, 13 Jan 2016) | 2 lines AMDGPU/SI: Update ISA version for FIJI ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259206 91177308-0d34-0410-b5e6-96231b3b80d8 commit c3c52626df3d5b9bd06b160450da8335deb24dc8 Author: Daniel Sanders Date: Thu Jan 28 21:05:40 2016 +0000 Bring back the test-suite export in test-release without bringing back the build failures. Summary: r257791 disabled the test-suite export since the addition of CMakeLists.txt was causing build failures. This patch exports the test-suite again but does so outside the source tree so that it isn't included in the Phase[123] builds. 
Reviewers: hans Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D16679 git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259093 91177308-0d34-0410-b5e6-96231b3b80d8 commit 72901a8afaae6c9f8ea63ba1c9c9d4699c7eec49 Author: Hans Wennborg Date: Thu Jan 28 18:23:25 2016 +0000 Merging r258971: ------------------------------------------------------------------------ r258971 | spatel | 2016-01-27 11:22:45 -0800 (Wed, 27 Jan 2016) | 26 lines [SimplifyCFG] limit recursion depth when speculating instructions (PR26308) This is a fix for: https://llvm.org/bugs/show_bug.cgi?id=26308 With the switch to using the TTI cost model in: http://reviews.llvm.org/rL228826 ...it became possible to hit a zero-cost cycle of instructions (gep -> phi -> gep...), so we need a cap for the recursion in DominatesMergePoint(). A recursion depth parameter was already added for a different reason in: http://reviews.llvm.org/rL255660 ...so we can just set a limit for it. I pulled "10" out of the air and made it an independent parameter that we can play with. It might be higher than it needs to be given the currently low default value of PHINodeFoldingThreshold (2). That's the starting cost value that we enter the recursion with, and most instructions have cost set to TCC_Basic (1), so I don't think we're going to speculate more than 2 instructions with the current parameters. As noted in the review and the TODO comment, we can do better than just limiting recursion depth. 
Differential Revision: http://reviews.llvm.org/D16637 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259066 91177308-0d34-0410-b5e6-96231b3b80d8 commit 131d76722983cb030c392bcb50bba940e98ea0c6 Author: Hans Wennborg Date: Thu Jan 28 18:16:55 2016 +0000 Merging r258471: ------------------------------------------------------------------------ r258471 | pirama | 2016-01-21 17:16:57 -0800 (Thu, 21 Jan 2016) | 14 lines Do not lower VSETCC if operand is an f16 vector Summary: SETCC with f16 vectors has OperationAction set to Expand but still gets lowered to FCM* intrinsics based on its result type. This patch skips lowering of VSETCC if the operand is an f16 vector. v4 and v8 tests included. Reviewers: ab, jmolloy Subscribers: srhines, llvm-commits Differential Revision: http://reviews.llvm.org/D15361 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@259064 91177308-0d34-0410-b5e6-96231b3b80d8 commit 82cf8c0ebce3d4cac59da2cc36df0c0cd9730d72 Author: Hans Wennborg Date: Wed Jan 27 00:19:52 2016 +0000 Merging r258891: ------------------------------------------------------------------------ r258891 | hans | 2016-01-26 16:19:05 -0800 (Tue, 26 Jan 2016) | 25 lines test-release.sh: Ignore LC_CTYPE in sed invocation on Darwin Here, sed is used to prepare object files for comparison via cmp. On my Darwin 15.4.0 machine, LC_CTYPE is set to UTF-8 (by default, I believe). Under these circumstances, anything sed is made to read will be treated as UTF-8, prompting it to signal an error if it is not, like so: % sed s/a/b/ <(head -n1 /dev/random) >/dev/null; echo $? sed: RE error: illegal byte sequence 1 % To make sed work as expected, I need to set LC_CTYPE to C: % env LC_CTYPE=C sed s/a/b/ <(head -n1 /dev/random) >/dev/null; echo $? 
0 % Without this change, sed will exit with an error for every single file that it compares between phase 2 and phase 3, thereby making it look as if the differences were far larger than they are. Patch by Elias Pipping! Differential Revision: http://reviews.llvm.org/D16548 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258892 91177308-0d34-0410-b5e6-96231b3b80d8 commit 5eefadb302242035deaf04c5585bb4cd46125deb Author: Tom Stellard Date: Tue Jan 26 23:57:01 2016 +0000 Merging r258386: ------------------------------------------------------------------------ r258386 | thomas.stellard | 2016-01-20 23:28:34 -0500 (Wed, 20 Jan 2016) | 14 lines AMDGPU/SI: Pass whether to use the SI scheduler via Target Attribute Summary: Currently the SI scheduler can be selected via command line option, but it turned out it would be better if it was selectable via a Target Attribute. This patch adds "si-scheduler" attribute to the backend. Reviewers: tstellarAMD, echristo Subscribers: echristo, arsenm Differential Revision: http://reviews.llvm.org/D16192 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258885 91177308-0d34-0410-b5e6-96231b3b80d8 commit 44fb5881d8edf448d6231a5b8df583aecd6bcd42 Author: Sanjoy Das Date: Tue Jan 26 22:29:46 2016 +0000 Merging r258184: ------------------------------------------------------------------------ r258184 | sanjoy | 2016-01-19 12:53:51 -0800 (Tue, 19 Jan 2016) | 20 lines [SCEV] Fix PR26207 In some cases, the max backedge taken count can be more conservative than the exact backedge taken count (for instance, because ScalarEvolution::getRange is not control-flow sensitive whereas computeExitLimitFromICmp can be). 
In these cases, computeExitLimitFromCond (specifically the bit that deals with `and` and `or` instructions) can create an ExitLimit instance with a `SCEVCouldNotCompute` max backedge count expression, but a computable exact backedge count expression. This violates an implicit SCEV assumption: a computable exact BE count should imply a computable max BE count. This change - Makes the above implicit invariant explicit by adding an assert to ExitLimit's constructor - Changes `computeExitLimitFromCond` to be more robust around conservative max backedge counts ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258869 91177308-0d34-0410-b5e6-96231b3b80d8 commit 4d1ef71f362e014aaaaefeb36abe83c24b578e40 Author: Hans Wennborg Date: Tue Jan 26 19:44:49 2016 +0000 Revert accidental changes from r258805 git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258844 91177308-0d34-0410-b5e6-96231b3b80d8 commit 9a498947cdb25737faecfdabcb64848432c49d68 Author: Dimitry Andric Date: Tue Jan 26 19:43:59 2016 +0000 Merging r258436: ------------------------------------------------------------------------ r258436 | dim | 2016-01-21 22:57:49 +0100 (Thu, 21 Jan 2016) | 17 lines Let test-release.sh checkout subprojects directly into the target tree, instead of using symlinks Summary: In the past I have run into several problems with the way `test-release.sh` creates all the subproject directories as siblings, and then uses symlinks to stitch them all together. In some scenarios this leads to clang not being able to find header files, etc. This patch changes the script so it directly exports into the correct target locations for each subproject. 
Reviewers: hans Subscribers: emaste, llvm-commits Differential Revision: http://reviews.llvm.org/D16420 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258842 91177308-0d34-0410-b5e6-96231b3b80d8 commit 4b85564ba4a41465155b9128a68e5e14fea78365 Author: Hans Wennborg Date: Tue Jan 26 19:31:16 2016 +0000 Merging r258729: ------------------------------------------------------------------------ r258729 | matze | 2016-01-25 14:08:25 -0800 (Mon, 25 Jan 2016) | 13 lines X86ISelLowering: Fix cmov(cmov) special lowering bug There's a special case in EmitLoweredSelect() that produces an improved lowering for cmov(cmov) patterns. However this special lowering is currently broken if the inner cmov has multiple users so this patch stops using it in this case. If you wonder why this wasn't fixed by continuing to use the special lowering and inserting a 2nd PHI for the inner cmov: I believe this would incur additional copies/register pressure so the special lowering does not improve upon the normal one anymore in this case. This fixes http://llvm.org/PR26256 (= rdar://24329747) ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258840 91177308-0d34-0410-b5e6-96231b3b80d8 commit db6cb1a90cd0ab35e2dadc97962a5d67742c0bbc Author: James Molloy Date: Tue Jan 26 13:30:49 2016 +0000 Merging r258690: ------------------------------------------------------------------------ r258690 | jamesm | 2016-01-25 14:49:36 +0000 (Mon, 25 Jan 2016) | 7 lines [DemandedBits] Fix computation of demanded bits for ICmps The computation of ICmp demanded bits is independent of the individual operand being evaluated. We simply return a mask consisting of the minimum leading zeroes of both operands. We were incorrectly passing "I" to ComputeKnownBits - this should be "UserI->getOperand(0)". 
In cases where we were evaluating the 1th operand, we were taking the minimum leading zeroes of it and itself. This should fix PR26266. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258805 91177308-0d34-0410-b5e6-96231b3b80d8 commit 836d2ad83c5e955a23f6e3b78418cb250c95c88b Author: Hans Wennborg Date: Mon Jan 25 22:24:50 2016 +0000 Merging r258406: ------------------------------------------------------------------------ r258406 | vedantk | 2016-01-21 09:04:42 -0800 (Thu, 21 Jan 2016) | 16 lines [GCOV] Avoid emitting profile arcs for module and skeleton CUs Do not emit profile arc files and note files for module and skeleton CU's. Our users report seeing unexpected *.gcda and *.gcno files in their projects when using gcov-style profiling with modules or frameworks. The unwanted files come from these modules. This is not very helpful for end-users. Further, we've seen reports of instrumented programs crashing while writing these files out (due to I/O failures). 
rdar://problem/22838296 Reviewed-by: aprantl Differential Revision: http://reviews.llvm.org/D15997 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258731 91177308-0d34-0410-b5e6-96231b3b80d8 commit 16f83af7618a4dfb4ef1891b07adb23cc54e4c86 Author: Hans Wennborg Date: Fri Jan 22 18:37:31 2016 +0000 Merging r258416 and r258428: ------------------------------------------------------------------------ r258416 | spatel | 2016-01-21 10:01:57 -0800 (Thu, 21 Jan 2016) | 2 lines make helper functions static; NFCI ------------------------------------------------------------------------ ------------------------------------------------------------------------ r258428 | spatel | 2016-01-21 12:19:54 -0800 (Thu, 21 Jan 2016) | 15 lines [LibCallSimplifier] don't get fooled by a fake fmin() This is similar to the bug/fix: https://llvm.org/bugs/show_bug.cgi?id=26211 http://reviews.llvm.org/rL258325 The fmin() test case reveals another bug caused by sloppy code duplication. It will crash without this patch because fp128 is a valid floating-point type, but we would think that we had matched a function that used doubles. The new helper function can be used to replace similar checks that are used in several other places in this file. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258512 91177308-0d34-0410-b5e6-96231b3b80d8 commit c89d9654310e0f5b1171888c6573d09c9e66d0c4 Author: Hans Wennborg Date: Fri Jan 22 18:26:38 2016 +0000 Merging r257886: ------------------------------------------------------------------------ r257886 | jamesm | 2016-01-15 02:36:01 -0800 (Fri, 15 Jan 2016) | 3 lines [CodeGenPrepare] Try and appease sanitizers dupRetToEnableTailCallOpts(BB) can invalidate BB. It must run *after* we iterate across BB! 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258510 91177308-0d34-0410-b5e6-96231b3b80d8 commit 961a0e424cc7a63ee57cc8506c8a04cbf6012e1f Author: Hans Wennborg Date: Wed Jan 20 21:49:02 2016 +0000 Merging r258325: ------------------------------------------------------------------------ r258325 | spatel | 2016-01-20 09:41:14 -0800 (Wed, 20 Jan 2016) | 21 lines [LibCallSimplifier] don't get fooled by a fake sqrt() The test case will crash without this patch because the subsequent call to hasUnsafeAlgebra() assumes that the call instruction is an FPMathOperator (ie, returns an FP type). This part of the function signature check was omitted for the sqrt() case, but seems to be in place for all other transforms. Before: http://reviews.llvm.org/rL257400 ...we would have needlessly continued execution in optimizeSqrt(), but the bug was harmless because we'd eventually fail some other check and return without damage. This should fix: https://llvm.org/bugs/show_bug.cgi?id=26211 Differential Revision: http://reviews.llvm.org/D16198 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258353 91177308-0d34-0410-b5e6-96231b3b80d8 commit 3acb8d3c6d4e470172fb244c809bc6fdd7948c29 Author: Hans Wennborg Date: Wed Jan 20 21:30:57 2016 +0000 Merging r257940: ------------------------------------------------------------------------ r257940 | djg | 2016-01-15 13:56:40 -0800 (Fri, 15 Jan 2016) | 10 lines [SelectionDAG] CSE nodes with differing SDNodeFlags In the optimizer (GVN etc.) when eliminating redundant nodes with different flags, the flags are ignored for the purposes of testing for congruence, and then intersected for the purposes of producing a result that supports the union of all the uses. This commit makes SelectionDAG's CSE do the same thing, allowing it to CSE nodes in more cases. 
This fixes PR26063. Differential Revision: http://reviews.llvm.org/D15957 ------------------------------------------------------------------------ Merging r257942: ------------------------------------------------------------------------ r257942 | djg | 2016-01-15 14:07:35 -0800 (Fri, 15 Jan 2016) | 2 lines Remove a now-empty file left behind by r257940. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258351 91177308-0d34-0410-b5e6-96231b3b80d8 commit 3260476414aa2e03566d205f742220a382f4ce07 Author: Hans Wennborg Date: Wed Jan 20 21:14:05 2016 +0000 Merging r258273: ------------------------------------------------------------------------ r258273 | josepht | 2016-01-19 18:15:15 -0800 (Tue, 19 Jan 2016) | 37 lines [Inliner/WinEH] Honor implicit nounwinds Summary: Funclet EH tables require that a given funclet have only one unwind destination for exceptional exits. The verifier will therefore reject e.g. two cleanuprets with different unwind dests for the same cleanup, or two invokes exiting the same funclet but to different unwind dests. Because catchswitch has no 'nounwind' variant, and because IR producers are not *required* to annotate calls which will not unwind as 'nounwind', it is legal to nest a call or an "unwind to caller" catchswitch within a funclet pad that has an unwind destination other than caller; it is undefined behavior for such a call or catchswitch to unwind. Normally when inlining an invoke, calls in the inlined sequence are rewritten to invokes that unwind to the callsite invoke's unwind destination, and "unwind to caller" catchswitches in the inlined sequence are rewritten to unwind to the callsite invoke's unwind destination. 
However, if such a call or "unwind to caller" catchswitch is located in a callee funclet that has another exceptional exit with an unwind destination within the callee, applying the normal transformation would give that callee funclet multiple unwind destinations for its exceptional exits. There would be no way for EH table generation to determine which is the "true" exit, and the verifier would reject the function accordingly. Add logic to the inliner to detect these cases and leave such calls and "unwind to caller" catchswitches as calls and "unwind to caller" catchswitches in the inlined sequence. This fixes PR26147. Reviewers: rnk, andrew.w.kaylor, majnemer Subscribers: alexcrichton, llvm-commits Differential Revision: http://reviews.llvm.org/D16319 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258349 91177308-0d34-0410-b5e6-96231b3b80d8 commit 38e40410b1fa6441db511e760bc6ae263a8bbaee Author: Renato Golin Date: Wed Jan 20 18:01:05 2016 +0000 Merging r258308: [AArch64] Fix two bugs in the .inst directive git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258326 91177308-0d34-0410-b5e6-96231b3b80d8 commit 870ff87d1cd25f9a2dd01d7c75489a63eca377c2 Author: Quentin Colombet Date: Wed Jan 20 01:14:03 2016 +0000 Merging r258221: ------------------------------------------------------------------------ r258221 | qcolombet | 2016-01-19 15:29:03 -0800 (Tue, 19 Jan 2016) | 8 lines [X86] Do not run shrink-wrapping on function with split-stack attribute or HiPE calling convention. The implementation of the related callbacks in the x86 backend for such functions are not ready to deal with a prologue block that is not the entry block of the function. This fixes PR26107, but the longer term solution would be to fix those callbacks. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258269 91177308-0d34-0410-b5e6-96231b3b80d8 commit 90fef5a5b6514f60396e81d7fa20581d05ca659b Author: Quentin Colombet Date: Wed Jan 20 01:09:12 2016 +0000 Merging r258207: ------------------------------------------------------------------------ r258207 | qcolombet | 2016-01-19 14:31:12 -0800 (Tue, 19 Jan 2016) | 1 line [MachineFunction] Constify getter. NFC. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258268 91177308-0d34-0410-b5e6-96231b3b80d8 commit 770ec8cf9ae215e26cb6d946b9d533151fe0558d Author: Hans Wennborg Date: Wed Jan 20 00:48:30 2016 +0000 Merging r257977: ------------------------------------------------------------------------ r257977 | kfischer | 2016-01-15 17:11:33 -0800 (Fri, 15 Jan 2016) | 1 line [DwarfDebug] Move MergeValues to .cpp, NFC ------------------------------------------------------------------------ Merging r257979: ------------------------------------------------------------------------ r257979 | kfischer | 2016-01-15 17:15:32 -0800 (Fri, 15 Jan 2016) | 11 lines [DwarfDebug] Don't merge DebugLocEntries if their pieces overlap Summary: Later in DWARF emission we check that DebugLocEntries have non-overlapping pieces, so we should create any such entries by merging here. Fixes PR26163. Reviewers: aprantl Differential Revision: http://reviews.llvm.org/D16249 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258267 91177308-0d34-0410-b5e6-96231b3b80d8 commit d26a2e75e58f56a289b911c0bf582be4f8f655f1 Author: NAKAMURA Takumi Date: Wed Jan 20 00:32:09 2016 +0000 [r257857] lli: use llvm::utostr() instead of std::to_string(). 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258266 91177308-0d34-0410-b5e6-96231b3b80d8 commit 316ab7147bf233fd6a84977173f99b3fc9a26e0e Author: NAKAMURA Takumi Date: Wed Jan 20 00:28:22 2016 +0000 [r257732] Mark remote-JIT tests as XFAIL, as well as win32, for targeting mingw32. git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258265 91177308-0d34-0410-b5e6-96231b3b80d8 commit f5575ecd57c4ab8cdae1a80fecc01029d14fe4e6 Author: Hans Wennborg Date: Tue Jan 19 20:49:25 2016 +0000 Merging r257875: ------------------------------------------------------------------------ r257875 | jamesm | 2016-01-15 01:20:19 -0800 (Fri, 15 Jan 2016) | 11 lines [InstCombine] Rewrite bswap/bitreverse handling completely. There are several requirements that ended up with this design; 1. Matching bitreversals is too heavyweight for InstCombine and doesn't really need to be done so early. 2. Bitreversals and byteswaps are very related in their matching logic. 3. We want to implement support for matching more advanced bswap/bitreverse patterns like partial bswaps/bitreverses. 4. Bswaps are best matched early in InstCombine. The result of these is that a new utility function is created in Transforms/Utils/Local.h that can be configured to search for bswaps, bitreverses or both. InstCombine uses it to find only bswaps, CGP uses it to find only bitreversals. We can then extend the matching logic in one place only. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258180 91177308-0d34-0410-b5e6-96231b3b80d8 commit e12bf2aba135af15b33cca8a8c0fb80189a16b80 Author: Hans Wennborg Date: Tue Jan 19 19:28:41 2016 +0000 Merging r258168: ------------------------------------------------------------------------ r258168 | hans | 2016-01-19 11:21:58 -0800 (Tue, 19 Jan 2016) | 3 lines test-release.sh: Use CMake also for Darwin This didn't work for 3.7, but hopefully it should work now. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258170 91177308-0d34-0410-b5e6-96231b3b80d8 commit 1618eb04cdfdd3febf77bc67cdac5307e5528b96 Author: Hans Wennborg Date: Tue Jan 19 18:53:02 2016 +0000 Merging r257925, r257929, r257930, and r257997: ------------------------------------------------------------------------ r257925 | mren | 2016-01-15 11:35:42 -0800 (Fri, 15 Jan 2016) | 10 lines CXX_FAST_TLS calling convention: fix issue on X86-64. When we have a single basic block, the explicit copy-back instructions should be inserted right before the terminator. Before this fix, they were wrongly placed at the beginning of the basic block. I will commit fixes to other platforms as well. PR26136 ------------------------------------------------------------------------ ------------------------------------------------------------------------ r257929 | mren | 2016-01-15 12:13:28 -0800 (Fri, 15 Jan 2016) | 10 lines CXX_FAST_TLS calling convention: fix issue on AArch64. When we have a single basic block, the explicit copy-back instructions should be inserted right before the terminator. Before this fix, they were wrongly placed at the beginning of the basic block. I will commit fixes to other platforms as well. 
PR26136 ------------------------------------------------------------------------ ------------------------------------------------------------------------ r257930 | mren | 2016-01-15 12:24:11 -0800 (Fri, 15 Jan 2016) | 8 lines CXX_FAST_TLS calling convention: fix issue on ARM. When we have a single basic block, the explicit copy-back instructions should be inserted right before the terminator. Before this fix, they were wrongly placed at the beginning of the basic block. PR26136 ------------------------------------------------------------------------ ------------------------------------------------------------------------ r257997 | mren | 2016-01-16 08:39:46 -0800 (Sat, 16 Jan 2016) | 12 lines CXX_FAST_TLS calling convention: fix issue on x86-64. %RBP can't be handled explicitly. We generate the following code: pushq %rbp movq %rsp, %rbp ... movq %rbx, (%rbp) ## 8-byte Spill where %rbp will be overwritten by the spilled value. The fix is to let PEI handle %RBP. PR26136 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258162 91177308-0d34-0410-b5e6-96231b3b80d8 commit aa96fb86c3304e81c2f53700223d0e795c302276 Author: Hans Wennborg Date: Tue Jan 19 18:26:37 2016 +0000 Merging r257902 (and r257775) ------------------------------------------------------------------------ r257775 | jyknight | 2016-01-14 08:33:21 -0800 (Thu, 14 Jan 2016) | 3 lines Revert "Stop increasing alignment of externally-visible globals on ELF platforms." This reverts commit r257719, due to PR26144. ------------------------------------------------------------------------ ------------------------------------------------------------------------ r257902 | jyknight | 2016-01-15 08:33:06 -0800 (Fri, 15 Jan 2016) | 17 lines Stop increasing alignment of externally-visible globals on ELF platforms. 
With ELF, the alignment of a global variable in a shared library will get copied into an executable linked against it, if the executable even accesses the variable. So, it's not possible to implicitly increase alignment based on access patterns, or you'll break existing binaries. This happened to affect libc++'s std::cout symbol, for example. See thread: http://thread.gmane.org/gmane.comp.compilers.clang.devel/45311 (This is a re-commit of r257719, without the bug reported in PR26144. I've tweaked the code to not assert-fail in enforceKnownAlignment when computeKnownBits doesn't recurse far enough to find the underlying Alloca/GlobalObject value.) Differential Revision: http://reviews.llvm.org/D16145 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258155 91177308-0d34-0410-b5e6-96231b3b80d8 commit ed504bedd7420790d55e441e35e5449eaa40029e Author: Hans Wennborg Date: Tue Jan 19 17:28:24 2016 +0000 Merging r257905: ------------------------------------------------------------------------ r257905 | hans | 2016-01-15 09:04:45 -0800 (Fri, 15 Jan 2016) | 3 lines test-release.sh: Fix clang-tools-extra symlink for CMake build The CMake and Autoconf builds want the symlink set up differently. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@258146 91177308-0d34-0410-b5e6-96231b3b80d8 commit c1316b6adfbb17b961a3bee357e728ca0d4d1c96 Author: Hans Wennborg Date: Thu Jan 14 23:24:17 2016 +0000 Merging r257791: ------------------------------------------------------------------------ r257791 | hans | 2016-01-14 11:21:14 -0800 (Thu, 14 Jan 2016) | 4 lines Exclude test-suite from CMake builds in test-release.sh It's broken. In 3.7 there wasn't a CMake build for test-suite at all, so we're not losing something we had before. 
------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@257836 91177308-0d34-0410-b5e6-96231b3b80d8 commit 25d64abdb39a834541edbafdc686f371dad58a76 Author: Hans Wennborg Date: Thu Jan 14 17:52:28 2016 +0000 Merging r257730: ------------------------------------------------------------------------ r257730 | majnemer | 2016-01-13 17:20:03 -0800 (Wed, 13 Jan 2016) | 11 lines [X86] Don't alter HasOpaqueSPAdjustment after we've relied on it We rely on HasOpaqueSPAdjustment not changing after we've calculated things based on it. Things like whether or not we can use 'rep;movs' to copy bytes around, that sort of thing. If it changes, invariants in the backend will quietly break. This situation arose when we had a call to memcpy *and* a COPY of the FLAGS register where we would attempt to reference local variables using %esi, a register that was clobbered by the 'rep;movs'. This fixes PR26124. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@257779 91177308-0d34-0410-b5e6-96231b3b80d8 commit 7b9eef037dbacab102881f19826fb04cfe69c7e7 Author: Hans Wennborg Date: Thu Jan 14 00:23:32 2016 +0000 ReleaseNotes.rst: a few entries from Rafael git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@257725 91177308-0d34-0410-b5e6-96231b3b80d8 commit 53d8ef00d82460b9c8ce08617d91bbce8313d4a3 Author: Hans Wennborg Date: Wed Jan 13 21:18:59 2016 +0000 Merging r257648: ------------------------------------------------------------------------ r257648 | hans | 2016-01-13 10:59:45 -0800 (Wed, 13 Jan 2016) | 1 line Fix struct/class mismatch for MachineSchedContext ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@257668 91177308-0d34-0410-b5e6-96231b3b80d8 commit 38fcb6f10f0ae867bfe796f26bf1a336bf0dddf0 
Author: Dimitry Andric Date: Wed Jan 13 19:37:51 2016 +0000 Merging r257645: ------------------------------------------------------------------------ r257645 | dim | 2016-01-13 19:29:46 +0100 (Wed, 13 Jan 2016) | 22 lines Avoid undefined behavior in LinkAllPasses.h The LinkAllPasses.h file is included in several main programs, to force a large number of passes to be linked in. However, the ForcePassLinking constructor uses undefined behavior, since it calls member functions on `nullptr`, e.g.: ((llvm::Function*)nullptr)->viewCFGOnly(); llvm::RGPassManager RGM; ((llvm::RegionPass*)nullptr)->runOnRegion((llvm::Region*)nullptr, RGM); When the optimization level is -O2 or higher, the code below the first nullptr dereference is optimized away, and replaced by `ud2` (on x86). Therefore, the calls after that first dereference are never emitted. In my case, I noticed there was no call to `llvm::sys::RunningOnValgrind()`! Replace instances of dereferencing `nullptr` with either objects on the stack, or regular function calls. Differential Revision: http://reviews.llvm.org/D15996 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@257660 91177308-0d34-0410-b5e6-96231b3b80d8 commit 9faaefea9cbef6453486ed825c1ca4305bf68324 Author: Hans Wennborg Date: Wed Jan 13 19:03:44 2016 +0000 Drop 'svn' suffix from version. git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@257651 91177308-0d34-0410-b5e6-96231b3b80d8 commit 5ab5731312b6a8736fbe7fad1cb10f384b3a295e Author: Hans Wennborg Date: Wed Jan 13 17:34:56 2016 +0000 Creating release_38 branch off revision 257626 git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_38@257630 91177308-0d34-0410-b5e6-96231b3b80d8 commit 1526147c0ad76667de046ef168d5cc5eee381bb7 Author: TB Schardl Date: Tue Jan 12 12:40:37 2016 +0000 Bug fix to include setSuccessor method on reattach instruction. 
commit 2b1b34e00cbc085a4a9a290c65fffaabae9517dc Author: TB Schardl Date: Thu Dec 31 04:05:48 2015 +0000 Add -instrument-cilk support to detach2cilk, cilkabi commit 4328b4468c0e42c1f89f5212e1386c38394edf20 Merge: 062301f913b 8a32dc47d61 Author: TB Schardl Date: Wed Dec 30 01:45:54 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 062301f913b5ac657607f0c758392ac8a18d5c13 Merge: 9893cc49b22 48a798cb4b4 Author: TB Schardl Date: Tue Dec 29 22:23:46 2015 +0000 Merge branch 'tb-scratch' of github.com:taekwonbilly/Parallel-IR into tb-scratch commit 9893cc49b223291071ea6633cd3f5c376acce9dd Author: TB Schardl Date: Tue Dec 29 22:22:01 2015 +0000 SimplifyCFG now removes unnecessary Sync instructions. commit 48a798cb4b473470ad6ceaa6cc3e45dd569d0627 Merge: 54dbddeaec7 8d00ea68834 Author: Billy Moses Date: Wed Nov 11 10:50:51 2015 -0500 for counting commit 54dbddeaec7fa2bcdb3ad906c2cb99232342f00b Merge: 19481e914d1 88d51ce445e Author: Billy Moses Date: Wed Nov 11 10:18:55 2015 -0500 moded commit 8d00ea68834b61ce260b8111beb594cbdc8c78b9 Merge: 2ae39eb69c5 65cad952e45 Author: TB Schardl Date: Fri Nov 6 11:51:30 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 2ae39eb69c54cfb2206514873bca9cb1ac3738b0 Author: TB Schardl Date: Thu Nov 5 14:58:05 2015 +0000 [SimplifyCFG] Fixed bug where empty reattach blocks with multiple predecessors would crash this pass. 
commit 7bd0f59e1aa75abe8a238d1ec166d6148722ebdd Merge: 8ae8e06e3cd c135da21a3c Author: TB Schardl Date: Wed Nov 4 02:12:41 2015 +0000 Merge branch 'billy-scratch' of github.com:taekwonbilly/Parallel-IR into tb-scratch commit 8ae8e06e3cdf762ce50de096115ecfac5c998b63 Merge: a9530cd93a2 7e6636cb71f Author: TB Schardl Date: Wed Nov 4 01:26:22 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit c135da21a3cca833224099aeeac85aad0ec5144d Author: Billy Moses Date: Mon Nov 2 23:13:23 2015 -0500 all cleaned up & ready to go commit a9530cd93a293b6e21665883a74b42859061acd8 Merge: 329f5fad3f7 1965754e592 Author: TB Schardl Date: Fri Oct 16 14:24:35 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 329f5fad3f72dd84a3e4cf5818512a6b7e81c657 Merge: e0717ad48cc 600b09339de Author: TB Schardl Date: Fri Oct 16 00:37:12 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit e0717ad48cc7c447b4f1159116b06ff82c4efdd3 Merge: 20e95d87b5e 4b6405d130b Author: TB Schardl Date: Fri Oct 16 00:36:05 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 20e95d87b5e8234390f2b4cc6ef46a5ebea58e0c Merge: 44d4e427c7f bcd41c02dde Author: TB Schardl Date: Tue Oct 13 16:57:43 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit cac7ff23aac4127106c74d7cdaa5b6f11d3d5e00 Merge: ab253e4510c 387b1f61aad Author: Billy Moses Date: Tue Oct 13 12:34:49 2015 -0400 Merge branch 'master' of github.com:taekwonbilly/Parallel-IR into billy-scratch commit ab253e4510c21e111e4c56fda345c19d3b232650 Author: Billy Moses Date: Tue Oct 13 12:34:31 2015 -0400 cache loop2cilk commit 44d4e427c7f008295af785fbad29857952be6d9a Merge: 387b1f61aad 938c3d3164e Author: TB Schardl Date: Tue Oct 13 12:52:44 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 387b1f61aad986ddc9032d82e2e48e9c5e1b064d Merge: 81e2fd12aea 3d58b720c31 Author: TB Schardl Date: 
Fri Oct 2 19:47:27 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 81e2fd12aea84c2ab59cd73cbcad2665a947ce0b Author: TB Schardl Date: Fri Oct 2 19:43:24 2015 +0000 Adding Detach2Cilksan pass to enable Cilksan race detection. commit 7a634e24c5bc7a520e8979646da17c09895f5425 Author: TB Schardl Date: Fri Oct 2 19:42:47 2015 +0000 Some debugging of Detach2Cilk commit cdf14afd5eeb21dedc32c3a62b1f76af95016974 Merge: 25f43658061 36caf0659ff Author: TB Schardl Date: Tue Sep 22 17:53:11 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 25f436580618875268ef313894e05802617bbdf0 Author: TB Schardl Date: Tue Sep 22 04:00:34 2015 +0000 Fixing loop rotation to prevent it from destroying sync instructions. commit 8ec1e7597748edd42654657f992aa4209bd04cf9 Merge: 4fc3d85490a dabf510ba1b Author: TB Schardl Date: Sun Sep 20 19:06:45 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 4fc3d85490a81d6adbd21b5f66646a9f397fe333 Author: TB Schardl Date: Sun Sep 20 19:06:24 2015 +0000 Fixed GVN to handle scalarPRE around detach/reattach and to abort load PRE in the event of an aliased access from a detach or sync. commit dc7cd94ca46ba477e113d2844de893b82b95b081 Author: TB Schardl Date: Sun Sep 20 19:05:06 2015 +0000 Updated AliasAnalysis to analyze detached blocks for aliasing information for detach and sync instructions. commit 421d2351ba4e14ff211a3c6cbe9258ccddf19afa Merge: 54b97afc6bc 29f50e97835 Author: TB Schardl Date: Sun Sep 13 12:11:13 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 54b97afc6bc145d1e28a8a3c94de524d809cddf1 Author: TB Schardl Date: Wed Sep 9 20:25:28 2015 +0000 Making syncs look like fences, in order to fix memory analysis issues. 
commit 4420c17e34959d2a33ba4c9fd9ae5ff6066f797a Merge: e6d3b51ad7d 3c76435341d Author: TB Schardl Date: Wed Sep 9 01:12:29 2015 +0000 Merge branch 'billy-scratch' of github.com:taekwonbilly/Parallel-IR into tb-scratch commit e6d3b51ad7de5aaece38701cbe0b9401f481b13c Merge: eaa3d3ce261 9e01a11e67c Author: TB Schardl Date: Tue Sep 8 21:47:52 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 3c76435341d943764ecafb324971a254c95b39df Author: Billy Moses Date: Tue Sep 8 16:40:32 2015 -0400 Working parallel opt pass commit eaa3d3ce261db5812277ba6cd250ce501f77849c Merge: d9eeab4f9c8 3d88beedefc Author: TB Schardl Date: Tue Sep 8 17:14:47 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit d9eeab4f9c8bd662a771d87e73f61165c12cd14b Merge: f09f6e7a51b 7e316839810 Author: TB Schardl Date: Tue Sep 8 15:49:47 2015 +0000 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 5f20c20dcf53f27e56915263e99d810bbf403697 Author: Billy Moses Date: Mon Sep 7 22:05:31 2015 -0400 Semi-working cilk pass commit f09f6e7a51b1b270a48d2f66312ff282f1ad6959 Author: TB Schardl Date: Fri Sep 4 12:13:17 2015 -0400 Fixed build problems with last merge. 
commit 8b666563572297a50f9a17efbd060e8f780f0f04 Merge: abe3f70de04 2354b37ae03 Author: TB Schardl Date: Fri Sep 4 11:40:09 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 41ddcdf5d8e40544ece73167368487f0195b1b5f Merge: fea705e7114 abe3f70de04 Author: Billy Moses Date: Tue Sep 1 23:17:47 2015 -0400 Merge branch 'tb-scratch' of github.com:taekwonbilly/Parallel-IR into billy-scratch commit fea705e71145c13d37dcedf6b260ed38d75b7ad1 Merge: dd9331be0b0 19481e914d1 Author: Billy Moses Date: Tue Sep 1 17:14:52 2015 -0400 Merge branch 'tb-scratch' into billy-scratch commit dd9331be0b0f2c6172666774f3f9d3fb17121154 Author: Billy Moses Date: Tue Sep 1 17:13:27 2015 -0400 Commit detach pass before merge commit abe3f70de0450a6ff4d169e2f8a7c884f38b5b43 Merge: 61fde862bba ac515c40878 Author: TB Schardl Date: Tue Sep 1 16:59:07 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 61fde862bba820f143ea0545dc1804fe53523efc Merge: 19481e914d1 9907691f42a Author: TB Schardl Date: Sun Aug 30 09:37:44 2015 -0400 Merge branch 'billy-scratch' of github.com:taekwonbilly/Parallel-IR into tb-scratch commit 19481e914d1b1c4ee1db106d8f01b986ba4f90ae Merge: fadec4720ee 2b5188b98a3 Author: TB Schardl Date: Sun Aug 30 08:51:34 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit fadec4720ee7b66c5f4a362e2f0e0e8b2c127ce6 Merge: 4fcaa4205d2 43928f79096 Author: TB Schardl Date: Sat Aug 29 12:03:38 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 4fcaa4205d29c0c7c96d5e422f16db53db786e82 Author: TB Schardl Date: Sat Aug 29 12:03:11 2015 -0400 Fixed bug where JumpThreading would attempt to split reattach edges. commit 6342321c427d73af4fafe79c88d60d5945d192e2 Author: TB Schardl Date: Sat Aug 29 12:02:17 2015 -0400 Fixed bug where SCCP did not recognize detach/reattach/sync. 
commit cd5c25c6646f9fa4472be7f4148e938b3db180fc Author: TB Schardl Date: Fri Aug 28 18:12:45 2015 -0400 Removing dead code from SROA. commit 613e58985cd9077134dc120d465bbf4ad7c624b1 Merge: 16929701716 21f084aa722 Author: TB Schardl Date: Fri Aug 28 18:07:45 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 9907691f42ac9a51278d9a4fb20496f1a08531cc Author: Billy Moses Date: Thu Aug 27 14:58:29 2015 -0400 Add temporary hack to enable compiling serial version to executable commit 42a2eef9caf19027aac8829f2e90cc3194e87fe4 Merge: 703f88a7461 2d184c72270 Author: Billy Moses Date: Wed Aug 26 16:57:08 2015 -0400 Merge branch 'tb-scratch' into billy-scratch commit 16929701716110895498f4d5528c740355545472 Merge: 2d184c72270 4abce6e698a Author: TB Schardl Date: Tue Aug 25 14:00:34 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into tb-scratch commit 2d184c7227076f1843ef28ab46c9a6736cb5faea Author: TB Schardl Date: Sun Aug 23 11:49:32 2015 -0400 Relaxed need for commutativity in serial TRE. commit aecdc8f291e3faa379ec24337be337095a685ea0 Author: TB Schardl Date: Sun Aug 23 11:49:03 2015 -0400 Fixed bug in BitcodeWriter with reattach causing opt to crash when emitting bitcode. commit f7f1cce493e65e181225f5d439cfdc1798717e2e Merge: 45d7087de1c 8724a428dfd Author: TB Schardl Date: Sat Aug 22 09:50:44 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 45d7087de1c8bc1360e107a30c937d9b24189f49 Author: TB Schardl Date: Sat Aug 22 09:43:10 2015 -0400 Draft enhancement to accumulator TRE to use identity values. commit 85eda242bd0b50027d4859450206d336e3e585f5 Merge: f135205b97a 0d125ca11e9 Author: TB Schardl Date: Tue Aug 18 11:25:00 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit f135205b97a8352113ff27c8fa8158aade75254a Author: TB Schardl Date: Tue Aug 18 10:40:50 2015 -0400 Adding 'getIdentity()' method to Instruction to enhance serial TRE. 
commit fe40d5f2a3d392c9836968fb0c8ba3df1ebc908c Merge: d3cdbb9137e 378e97e50c4 Author: TB Schardl Date: Mon Aug 17 08:52:52 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit d3cdbb9137e07f806ce69ae7f327749694b7b8b2 Merge: 653d0bbdd47 126b405bec6 Author: TB Schardl Date: Sat Aug 15 11:33:43 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 653d0bbdd47f7c8520941a9ea1ca5ce2d431bda5 Merge: 99611974297 26e17390798 Author: TB Schardl Date: Fri Aug 14 09:25:49 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 99611974297552647706e46eb290be13f1ee6a82 Merge: 4bf70c75ac9 22af77d94f3 Author: TB Schardl Date: Thu Aug 13 12:36:36 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 4bf70c75ac9f1d5eee6b5c2cbfbdb9b5d0de8f3b Merge: 4dec88872b7 a5ccfee2752 Author: TB Schardl Date: Tue Aug 11 13:31:22 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 4dec88872b7e91e3f680a304b98ee3f197f5e9db Merge: e2aac9890d9 abdf937a221 Author: TB Schardl Date: Mon Aug 10 12:53:34 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit e2aac9890d934cff8b1f09d5c31fa6c804b80bb0 Merge: 8b8574d13a1 73b16a70f16 Author: TB Schardl Date: Sat Aug 8 09:12:17 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 8b8574d13a13cab91984d55cb78ebfae7caaf941 Merge: 2ee8648835e 1962b1b6b7e Author: TB Schardl Date: Fri Aug 7 09:04:50 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 2ee8648835e211ba1a93501acb5ece9f3d5d406b Author: TB Schardl Date: Thu Aug 6 08:53:21 2015 -0400 Bug fix on marking Sync instructions as potentially reading or writing memory commit 156cf024ecde0d1a725e32239c3057c71297fcfa Merge: 7d823a9c882 7809bb2e968 Author: TB Schardl Date: Thu Aug 6 08:08:36 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 7d823a9c882be773768c6c38d92cad7da9880b2f Author: TB Schardl Date: 
Thu Aug 6 08:08:02 2015 -0400 Adding SyncInst to set of instructions that may read/write memory. commit 383d9f685189d8294df1f988e7b2c328b2227873 Author: TB Schardl Date: Tue Aug 4 15:15:40 2015 -0400 Fixed typos from previous merge. commit 90a25b1e5633c00cec6a5dd77b998aeb9bfbfc19 Merge: 7907e1dbfd7 a639e155a28 Author: TB Schardl Date: Tue Aug 4 14:30:25 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 7907e1dbfd714cc121978597e0e552b1aa6eb195 Merge: 9819737b739 c71235ab7d7 Author: TB Schardl Date: Fri Jul 31 08:49:30 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 9819737b7396013f3d6dee738b070f11b1a52e8c Merge: 2c1c7bc0320 dc9125e8d13 Author: TB Schardl Date: Wed Jul 29 08:48:13 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 2c1c7bc0320cf3d5a74b2ad8cf91f24fa641da97 Author: TB Schardl Date: Wed Jul 29 08:47:45 2015 -0400 Adding function to SimplifyCFG to elide detach statements whose continuation immediately syncs. commit c950f20aa21eca8300eed7b10f98e4b61109311d Author: TB Schardl Date: Tue Jul 28 10:48:23 2015 -0400 Added optimization to remove trivial reattach blocks. 
commit 86df0ba3770a03a8271a5bba7f1a3708b3f0d153 Merge: 3fbb3bcf4cb bf26b3fcaec Author: TB Schardl Date: Mon Jul 27 08:22:30 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 3fbb3bcf4cbbe96c286774917025664dd8e2de80 Merge: 7bb5864b2ad 52f969b0298 Author: TB Schardl Date: Thu Jul 23 08:57:48 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 7bb5864b2ad318eb969b7f8d78e6d5171a8b9cbc Merge: 9a2143e2643 717d8ad6cf4 Author: TB Schardl Date: Wed Jul 22 08:02:57 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 9a2143e26433557e7f1eac221099bd037e487e80 Merge: c9d4623ac37 c721349466d Author: TB Schardl Date: Tue Jul 21 08:42:46 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit c9d4623ac37b0ba06e727dc71df3ec559a267762 Author: TB Schardl Date: Mon Jul 20 15:56:29 2015 -0400 Don't perform PRE across a detach or reattach, as it requires splitting a critical edge. commit e1df337ce92636114885f3268afaa571e279bcb2 Author: TB Schardl Date: Mon Jul 20 15:55:53 2015 -0400 Detach-reattach pairs create unsplittable critical edges. Add some asserts to check that we don't try to split those edges. commit 48ec13d545fde4c80f86132b330dec9c672c29b3 Author: TB Schardl Date: Mon Jul 20 13:32:44 2015 -0400 Minor edit to instruction combining to avoid pessimization of moving code after a sync. commit 46d9cfe4c634c7229c16623ca17f0b27d3c7ad28 Merge: c99bacd4cec 96d9043a78b Author: TB Schardl Date: Mon Jul 20 10:53:45 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit c99bacd4cecc8f6a9b0f159d957c81ca90a53c06 Author: TB Schardl Date: Mon Jul 20 10:50:26 2015 -0400 Updating existing optimization passes to generate correct code around detach/reattach/sync instructions. Tested on fib and simple race example codes. Some passes, such as redundant instruction combining, are still pessimizations for these parallel codes. 
commit bf96714f54abff14ce58abec408cafb5367ab0fe Merge: c8594201bba 591adee23bf Author: TB Schardl Date: Fri Jul 17 09:14:14 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit c8594201bba51bac12bf581ec5f11aff5e767f9b Merge: c10991b43d5 72400f8d508 Author: TB Schardl Date: Thu Jul 16 08:59:20 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit c10991b43d5dedafd23d7579635da4e111fd598c Merge: 1d47de608d6 4aa2f4514cc Author: TB Schardl Date: Wed Jul 15 08:16:18 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 1d47de608d6e59908f715569137f5e2dac1f339a Merge: 3a70241cdea 815d6131a4d Author: TB Schardl Date: Tue Jul 14 09:19:14 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 3a70241cdea09232c8e26cfe42e56fac598ed8ba Author: TB Schardl Date: Sun Jul 12 08:24:32 2015 -0400 Updated PromoteMemoryToRegister to properly handle reattach, specifically, to avoid promoting alloca's if doing so would require a Phi node to inherit register state through a reattach. commit 51d54d96cc3cdaec661ea2268e8dd6294b22375a Author: TB Schardl Date: Sun Jul 12 08:23:11 2015 -0400 Adjusting reattach to look more like a branch. 
commit d39d1f75be719678706e403c64d1a53f9387ef98 Author: TB Schardl Date: Sun Jul 12 08:22:20 2015 -0400 Updated comments in IRBuilder commit 08f1f890d00a14f4ffccdf7da44b8c7b0e5daa12 Merge: 3fa3c489669 1e3fa768c01 Author: TB Schardl Date: Sat Jul 11 07:59:42 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 3fa3c489669220cef599f61adb52c0b3eba4bc0c Merge: 48100a712f7 e57b60a7f96 Author: TB Schardl Date: Fri Jul 10 08:48:24 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 48100a712f7dddd6578ec0f93fd55ae5ddc033ce Merge: 72a88786c60 86b4ed2fc40 Author: TB Schardl Date: Thu Jul 9 09:01:23 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 703f88a7461d9741c5d0203c02e702f48cda74e7 Merge: 5c355339f57 8e3d42ecb81 Author: Billy Moses Date: Wed Jul 8 21:51:37 2015 -0700 Merge branch 'tb-scratch' of github.com:taekwonbilly/Parallel-IR into billy-scratch commit 5c355339f57181fbf8ce8e665ce4a5e1b18a6a35 Author: Billy Moses Date: Wed Jul 8 21:50:21 2015 -0700 fix merge error commit 72a88786c604e0c99dace11e7ab02b9bea53c7c4 Merge: ab1078ca539 080d7a819f4 Author: TB Schardl Date: Wed Jul 8 07:54:34 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit ab1078ca5394d4a132b9bfef2b45fe9936355c62 Author: TB Schardl Date: Tue Jul 7 23:59:33 2015 -0400 Rework reattach to take a basic block as an argument. Reattach is therefore like a break, while not being a break. 
commit 189cbf6873ffb4880a10098341abdc18447d38d3 Merge: 8e3d42ecb81 7b7c81cd353 Author: TB Schardl Date: Tue Jul 7 13:45:38 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 8e3d42ecb81ed3d9b8a9bc45e17ba151aaba45fc Author: TB Schardl Date: Tue Jul 7 08:58:35 2015 -0400 Initial hack to disallow SimplifyCFG from removing sync instructions commit 738e14f4a4dcb70e7e9e6ff1e0727b05ce14c008 Author: TB Schardl Date: Tue Jul 7 08:57:47 2015 -0400 Fix comments on SyncInst commit bf1508cc4427479a10092210237db6678c1ef6d5 Merge: 19e947bd14f 2822246ecee Author: TB Schardl Date: Tue Jul 7 08:18:24 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 19e947bd14f9b9e718ab634481a0a0d96962b216 Merge: de195a8462b a25ee390b55 Author: TB Schardl Date: Mon Jul 6 08:24:00 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit bb180502346ef66f459596d49bc26c15dc822f88 Merge: c6662084d9e de195a8462b Author: Billy Moses Date: Sun Jul 5 21:36:33 2015 -0700 Merge branch 'tb-scratch' into billy-scratch Conflicts: include/llvm/Bitcode/LLVMBitCodes.h lib/AsmParser/LLParser.cpp lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h lib/IR/Instructions.cpp commit de195a8462b52201728b19904a7ff895e3c2b8a0 Author: TB Schardl Date: Thu Jul 2 15:04:46 2015 -0400 Temporary sync instruction, in order to develop dominance analysis for CFG's with parallel control dependencies. 
commit 738db4461c0b4305c31f9feab72003012c2dcea8 Merge: 02ff4acf5a2 e4e6f29c93d Author: TB Schardl Date: Thu Jul 2 08:52:48 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 02ff4acf5a260ed830edf5f0764c49f3ce5bdfda Merge: 999aed1e3d0 7e6843cbd68 Author: TB Schardl Date: Wed Jul 1 09:43:34 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 999aed1e3d0c0df3d4d3d8f5b4ebbe7181834cba Author: TB Schardl Date: Wed Jul 1 09:42:21 2015 -0400 Teaching SCCP about 'detach' and 'reattach', such that optimization passes can run on codes with these IR instructions. commit d2f3f1e9b8c80feb8621e3897998a24c68365bed Merge: ea299f63c15 37cb5f1c2db Author: TB Schardl Date: Wed Jul 1 08:33:51 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit ea299f63c158dd1b90dcef36729f361c69f54505 Author: TB Schardl Date: Tue Jun 30 08:27:46 2015 -0400 Renamed 'spawn' to 'detach' commit f88a6553ebb86f8d5304a7b8df238b2274d936cd Merge: 3b6df76c9a1 a5106ca54d0 Author: TB Schardl Date: Tue Jun 30 08:03:44 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit 3b6df76c9a154c3ae22fe89569dfdac23637d12b Merge: e62bd55cd9d e8f07a7eb39 Author: TB Schardl Date: Mon Jun 29 09:50:59 2015 -0400 Merge branch 'master' of http://llvm.org/git/llvm into pir commit e62bd55cd9d749090f9137363ae55ada11a2eb4e Merge: 4dc79856c77 43e99f618db Author: William Moses Date: Sun Jun 28 16:13:19 2015 -0700 Merge pull request #1 from taekwonbilly/billy-scratch Add reattach instruction commit c6662084d9ecac843815ba39681d6ad2b3c3faaf Author: Billy Moses Date: Sun Jun 28 16:09:53 2015 -0700 allow to compile commit 43e99f618db80683c40b98110a9320fb88f2b75f Author: Billy Moses Date: Sat Jun 27 13:20:11 2015 -0700 add token commit 1a4a51b9510224c583acc08555807713a26277e2 Author: Billy Moses Date: Thu Jun 25 22:35:46 2015 -0700 Reattach commit 5861430d7fe8c36d01e42d5a79765232d3733a55 Author: Billy Moses Date: Thu Jun 25 22:13:27 2015 -0700 
Reattach commit 4dc79856c77887cd506b15fee5793608071c7b0d Author: TB Schardl Date: Thu Jun 25 14:08:59 2015 -0400 Cleanup: remove unnecessary space commit 592fd5576cc26e3a0ba7efe4918b0c8f94c54b0f Merge: 08297c02e75 eebe475623c Author: TB Schardl Date: Thu Jun 25 13:49:16 2015 -0400 Merge remote-tracking branch 'origin/tb-scratch' into pir commit 08297c02e75ec7416751d443a99239d464c90061 Author: TB Schardl Date: Thu Jun 25 13:46:26 2015 -0400 Porting spawn instruction to current LLVM master. Added CreateSpawn to IRBuilder. commit eebe475623c877375a6718b362a76e2bd8843e11 Author: TB Schardl Date: Wed May 27 10:00:32 2015 -0400 cleaning up directory to support compilation on my system within a separate build directory commit 41059692e83eacd80f90f7df15510f97ae7c679d Author: Billy Moses Date: Tue May 26 18:27:48 2015 -0400 fix commit 21846df31a8b5b45b82781e8f8a6eb8c9c8dcb0f Author: Billy Moses Date: Tue May 26 17:55:07 2015 -0400 rm scruff commit d3d85e53fb33660f44a60f9e1c04c133596a7344 Author: Billy Moses Date: Tue May 26 13:13:00 2015 -0400 cleanup commit 8cc15c93dcee39782e92168f85e67fb7db46d069 Merge: 218888afe22 f3fb567248e Author: Billy Moses Date: Tue May 26 13:11:33 2015 -0400 Merge branch 'master' of github.com:taekwonbilly/Parallel-IR fix issue commit 218888afe22e6c297f19a5641809492429be18a7 Author: Billy Moses Date: Tue May 26 13:09:58 2015 -0400 fixed commit f3fb567248ece821dd2cd77008d1be0c385e78b0 Author: Billy Moses Date: Wed May 20 16:45:43 2015 -0400 ud commit 8721e720eeb689bf1e9f3f401a4aa851725cc126 Author: Billy Moses Date: Wed Apr 15 09:57:41 2015 -0400 reset commit b1dd73bcb3e3adc89c78acf620b81a7271f261b3 Author: Billy Moses Date: Mon Apr 13 10:13:29 2015 -0400 Last commit before change syntax commit 5cdcb6294493acf8bf10274c3a4a6f1f70c6de36 Author: Billy Moses Date: Mon Apr 6 12:05:40 2015 -0400 Updated llvm / added Future Type commit 2ce961b4e05eab9cb04b59e73ee1209b74e39524 Author: Billy Moses Date: Wed Apr 1 11:04:13 2015 -0400 update llvm commit 
5a8e342deb6ff3f9535890096b76731028740219 Author: Billy Moses Date: Tue Mar 17 22:55:24 2015 -0400 Added llvm src commit 140e15b2bddcc72a1a07b1dce8b84ae00f371e55 Author: Billy Moses Date: Tue Mar 17 22:11:09 2015 -0400 first commit Updated clang submodule Code cleanup to reduce diff against mainline LLVM. Fixes to address several failing LLVM regression tests. Based on the SyncElimination tests, specifically "for2.ll," it appears that SyncElimination removes sync instructions that are not safe to remove. One relevant test has been updated to note this problem and marked "XFAIL." [CodeGen] Reverting an earlier change to SelectionDagISel for Cilk codes. Previously, to fix an “rbp/rsp issue” with Cilk codes, SelectionDagISel was changed to set a flag in functions that expose “returns twice”, in order to make those functions appear to contain variable sized objects. Setting this flag causes LLVM regression test “CodeGen/X86/setjmp-spills” to fail. Setting related flags, such as “HasOpaqueSPAdjustment” through their existing public interface also causes the same regression test to fail. In addition, I don’t see any rbp/rsp issues with Cilk codes when SelectionDagISel does not set any such flag. For these reasons, I'm removing this previous change to SelectionDagISel. [PassManager] Reworking Tapir modification to PassManagerBuilder to ensure that Sanitizer instrumentation passes run only once. This change should also help improve Tapir's compatibility with LTO. [TSan] Reverting change to TSan instrumentation, which was causing a test to fail. We will need to introduce a new instrumentation pass specifically for CilkSan. Updated clang submodule [CilkSanitizer] Added custom instrumentation pass for CilkSan. [CilkSanitizer] Added instrumentation of memory intrinsics and atomics. Added simple optimization to elide instrumentation of non-captured pointers in serial functions. Updated clang submodule. [CilkSanitizer] Improved analysis for avoiding instrumentation. 
[CilkSanitizer] Allow CilkSanitizer to handle a larger variety of memory access sizes and to properly ignore memory accesses of illegal sizes. [AliasAnalysis] Fixed compiler warning. [CSI] Store multiple filenames when multiple files are compiled together. Updated clang submodule Remove .travis.yml --- .github/ISSUE_TEMPLATE/bug_report.md | 53 + .github/workflows/clang-tests.yml | 5 +- .github/workflows/libclc-tests.yml | 5 +- .github/workflows/lld-tests.yml | 5 +- .github/workflows/lldb-tests.yml | 5 +- .github/workflows/llvm-bugs.yml | 2 +- .github/workflows/llvm-project-tests.yml | 16 +- .github/workflows/llvm-tests.yml | 281 +- MIT_LICENSE.TXT | 29 + README.md | 621 +- README_LLVM.md | 39 + clang/CMakeLists.txt | 3 + clang/CodeOwners.rst | 4 + clang/README.md | 38 + clang/bindings/python/clang/cindex.py | 15 + .../python/tests/cindex/test_diagnostics.py | 2 +- clang/include/clang-c/Index.h | 26 +- clang/include/clang/AST/ASTContext.h | 6 + clang/include/clang/AST/ASTNodeTraverser.h | 7 + clang/include/clang/AST/ComputeDependence.h | 3 + clang/include/clang/AST/Decl.h | 2 + clang/include/clang/AST/Expr.h | 2 + clang/include/clang/AST/ExprCilk.h | 71 + .../include/clang/AST/NonTrivialTypeVisitor.h | 2 + clang/include/clang/AST/RecursiveASTVisitor.h | 25 + clang/include/clang/AST/StmtCilk.h | 240 + clang/include/clang/AST/StmtVisitor.h | 2 + clang/include/clang/AST/Type.h | 46 +- clang/include/clang/AST/TypeLoc.h | 20 + clang/include/clang/AST/TypeProperties.td | 16 + clang/include/clang/Basic/Attr.td | 63 +- clang/include/clang/Basic/AttrDocs.td | 25 + clang/include/clang/Basic/Builtins.def | 4 + clang/include/clang/Basic/Cilk.h | 49 + clang/include/clang/Basic/CodeGenOptions.def | 4 + clang/include/clang/Basic/CodeGenOptions.h | 4 + .../clang/Basic/DiagnosticDriverKinds.td | 12 + clang/include/clang/Basic/DiagnosticGroups.td | 6 + .../clang/Basic/DiagnosticParseKinds.td | 28 + .../clang/Basic/DiagnosticSemaKinds.td | 181 +- clang/include/clang/Basic/Features.def 
| 3 + clang/include/clang/Basic/LangOptions.def | 8 + clang/include/clang/Basic/LangOptions.h | 30 + clang/include/clang/Basic/Sanitizers.def | 3 + clang/include/clang/Basic/StmtNodes.td | 7 + clang/include/clang/Basic/Tapir.h | 24 + clang/include/clang/Basic/TokenKinds.def | 8 + clang/include/clang/Basic/TypeNodes.td | 1 + clang/include/clang/Config/config.h.cmake | 3 + clang/include/clang/Driver/Options.td | 39 + clang/include/clang/Driver/SanitizerArgs.h | 1 + clang/include/clang/Driver/Tapir.h | 31 + clang/include/clang/Driver/ToolChain.h | 33 + clang/include/clang/Parse/Parser.h | 5 + clang/include/clang/Sema/DeclSpec.h | 31 +- clang/include/clang/Sema/Initialization.h | 13 +- clang/include/clang/Sema/Overload.h | 5 + clang/include/clang/Sema/Sema.h | 34 + .../include/clang/Serialization/ASTBitCodes.h | 7 + .../clang/Serialization/TypeBitCodes.def | 1 + clang/lib/AST/ASTContext.cpp | 93 + clang/lib/AST/ASTImporter.cpp | 55 + clang/lib/AST/ASTStructuralEquivalence.cpp | 20 + clang/lib/AST/AttrImpl.cpp | 5 + clang/lib/AST/ComputeDependence.cpp | 5 + clang/lib/AST/Decl.cpp | 21 +- clang/lib/AST/Expr.cpp | 3 + clang/lib/AST/ExprClassification.cpp | 16 +- clang/lib/AST/ExprConstant.cpp | 5 +- clang/lib/AST/ItaniumMangle.cpp | 7 + clang/lib/AST/MicrosoftMangle.cpp | 5 + clang/lib/AST/Stmt.cpp | 21 + clang/lib/AST/StmtPrinter.cpp | 67 + clang/lib/AST/StmtProfile.cpp | 20 + clang/lib/AST/Type.cpp | 86 + clang/lib/AST/TypePrinter.cpp | 25 + clang/lib/Basic/Targets/AArch64.h | 2 + clang/lib/CodeGen/BackendUtil.cpp | 129 +- clang/lib/CodeGen/CGAtomic.cpp | 4 + clang/lib/CodeGen/CGBlocks.cpp | 2 + clang/lib/CodeGen/CGBuiltin.cpp | 180 +- clang/lib/CodeGen/CGCall.cpp | 37 + clang/lib/CodeGen/CGCilk.cpp | 866 + clang/lib/CodeGen/CGClass.cpp | 1 + clang/lib/CodeGen/CGCleanup.cpp | 28 +- clang/lib/CodeGen/CGCleanup.h | 7 + clang/lib/CodeGen/CGDebugInfo.cpp | 7 + clang/lib/CodeGen/CGDebugInfo.h | 1 + clang/lib/CodeGen/CGDecl.cpp | 116 +- clang/lib/CodeGen/CGDeclCXX.cpp | 53 +- 
clang/lib/CodeGen/CGException.cpp | 20 +- clang/lib/CodeGen/CGExpr.cpp | 48 + clang/lib/CodeGen/CGExprAgg.cpp | 36 + clang/lib/CodeGen/CGExprCXX.cpp | 32 +- clang/lib/CodeGen/CGExprComplex.cpp | 61 + clang/lib/CodeGen/CGExprConstant.cpp | 2 +- clang/lib/CodeGen/CGExprScalar.cpp | 78 + clang/lib/CodeGen/CGLoopInfo.cpp | 48 +- clang/lib/CodeGen/CGLoopInfo.h | 20 + clang/lib/CodeGen/CGStmt.cpp | 37 +- clang/lib/CodeGen/CMakeLists.txt | 1 + clang/lib/CodeGen/CodeGenFunction.cpp | 49 +- clang/lib/CodeGen/CodeGenFunction.h | 594 +- clang/lib/CodeGen/CodeGenModule.cpp | 6 +- clang/lib/CodeGen/CodeGenPGO.cpp | 53 + clang/lib/CodeGen/CodeGenTypes.cpp | 2 + clang/lib/CodeGen/EHScopeStack.h | 2 + clang/lib/CodeGen/ItaniumCXXABI.cpp | 9 + clang/lib/Driver/CMakeLists.txt | 1 + clang/lib/Driver/Tapir.cpp | 84 + clang/lib/Driver/ToolChain.cpp | 288 + clang/lib/Driver/ToolChains/Clang.cpp | 88 + clang/lib/Driver/ToolChains/CloudABI.cpp | 3 + clang/lib/Driver/ToolChains/CommonArgs.cpp | 81 + clang/lib/Driver/ToolChains/CommonArgs.h | 13 + clang/lib/Driver/ToolChains/CrossWindows.cpp | 2 + clang/lib/Driver/ToolChains/Darwin.cpp | 226 + clang/lib/Driver/ToolChains/Darwin.h | 22 + clang/lib/Driver/ToolChains/DragonFly.cpp | 2 + clang/lib/Driver/ToolChains/FreeBSD.cpp | 11 + clang/lib/Driver/ToolChains/Fuchsia.cpp | 3 + clang/lib/Driver/ToolChains/Gnu.cpp | 9 + clang/lib/Driver/ToolChains/Linux.cpp | 1 + clang/lib/Driver/ToolChains/MinGW.cpp | 2 + clang/lib/Driver/ToolChains/Minix.cpp | 2 + clang/lib/Driver/ToolChains/Myriad.cpp | 8 + clang/lib/Driver/ToolChains/NaCl.cpp | 2 + clang/lib/Driver/ToolChains/NetBSD.cpp | 6 + clang/lib/Driver/ToolChains/PS4CPU.cpp | 2 + clang/lib/Driver/ToolChains/Solaris.cpp | 2 + clang/lib/Frontend/CompilerInvocation.cpp | 153 + clang/lib/Frontend/InitPreprocessor.cpp | 53 + clang/lib/Parse/CMakeLists.txt | 1 + clang/lib/Parse/ParseCilk.cpp | 539 + clang/lib/Parse/ParseDecl.cpp | 66 +- clang/lib/Parse/ParseExpr.cpp | 18 +- clang/lib/Parse/ParseObjc.cpp | 
1 + clang/lib/Parse/ParsePragma.cpp | 93 + clang/lib/Parse/ParseStmt.cpp | 37 + clang/lib/Parse/ParseTentative.cpp | 3 + clang/lib/Sema/CMakeLists.txt | 1 + clang/lib/Sema/DeclSpec.cpp | 1 + clang/lib/Sema/JumpDiagnostics.cpp | 98 + clang/lib/Sema/SemaCast.cpp | 10 + clang/lib/Sema/SemaChecking.cpp | 11 + clang/lib/Sema/SemaCilk.cpp | 140 + clang/lib/Sema/SemaDecl.cpp | 28 +- clang/lib/Sema/SemaDeclAttr.cpp | 26 + clang/lib/Sema/SemaDeclCXX.cpp | 17 + clang/lib/Sema/SemaExceptionSpec.cpp | 5 + clang/lib/Sema/SemaExpr.cpp | 170 +- clang/lib/Sema/SemaExprCXX.cpp | 6 + clang/lib/Sema/SemaExprMember.cpp | 10 +- clang/lib/Sema/SemaInit.cpp | 47 +- clang/lib/Sema/SemaLookup.cpp | 3 + clang/lib/Sema/SemaOverload.cpp | 45 +- clang/lib/Sema/SemaStmt.cpp | 642 +- clang/lib/Sema/SemaStmtAttr.cpp | 29 +- clang/lib/Sema/SemaTemplate.cpp | 4 + clang/lib/Sema/SemaTemplateDeduction.cpp | 10 + clang/lib/Sema/SemaTemplateVariadic.cpp | 6 + clang/lib/Sema/SemaType.cpp | 225 + clang/lib/Sema/TreeTransform.h | 220 + clang/lib/Serialization/ASTReader.cpp | 4 + clang/lib/Serialization/ASTReaderStmt.cpp | 63 + clang/lib/Serialization/ASTWriter.cpp | 4 + clang/lib/Serialization/ASTWriterStmt.cpp | 47 + clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 5 + clang/test/Cilk/Inputs/libopencilk-abi.bc | Bin 0 -> 30464 bytes clang/test/Cilk/addressof.cpp | 38 + clang/test/Cilk/cilk-exceptions.cpp | 512 + clang/test/Cilk/cilkfor-bad-input.c | 25 + clang/test/Cilk/cilkfor-bounds.cpp | 987 + clang/test/Cilk/cilkfor-continue.c | 11167 ++++++ .../Cilk/cilkfor-detach-unwind-rewrite.cpp | 50 + clang/test/Cilk/cilkfor-pgo.cpp | 24 + clang/test/Cilk/cilkfor-pointer.c | 9 + clang/test/Cilk/cilksan-O0.c | 38 + clang/test/Cilk/cilkscope-checks.c | 13 + clang/test/Cilk/cilkscope.c | 44 + clang/test/Cilk/clangchecks.cpp | 70 + clang/test/Cilk/constructors.cpp | 574 + clang/test/Cilk/early-return-while.c | 43 + clang/test/Cilk/early-return.c | 32 + clang/test/Cilk/early-return.cpp | 62 + 
clang/test/Cilk/hyper-address.c | 30 + clang/test/Cilk/hyper-alias-ctor.cpp | 25 + clang/test/Cilk/hyper-array-extern-1.cpp | 17 + clang/test/Cilk/hyper-array-extern-2.cpp | 19 + clang/test/Cilk/hyper-array-global.cpp | 15 + clang/test/Cilk/hyper-array-local.cpp | 43 + clang/test/Cilk/hyper-assign.c | 38 + clang/test/Cilk/hyper-autoincr.c | 119 + clang/test/Cilk/hyper-bare.cpp | 14 + clang/test/Cilk/hyper-cast-bad.c | 10 + clang/test/Cilk/hyper-complex.c | 35 + clang/test/Cilk/hyper-copy.c | 19 + clang/test/Cilk/hyper-destruct.cpp | 49 + clang/test/Cilk/hyper-errors.c | 40 + clang/test/Cilk/hyper-expand1.cpp | 47 + clang/test/Cilk/hyper-expand2.cpp | 32 + clang/test/Cilk/hyper-expand3.cpp | 17 + clang/test/Cilk/hyper-generic.c | 66 + clang/test/Cilk/hyper-global-c.c | 13 + clang/test/Cilk/hyper-global-ctor-dtor.cpp | 15 + clang/test/Cilk/hyper-global-ctor-only.cpp | 15 + clang/test/Cilk/hyper-global-dtor-only.cpp | 14 + clang/test/Cilk/hyper-new-final.cpp | 48 + clang/test/Cilk/hyper-new.cpp | 50 + clang/test/Cilk/hyper-no-dtor.cpp | 14 + clang/test/Cilk/hyper-nocilk.c | 11 + clang/test/Cilk/hyper-overload.cpp | 20 + clang/test/Cilk/hyper-param-bad.c | 5 + clang/test/Cilk/hyper-param-bad.cpp | 12 + clang/test/Cilk/hyper-pointer.c | 13 + clang/test/Cilk/hyper-reference.cpp | 7 + clang/test/Cilk/hyper-register.c | 46 + clang/test/Cilk/hyper-struct-assign.c | 10 + clang/test/Cilk/hyper-template-errors.cpp | 23 + clang/test/Cilk/hyper-template.cpp | 20 + clang/test/Cilk/hyper-template2.cpp | 25 + clang/test/Cilk/hyper-unary.c | 52 + clang/test/Cilk/hyper-unique.c | 8 + clang/test/Cilk/hyper-zero.c | 16 + clang/test/Cilk/implicit-sync-scopes.cpp | 2550 ++ clang/test/Cilk/implicit-sync.c | 49 + clang/test/Cilk/looptest.cpp | 62 + clang/test/Cilk/multiple-spawn-args-check.c | 10 + clang/test/Cilk/nested-trycatch.cpp | 57 + clang/test/Cilk/opencilk-spawn.cpp | 79 + clang/test/Cilk/reducer-skip-init.c | 12 + clang/test/Cilk/regiontest.c | 55 + clang/test/Cilk/spawn-atomic.c 
| 17 + clang/test/Cilk/spawn-builtin.c | 76 + clang/test/Cilk/spawn-call-arg.c | 17 + .../Cilk/spawn-decl-with-constructors.cpp | 267 + clang/test/Cilk/spawn-expr.c | 15 + clang/test/Cilk/spawn-in-cilk-for.c | 101 + clang/test/Cilk/spawn-return.cpp | 3 + clang/test/Cilk/spawn-template.cpp | 78 + clang/test/Cilk/spawntest.cpp | 104 + clang/test/Cilk/stream-compat.cpp | 22 + clang/test/Cilk/syncregion-debug-info.c | 17 + clang/test/Cilk/tapirloopattrs.c | 39 + clang/test/Cilk/taskframe-always-inline.cpp | 51 + clang/test/Cilk/trivial-assign-op.cpp | 63 + clang/test/Cilk/unreachable-sync.cpp | 18 + clang/test/Cilk/vla-of-hyper.c | 19 + clang/test/Cilk/worker-load-test.c | 100 + clang/test/CodeGenCXX/threadlocal_address.cpp | 2 +- ...e_comprehensive_static_instrumentation.cpp | 11 + ...a-attribute-supported-attributes-list.test | 8 + clang/test/Misc/show-diag-options.c | 2 +- clang/test/Sema/builtin-longjmp.c | 2 +- clang/tools/driver/CMakeLists.txt | 1 + clang/tools/libclang/CIndex.cpp | 11 + clang/tools/libclang/CXCursor.cpp | 20 + clang/tools/libclang/CXType.cpp | 5 + .../cmake/Modules/CompilerRTDarwinUtils.cmake | 4 + compiler-rt/lib/builtins/CMakeLists.txt | 4 + .../lib/sanitizer_common/sanitizer_linux.cpp | 4 +- libcxxabi/src/demangle/ItaniumDemangle.h | 38 + libcxxabi/src/demangle/ItaniumNodes.def | 1 + lld/COFF/Config.h | 5 + lld/COFF/Driver.cpp | 6 + lld/COFF/LTO.cpp | 3 + lld/COFF/Options.td | 4 + lld/Common/Args.cpp | 18 + lld/ELF/Config.h | 3 + lld/ELF/Driver.cpp | 4 + lld/ELF/DriverUtils.cpp | 1 + lld/ELF/LTO.cpp | 4 + lld/ELF/Options.td | 8 + lld/MachO/Config.h | 4 + lld/MachO/Driver.cpp | 4 + lld/MachO/LTO.cpp | 3 + lld/MachO/Options.td | 6 + lld/include/lld/Common/Args.h | 5 + llvm/.gitignore | 5 + llvm/.gitmodules | 0 llvm/CMakeLists.txt | 49 +- llvm/CODE_OWNERS.TXT | 4 + llvm/CREDITS.TXT | 25 + llvm/README.md | 23 + llvm/README.txt | 17 - llvm/WORKSPACE | 1 + llvm/bindings/ocaml/llvm/META.llvm.in | 8 + llvm/bindings/ocaml/llvm/llvm.ml | 9 + 
llvm/bindings/ocaml/llvm/llvm.mli | 21 + llvm/bindings/ocaml/llvm/llvm_ocaml.c | 24 + llvm/bindings/ocaml/transforms/CMakeLists.txt | 1 + .../transforms/tapir_opts/CMakeLists.txt | 5 + .../transforms/tapir_opts/llvm_tapir_opts.ml | 19 + .../transforms/tapir_opts/llvm_tapir_opts.mli | 19 + .../transforms/tapir_opts/tapir_opts_ocaml.c | 33 + llvm/cmake/config.guess | 2 +- llvm/examples/Kaleidoscope/CMakeLists.txt | 1 + .../Kaleidoscope/Tapir/CMakeLists.txt | 20 + .../Kaleidoscope/Tapir/KaleidoscopeJIT.h | 206 + llvm/examples/Kaleidoscope/Tapir/toy.cpp | 2163 ++ llvm/examples/Kaleidoscope/lib/toylib.c | 24 + llvm/include/llvm-c/Core.h | 22 +- llvm/include/llvm-c/Transforms/Tapir.h | 46 + llvm/include/llvm/Analysis/AliasAnalysis.h | 52 + .../llvm/Analysis/BasicAliasAnalysis.h | 4 + llvm/include/llvm/Analysis/CodeMetrics.h | 7 +- .../llvm/Analysis/DataRaceFreeAliasAnalysis.h | 85 + .../llvm/Analysis/DependenceAnalysis.h | 85 +- .../llvm/Analysis/LoopAccessAnalysis.h | 20 +- .../llvm/Analysis/LoopAnalysisManager.h | 2 + llvm/include/llvm/Analysis/LoopInfo.h | 53 + llvm/include/llvm/Analysis/LoopIterator.h | 19 +- llvm/include/llvm/Analysis/MemoryBuiltins.h | 4 + .../llvm/Analysis/MemoryDependenceAnalysis.h | 7 +- llvm/include/llvm/Analysis/MemorySSA.h | 6 +- llvm/include/llvm/Analysis/MustExecute.h | 4 + .../include/llvm/Analysis/SparsePropagation.h | 5 + llvm/include/llvm/Analysis/TapirRaceDetect.h | 365 + .../llvm/Analysis/TapirTargetFuncs.def | 29 + llvm/include/llvm/Analysis/TapirTaskInfo.h | 1545 + .../include/llvm/Analysis/TargetLibraryInfo.h | 70 + .../llvm/Analysis/TargetTransformInfo.h | 29 + .../llvm/Analysis/TargetTransformInfoImpl.h | 14 + llvm/include/llvm/Analysis/WorkSpanAnalysis.h | 57 + llvm/include/llvm/AsmParser/LLParser.h | 3 + llvm/include/llvm/AsmParser/LLToken.h | 6 + llvm/include/llvm/Bitcode/LLVMBitCodes.h | 11 + .../llvm/CodeGen/GlobalISel/IRTranslator.h | 6 + llvm/include/llvm/CodeGen/MIRYamlMapping.h | 2 + 
llvm/include/llvm/CodeGen/MachineFrameInfo.h | 1 + llvm/include/llvm/CodeGen/MachineFunction.h | 20 + llvm/include/llvm/CodeGen/Passes.h | 5 + llvm/include/llvm/CodeGen/TailDuplicator.h | 7 +- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 35 + llvm/include/llvm/Config/llvm-config.h.cmake | 9 + llvm/include/llvm/Demangle/ItaniumDemangle.h | 38 + llvm/include/llvm/Demangle/ItaniumNodes.def | 1 + llvm/include/llvm/IR/Attributes.td | 24 + llvm/include/llvm/IR/BasicBlock.h | 10 + llvm/include/llvm/IR/DerivedTypes.h | 4 + llvm/include/llvm/IR/EHPersonalities.h | 3 +- llvm/include/llvm/IR/Function.h | 8 + llvm/include/llvm/IR/IRBuilder.h | 31 + llvm/include/llvm/IR/InstVisitor.h | 9 + llvm/include/llvm/IR/InstrTypes.h | 5 + llvm/include/llvm/IR/Instruction.def | 143 +- llvm/include/llvm/IR/Instruction.h | 4 + llvm/include/llvm/IR/Instructions.h | 271 + llvm/include/llvm/IR/IntrinsicInst.h | 20 + llvm/include/llvm/IR/Intrinsics.td | 103 +- llvm/include/llvm/InitializePasses.h | 17 + llvm/include/llvm/LTO/Config.h | 7 + llvm/include/llvm/LinkAllPasses.h | 9 + llvm/include/llvm/Passes/PassBuilder.h | 67 +- llvm/include/llvm/Support/GenericLoopInfo.h | 5 +- .../llvm/Support/GenericLoopInfoImpl.h | 5 +- .../include/llvm/Transforms/Instrumentation.h | 23 + .../llvm/Transforms/Instrumentation/CSI.h | 1619 + .../Instrumentation/CilkSanitizer.h | 34 + .../ComprehensiveStaticInstrumentation.h | 48 + .../SurgicalInstrumentationConfig.h | 156 + llvm/include/llvm/Transforms/Scalar/GVN.h | 4 +- .../llvm/Transforms/Scalar/IndVarSimplify.h | 11 + .../llvm/Transforms/Scalar/JumpThreading.h | 3 + llvm/include/llvm/Transforms/Scalar/SROA.h | 6 +- llvm/include/llvm/Transforms/Tapir.h | 67 + llvm/include/llvm/Transforms/Tapir/CilkABI.h | 134 + .../llvm/Transforms/Tapir/CilkRTSCilkFor.h | 60 + llvm/include/llvm/Transforms/Tapir/CudaABI.h | 91 + .../Transforms/Tapir/DRFScopedNoAliasAA.h | 29 + .../include/llvm/Transforms/Tapir/LambdaABI.h | 99 + .../llvm/Transforms/Tapir/LoopSpawningTI.h | 27 + 
.../llvm/Transforms/Tapir/LoopStripMine.h | 54 + .../llvm/Transforms/Tapir/LoopStripMinePass.h | 32 + .../llvm/Transforms/Tapir/LoweringUtils.h | 565 + .../llvm/Transforms/Tapir/OMPTaskABI.h | 99 + .../llvm/Transforms/Tapir/OpenCilkABI.h | 197 + .../include/llvm/Transforms/Tapir/OpenMPABI.h | 66 + llvm/include/llvm/Transforms/Tapir/Outline.h | 90 + .../llvm/Transforms/Tapir/QthreadsABI.h | 82 + .../include/llvm/Transforms/Tapir/SerialABI.h | 53 + .../Transforms/Tapir/SerializeSmallTasks.h | 29 + .../llvm/Transforms/Tapir/TapirLoopInfo.h | 252 + .../llvm/Transforms/Tapir/TapirTargetIDs.h | 84 + .../llvm/Transforms/Tapir/TapirToTarget.h | 34 + .../llvm/Transforms/Utils/BasicBlockUtils.h | 6 + .../llvm/Transforms/Utils/BuildLibCalls.h | 7 + llvm/include/llvm/Transforms/Utils/Cloning.h | 6 + llvm/include/llvm/Transforms/Utils/Local.h | 7 + .../llvm/Transforms/Utils/LoopRotationUtils.h | 6 +- .../include/llvm/Transforms/Utils/LoopUtils.h | 27 +- .../llvm/Transforms/Utils/PromoteMemToReg.h | 3 +- .../llvm/Transforms/Utils/SSAUpdater.h | 5 + .../llvm/Transforms/Utils/SSAUpdaterImpl.h | 92 +- .../llvm/Transforms/Utils/TapirUtils.h | 380 + .../llvm/Transforms/Utils/TaskCanonicalize.h | 28 + .../llvm/Transforms/Utils/TaskSimplify.h | 41 + .../llvm/Transforms/Utils/UnrollLoop.h | 3 +- llvm/include/module.modulemap | 1 + llvm/lib/Analysis/AliasAnalysis.cpp | 272 + llvm/lib/Analysis/AliasSetTracker.cpp | 16 +- llvm/lib/Analysis/Analysis.cpp | 3 + llvm/lib/Analysis/BasicAliasAnalysis.cpp | 304 +- llvm/lib/Analysis/CMakeLists.txt | 4 + llvm/lib/Analysis/CaptureTracking.cpp | 43 + llvm/lib/Analysis/CodeMetrics.cpp | 21 +- .../Analysis/DataRaceFreeAliasAnalysis.cpp | 145 + llvm/lib/Analysis/DependenceAnalysis.cpp | 749 +- llvm/lib/Analysis/InlineCost.cpp | 17 + llvm/lib/Analysis/LoopAccessAnalysis.cpp | 51 +- llvm/lib/Analysis/LoopAnalysisManager.cpp | 3 + llvm/lib/Analysis/LoopInfo.cpp | 270 +- llvm/lib/Analysis/MemoryBuiltins.cpp | 21 + 
.../lib/Analysis/MemoryDependenceAnalysis.cpp | 25 +- llvm/lib/Analysis/MemorySSA.cpp | 110 +- llvm/lib/Analysis/MustExecute.cpp | 105 +- llvm/lib/Analysis/TapirRaceDetect.cpp | 2206 ++ llvm/lib/Analysis/TapirTaskInfo.cpp | 1846 + llvm/lib/Analysis/TargetLibraryInfo.cpp | 100 +- llvm/lib/Analysis/TargetTransformInfo.cpp | 5 + llvm/lib/Analysis/ValueTracking.cpp | 11 + llvm/lib/Analysis/WorkSpanAnalysis.cpp | 118 + llvm/lib/AsmParser/LLLexer.cpp | 3 + llvm/lib/AsmParser/LLParser.cpp | 98 + llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 83 + llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 43 + llvm/lib/CodeGen/CMakeLists.txt | 1 + llvm/lib/CodeGen/CodeGen.cpp | 1 + llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 73 + .../LiveDebugValues/InstrRefBasedImpl.cpp | 12 + llvm/lib/CodeGen/MIRParser/MIRParser.cpp | 1 + llvm/lib/CodeGen/MIRPrinter.cpp | 1 + llvm/lib/CodeGen/MachineBlockPlacement.cpp | 11 +- llvm/lib/CodeGen/MachineSSAUpdater.cpp | 19 + llvm/lib/CodeGen/MachineSink.cpp | 94 + llvm/lib/CodeGen/RegisterCoalescer.cpp | 53 +- .../SelectionDAG/FunctionLoweringInfo.cpp | 10 + .../SelectionDAG/ScheduleDAGSDNodes.cpp | 2 + .../SelectionDAG/SelectionDAGBuilder.cpp | 103 + .../SelectionDAG/SelectionDAGBuilder.h | 6 + .../CodeGen/SelectionDAG/SelectionDAGISel.cpp | 13 + llvm/lib/CodeGen/ShrinkWrap.cpp | 3 +- llvm/lib/CodeGen/TailDuplicator.cpp | 121 +- llvm/lib/CodeGen/TapirCleanup.cpp | 101 + llvm/lib/CodeGen/TargetLoweringBase.cpp | 3 + llvm/lib/CodeGen/TargetPassConfig.cpp | 3 + llvm/lib/IR/AsmWriter.cpp | 27 + llvm/lib/IR/BasicBlock.cpp | 19 + llvm/lib/IR/Core.cpp | 28 + llvm/lib/IR/DebugInfo.cpp | 2 +- llvm/lib/IR/EHPersonalities.cpp | 3 + llvm/lib/IR/Instruction.cpp | 14 + llvm/lib/IR/Instructions.cpp | 186 + llvm/lib/IR/IntrinsicInst.cpp | 4 + llvm/lib/IR/Type.cpp | 7 + llvm/lib/IR/Verifier.cpp | 151 +- llvm/lib/LTO/LTO.cpp | 1 + llvm/lib/LTO/LTOBackend.cpp | 19 +- llvm/lib/Passes/CMakeLists.txt | 1 + llvm/lib/Passes/PassBuilder.cpp | 23 +- 
llvm/lib/Passes/PassBuilderPipelines.cpp | 380 +- llvm/lib/Passes/PassRegistry.def | 16 + .../Target/AArch64/AArch64ISelLowering.cpp | 197 + llvm/lib/Target/AArch64/AArch64ISelLowering.h | 9 + llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 78 + llvm/lib/Target/AArch64/AArch64InstrInfo.h | 4 + llvm/lib/Target/AArch64/AArch64InstrInfo.td | 26 + .../Target/AArch64/AArch64RegisterInfo.cpp | 6 + .../MCTargetDesc/AArch64MCCodeEmitter.cpp | 3 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 13 + llvm/lib/Target/X86/X86InstrInfo.cpp | 169 +- llvm/lib/Target/X86/X86InstrInfo.h | 9 + llvm/lib/Transforms/CMakeLists.txt | 1 + llvm/lib/Transforms/IPO/CMakeLists.txt | 1 + llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 58 +- .../lib/Transforms/IPO/InferFunctionAttrs.cpp | 1 + llvm/lib/Transforms/IPO/PartialInlining.cpp | 3 + .../InstCombine/InstCombineCalls.cpp | 122 + .../InstCombine/InstructionCombining.cpp | 10 + .../Instrumentation/AddressSanitizer.cpp | 36 +- .../Transforms/Instrumentation/CMakeLists.txt | 5 + .../Instrumentation/CilkSanitizer.cpp | 4829 +++ .../ComprehensiveStaticInstrumentation.cpp | 2945 ++ .../Instrumentation/GCOVProfiling.cpp | 7 +- .../Instrumentation/Instrumentation.cpp | 1 - .../SurgicalInstrumentationConfig.cpp | 109 + .../Instrumentation/ThreadSanitizer.cpp | 4 + llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 7 + llvm/lib/Transforms/Scalar/GVN.cpp | 103 +- llvm/lib/Transforms/Scalar/GVNHoist.cpp | 1 + llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 118 +- llvm/lib/Transforms/Scalar/JumpThreading.cpp | 92 +- llvm/lib/Transforms/Scalar/LICM.cpp | 175 +- llvm/lib/Transforms/Scalar/LoopDeletion.cpp | 9 +- llvm/lib/Transforms/Scalar/LoopDistribute.cpp | 1 + .../Transforms/Scalar/LoopIdiomRecognize.cpp | 5 + .../Transforms/Scalar/LoopLoadElimination.cpp | 1 + .../lib/Transforms/Scalar/LoopPassManager.cpp | 3 + llvm/lib/Transforms/Scalar/LoopRotation.cpp | 13 +- .../lib/Transforms/Scalar/LoopSimplifyCFG.cpp | 13 + llvm/lib/Transforms/Scalar/LoopSink.cpp | 13 +- 
.../Transforms/Scalar/LoopStrengthReduce.cpp | 17 +- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 48 +- .../Transforms/Scalar/LoopVersioningLICM.cpp | 2 +- llvm/lib/Transforms/Scalar/SROA.cpp | 80 +- .../Transforms/Scalar/SimpleLoopUnswitch.cpp | 79 +- .../lib/Transforms/Scalar/SimplifyCFGPass.cpp | 72 + .../Scalar/TailRecursionElimination.cpp | 271 +- .../Scalar/WarnMissedTransforms.cpp | 14 + llvm/lib/Transforms/Tapir/CMakeLists.txt | 41 + llvm/lib/Transforms/Tapir/CilkABI.cpp | 1796 + llvm/lib/Transforms/Tapir/CilkRTSCilkFor.cpp | 302 + .../Transforms/Tapir/DRFScopedNoAliasAA.cpp | 332 + llvm/lib/Transforms/Tapir/LambdaABI.cpp | 578 + llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp | 1767 + llvm/lib/Transforms/Tapir/LoopStripMine.cpp | 1559 + .../Transforms/Tapir/LoopStripMinePass.cpp | 454 + llvm/lib/Transforms/Tapir/LoweringUtils.cpp | 1275 + llvm/lib/Transforms/Tapir/OMPTaskABI.cpp | 597 + llvm/lib/Transforms/Tapir/OpenCilkABI.cpp | 1144 + llvm/lib/Transforms/Tapir/Outline.cpp | 567 + llvm/lib/Transforms/Tapir/QthreadsABI.cpp | 350 + llvm/lib/Transforms/Tapir/SerialABI.cpp | 52 + .../Transforms/Tapir/SerializeSmallTasks.cpp | 216 + llvm/lib/Transforms/Tapir/Tapir.cpp | 35 + llvm/lib/Transforms/Tapir/TapirLoopInfo.cpp | 646 + llvm/lib/Transforms/Tapir/TapirToTarget.cpp | 610 + llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 49 +- .../Transforms/Utils/BreakCriticalEdges.cpp | 48 + llvm/lib/Transforms/Utils/BuildLibCalls.cpp | 14 + llvm/lib/Transforms/Utils/CMakeLists.txt | 3 + llvm/lib/Transforms/Utils/CloneFunction.cpp | 12 +- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 8 + llvm/lib/Transforms/Utils/InlineFunction.cpp | 609 +- llvm/lib/Transforms/Utils/LCSSA.cpp | 11 +- llvm/lib/Transforms/Utils/Local.cpp | 102 +- .../Transforms/Utils/LoopRotationUtils.cpp | 31 +- llvm/lib/Transforms/Utils/LoopSimplify.cpp | 27 + llvm/lib/Transforms/Utils/LoopUnroll.cpp | 141 +- .../Transforms/Utils/LoopUnrollRuntime.cpp | 3 +- llvm/lib/Transforms/Utils/LoopUtils.cpp | 105 
+- llvm/lib/Transforms/Utils/LoopVersioning.cpp | 1 + llvm/lib/Transforms/Utils/Mem2Reg.cpp | 40 +- .../Utils/PromoteMemoryToRegister.cpp | 67 +- llvm/lib/Transforms/Utils/SCCPSolver.cpp | 14 + llvm/lib/Transforms/Utils/SSAUpdater.cpp | 79 +- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 405 +- llvm/lib/Transforms/Utils/TapirUtils.cpp | 2509 ++ .../lib/Transforms/Utils/TaskCanonicalize.cpp | 71 + llvm/lib/Transforms/Utils/TaskSimplify.cpp | 702 + .../Transforms/Vectorize/LoopVectorize.cpp | 3 + llvm/projects/CMakeLists.txt | 6 +- llvm/runtimes/CMakeLists.txt | 2 + .../TapirRaceDetect/bitcast-function.ll | 27 + .../check-pointer-with-casts.ll | 370 + .../TapirRaceDetect/tapir-rd-objects.ll | 6079 ++++ .../TapirTaskInfo/detach-continue-loop.ll | 33 + .../Analysis/TapirTaskInfo/drf-aa-calls.ll | 39 + .../TapirTaskInfo/interleaved-sync-region.ll | 70 + .../test/Analysis/TapirTaskInfo/memssa-drf.ll | 43 + .../TapirTaskInfo/shared-eh-spindles.ll | 468 + .../Analysis/TapirTaskInfo/simple-spawn.ll | 43 + .../Analysis/TapirTaskInfo/spindle-loop.ll | 544 + llvm/test/Bindings/OCaml/core.ml | 29 + llvm/test/Bitcode/tapir.ll | 42 + llvm/test/CodeGen/AArch64/O0-pipeline.ll | 9 +- llvm/test/CodeGen/AArch64/O3-pipeline.ll | 8 + .../CodeGen/AArch64/arm64-shrink-wrapping.ll | 4 +- llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll | 24 +- .../CodeGen/AArch64/optimize-cond-branch.ll | 2 +- llvm/test/CodeGen/AArch64/ragreedy-csr.ll | 6 +- .../test/CodeGen/AArch64/setjmp-straighten.ll | 58 + llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 44 + llvm/test/CodeGen/ARM/O3-pipeline.ll | 7 + ...no-register-coalescing-in-returnsTwice.mir | 1 + llvm/test/CodeGen/LoongArch/O0-pipeline.ll | 9 +- llvm/test/CodeGen/LoongArch/opt-pipeline.ll | 7 + llvm/test/CodeGen/PowerPC/O0-pipeline.ll | 9 +- llvm/test/CodeGen/PowerPC/O3-pipeline.ll | 8 + llvm/test/CodeGen/RISCV/O0-pipeline.ll | 9 +- llvm/test/CodeGen/RISCV/O3-pipeline.ll | 7 + llvm/test/CodeGen/X86/O0-pipeline.ll | 9 +- llvm/test/CodeGen/X86/fp128-select.ll | 
4 +- llvm/test/CodeGen/X86/fshl.ll | 2 +- llvm/test/CodeGen/X86/fshr.ll | 2 +- llvm/test/CodeGen/X86/mul-legalize.ll | 3 +- llvm/test/CodeGen/X86/opt-pipeline.ll | 7 + llvm/test/CodeGen/X86/pr38795.ll | 3 +- llvm/test/CodeGen/X86/pr48533.ll | 42 + llvm/test/CodeGen/X86/ragreedy-bug.ll | 2 - llvm/test/CodeGen/X86/setjmp-straighten.ll | 58 + llvm/test/CodeGen/X86/shadow-stack.ll | 40 +- llvm/test/CodeGen/X86/sse1.ll | 12 +- .../X86/tail-dup-merge-loop-headers.ll | 2 +- llvm/test/CodeGen/X86/tail-dup-partial.ll | 2 +- llvm/test/CodeGen/X86/tail-dup-repeat.ll | 2 +- llvm/test/Examples/Kaleidoscope/Tapir.test | 57 + llvm/test/Other/loop-pm-invalidation.ll | 7 + llvm/test/Other/new-pass-manager.ll | 1 + llvm/test/Other/new-pm-defaults.ll | 36 + llvm/test/Other/new-pm-lto-defaults.ll | 2 + .../Other/new-pm-thinlto-postlink-defaults.ll | 36 + .../new-pm-thinlto-postlink-pgo-defaults.ll | 36 + ...-pm-thinlto-postlink-samplepgo-defaults.ll | 36 + .../Other/new-pm-thinlto-prelink-defaults.ll | 6 + .../new-pm-thinlto-prelink-pgo-defaults.ll | 7 + ...w-pm-thinlto-prelink-samplepgo-defaults.ll | 6 + llvm/test/Other/opt-pipeline-vector-passes.ll | 1 + llvm/test/Transforms/CSI/bb-cost.ll | 610 + llvm/test/Transforms/CSI/csi-global-names.ll | 46 + .../Coroutines/coro-retcon-resume-values.ll | 6 +- .../FunctionAttrs/int_sideeffect.ll | 4 +- llvm/test/Transforms/LoopRotate/pr35210.ll | 2 + .../MergeFunc/call-and-invoke-with-ranges.ll | 4 +- llvm/test/Transforms/MergeFunc/inline-asm.ll | 6 +- ...ting-sinking-required-for-vectorization.ll | 4 +- ...ple-unreachable-exits-for-vectorization.ll | 4 +- .../PhaseOrdering/ARM/arm_mult_q15.ll | 12 +- .../test/Transforms/PhaseOrdering/X86/vdiv.ll | 12 +- .../loop-rotation-vs-common-code-hoisting.ll | 2 +- .../PhaseOrdering/recompute-globalsaa.ll | 2 +- llvm/test/Transforms/Tapir/157.ll | 123 + .../CilkSanitizer/allocfreefn-nobuiltin.ll | 936 + .../CilkSanitizer/cilksan-aarch64-neon.ll | 698 + .../cilksan-capture-ptr-inbounds-gep.ll | 4396 +++ 
.../CilkSanitizer/cilksan-instr-checking.ll | 159 + .../CilkSanitizer/cilksan-loop-exit-loop.ll | 888 + .../cilksan-split-block-preds.ll | 62 + .../CilkSanitizer/cilksan-taskframe-use.ll | 131 + .../cilksan-unreachable-sharedeh.ll | 136 + .../Tapir/CilkSanitizer/constantexpr.ll | 21386 +++++++++++ .../CilkSanitizer/csi-global_ctor-zeroinit.ll | 3515 ++ .../CilkSanitizer/da-gcd-different-loops.ll | 160 + .../Transforms/Tapir/CilkSanitizer/fib.ll | 194 + .../Tapir/CilkSanitizer/fib_racy.ll | 201 + .../Tapir/CilkSanitizer/finddbgvalues.ll | 139 + .../Tapir/CilkSanitizer/loop-hoist.ll | 709 + .../loop-spawning-move-instrumentation.ll | 2780 ++ .../Transforms/Tapir/CilkSanitizer/nqueens.ll | 553 + .../Tapir/CilkSanitizer/nqueens_racy.ll | 549 + .../Tapir/CilkSanitizer/null-task-for-bb.ll | 305 + .../Tapir/CilkSanitizer/ptr-not-stripped.ll | 3528 ++ .../Tapir/CilkSanitizer/setup-blocks.ll | 1047 + .../split-unreachable-predecessors.ll | 135 + .../task-analysis-unreachable.ll | 42 + .../CilkSanitizer/unreachable-in-loop.ll | 5787 +++ .../Tapir/aarch64-threadpointer-opt.ll | 515 + .../arm64-stealable-setjmp-base-pointer.ll | 65 + .../asan-parallel-promotable-alloca-check.ll | 46 + .../test/Transforms/Tapir/atomic-bool-sroa.ll | 217 + .../Transforms/Tapir/canonical-iv-size.ll | 242 + .../Tapir/cilk-gxx-personality-inline.ll | 70 + .../Transforms/Tapir/cilk-lowering-complex.ll | 2669 ++ .../Tapir/cilk-stackframe-flags-load.ll | 1649 + .../Transforms/Tapir/cilkrts-lowering-test.ll | 330 + .../Transforms/Tapir/cilkrts-spawn-large.ll | 5055 +++ .../Tapir/cilksan-tapirlower-asan.ll | 270 + .../Tapir/codegen-task-frameaddress.ll | 255 + llvm/test/Transforms/Tapir/csenorhino.ll | 44 + .../Tapir/csi-detach-continue-split.ll | 67 + .../Transforms/Tapir/csi-detach-unwind.ll | 879 + .../Transforms/Tapir/csi-tapir-lowering.ll | 3457 ++ .../Transforms/Tapir/dac-loopspawning-ivs.ll | 1140 + .../Tapir/dac-loopspawning-simple.ll | 97 + .../Transforms/Tapir/dead-tapir-intrinsics.ll | 9061 
+++++ llvm/test/Transforms/Tapir/declare-main.ll | 23 + llvm/test/Transforms/Tapir/detach-gcov.ll | 117 + .../Transforms/Tapir/detach-phi-lowering.ll | 60 + .../detach-unreachable-with-exceptions.ll | 424 + .../Transforms/Tapir/detach-unwind-cleanup.ll | 127 + llvm/test/Transforms/Tapir/empty-scope.ll | 49 + llvm/test/Transforms/Tapir/empty.ll | 31 + .../test/Transforms/Tapir/equal-tripcounts.ll | 300 + .../Tapir/exception-loop-spawning.ll | 591 + .../Transforms/Tapir/exception-lowering.ll | 881 + ...exception-spawn-in-parfor-loop-spawning.ll | 889 + llvm/test/Transforms/Tapir/exit.ll | 20 + llvm/test/Transforms/Tapir/functionattrs.ll | 59 + .../Tapir/get-worker-number-lowering.ll | 729 + .../Transforms/Tapir/gvn-detach-continue.ll | 46 + .../test/Transforms/Tapir/hyperlookup-opts.ll | 333 + .../Tapir/inclusive-range-tapir-loop.ll | 1395 + .../Tapir/indvar-high-cost-expansion.ll | 225 + .../Transforms/Tapir/inline-after-lowering.ll | 136 + llvm/test/Transforms/Tapir/inline-byval.ll | 122 + .../Tapir/inline-detachedrethrow-phi.ll | 111 + ...-nounwind-detach-into-invoked-taskframe.ll | 102 + .../Transforms/Tapir/inline-spawn-lpads.ll | 410 + .../inline-tapir-loop-in-continuation.ll | 333 + .../Tapir/inline-task-update-phi.ll | 92 + .../Tapir/inline-taskframe-resume.ll | 276 + .../Tapir/inline-taskframe-split.ll | 166 + .../Transforms/Tapir/inline-unify-resume.ll | 7801 ++++ .../Tapir/inlining-personality-2.ll | 72 + .../Transforms/Tapir/inlining-personality.ll | 74 + .../Tapir/instcombine-sink-past-sync.ll | 73 + llvm/test/Transforms/Tapir/iv-canonicalize.ll | 79 + .../Transforms/Tapir/iv-use-after-loop.ll | 314 + .../Tapir/jump-threading-detach-continue-2.ll | 108 + .../Tapir/jump-threading-detach-continue.ll | 292 + .../Tapir/jump-threading-tapir-vh.ll | 28 + .../Transforms/Tapir/lcssa-loop-task-exit.ll | 2769 ++ llvm/test/Transforms/Tapir/libopencilk-abi.bc | Bin 0 -> 50512 bytes .../Transforms/Tapir/licm-loop-task-exits.ll | 8360 +++++ 
llvm/test/Transforms/Tapir/loop-analysis.ll | 87 + .../Tapir/loop-control-uses-parameter.ll | 405 + llvm/test/Transforms/Tapir/loop-grainsize.ll | 84 + llvm/test/Transforms/Tapir/loop-remark-iv.ll | 93 + .../Tapir/loop-spawning-alloca-hoist.ll | 340 + ...oop-spawning-iv-tripcount-type-mismatch.ll | 284 + .../Tapir/loop-spawning-iv-types.ll | 81 + .../Tapir/loop-spawning-unusual-backedge.ll | 156 + .../Tapir/loop-spawning-variant-condition.ll | 77 + .../Transforms/Tapir/loop-stripmine-attrs.ll | 136 + .../Tapir/loop-stripmine-clone-sharedeh.ll | 520 + .../Tapir/loop-stripmine-epilog-taskframe.ll | 540 + ...op-stripmine-iv-tripcount-type-mismatch.ll | 110 + .../Transforms/Tapir/loop-stripmine-unwind.ll | 233 + llvm/test/Transforms/Tapir/loop-stripmine.ll | 468 + .../Tapir/loop-unknown-tripcount.ll | 68 + .../Transforms/Tapir/loop-unswitch-lcssa.ll | 241 + llvm/test/Transforms/Tapir/loop-unswitch.ll | 455 + llvm/test/Transforms/Tapir/looplimit.ll | 96 + llvm/test/Transforms/Tapir/loops-complex.ll | 1826 + .../Tapir/loops-with-lcssa-lpads.ll | 842 + .../Transforms/Tapir/loopstripmine-parepil.ll | 5082 +++ llvm/test/Transforms/Tapir/lower-eh-unlink.ll | 512 + .../Transforms/Tapir/lower-static-allocas.ll | 140 + .../Transforms/Tapir/lower-unwind-phis.ll | 2043 ++ .../Tapir/lowering-arg-struct-attributes.ll | 811 + ...owering-nested-detaches-with-exceptions.ll | 6264 ++++ .../Tapir/lowering-nested-detaches.ll | 153 + .../lowering-taskframe-resume-critical.ll | 789 + .../Tapir/lowering-taskframe-shared-eh.ll | 1893 + .../Tapir/machine-sink-loop-peel.ll | 591 + llvm/test/Transforms/Tapir/machine-sink.ll | 677 + llvm/test/Transforms/Tapir/memoryssa-sync.ll | 58 + .../Transforms/Tapir/missed-loop-opts-test.ll | 87 + .../nested-loop-spawning-with-exceptions.ll | 1168 + .../Transforms/Tapir/nested-loop-spawning.ll | 907 + .../Tapir/nested-outline-vector-width.ll | 3537 ++ llvm/test/Transforms/Tapir/nested-trycatch.ll | 280 + .../Tapir/opencilk-lowering-debuginfo.ll | 6684 ++++ 
.../Transforms/Tapir/opencilk-no-bitcode.ll | 76 + .../outline-ancestor-shared-eh-spindle.ll | 641 + .../Transforms/Tapir/outline-helper-debug.ll | 307 + .../Tapir/outline-helper-drop-attrs.ll | 74 + .../Transforms/Tapir/outline-prologue-data.ll | 318 + .../Tapir/outline-remap-debug-md.ll | 7045 ++++ .../Transforms/Tapir/parallel-licm-norhino.ll | 70 + .../Transforms/Tapir/phi-in-continuation.ll | 686 + .../Tapir/preheader-sync-split-analyses.ll | 443 + llvm/test/Transforms/Tapir/returned-param.ll | 109 + llvm/test/Transforms/Tapir/sentinel-test.ll | 112 + .../Tapir/serial-tt-cfg-analysis.ll | 173 + .../serialize-detach-taskframe-simple.ll | 85 + .../Transforms/Tapir/serialize-small-loop.ll | 77 + llvm/test/Transforms/Tapir/setjmp-hoisting.ll | 83 + .../Transforms/Tapir/simple-loop-unswitch.ll | 154 + .../Tapir/simplifycfg-syncunwind.ll | 77 + .../Transforms/Tapir/slp-vectorize-long-bb.ll | 1264 + llvm/test/Transforms/Tapir/spawn-pfor.ll | 85 + llvm/test/Transforms/Tapir/sret-param.ll | 856 + .../Transforms/Tapir/sroa-detached-alloca.ll | 870 + .../Tapir/sroa-preserve-task-info.ll | 51 + llvm/test/Transforms/Tapir/sroa-racy.ll | 29598 ++++++++++++++++ ...a-update-multiple-reattach-predecessors.ll | 61 + .../Transforms/Tapir/stealable-attribute.ll | 866 + llvm/test/Transforms/Tapir/strandpure-licm.ll | 492 + .../Tapir/stripmine-inclusive-range.ll | 358 + llvm/test/Transforms/Tapir/sync-exception.ll | 112 + llvm/test/Transforms/Tapir/sync-merge.ll | 298 + llvm/test/Transforms/Tapir/sync-simplify.ll | 104 + .../Transforms/Tapir/syncreg-debuginfo.ll | 198 + llvm/test/Transforms/Tapir/syncregs.ll | 100 + llvm/test/Transforms/Tapir/tapir-cleanup.ll | 882 + llvm/test/Transforms/Tapir/tapir-licm.ll | 60 + .../Tapir/tapir-loop-metadata-valnotdom.ll | 4622 +++ .../Transforms/Tapir/tapir-loop-metadata.ll | 4622 +++ llvm/test/Transforms/Tapir/tapir-lower-phi.ll | 433 + .../Transforms/Tapir/tapir-runtime-merge.ll | 178 + .../Tapir/task-in-loop-task-exit.ll | 170 + 
.../Tapir/task-inline-through-invoke.ll | 742 + .../Tapir/task-simplify-domtree-update.ll | 103 + .../Transforms/Tapir/taskframe-csan-csi.ll | 620 + .../Tapir/taskframe-end-debuginfo.ll | 3356 ++ .../Tapir/taskframe-one-block-promote.ll | 216 + .../test/Transforms/Tapir/taskframe-remove.ll | 9813 +++++ .../Tapir/taskframe-resume-in-loop.ll | 7561 ++++ .../Tapir/taskframe-shared-eh-spindle.ll | 1860 + .../Transforms/Tapir/taskframe-shared-exit.ll | 190 + llvm/test/Transforms/Tapir/tre-middle-sync.ll | 203 + .../Transforms/Tapir/tre-syncunwind-lpad.ll | 69 + llvm/test/Transforms/Tapir/tre-syncunwind.ll | 77 + .../Transforms/Tapir/tre-tapir-runtime.ll | 76 + llvm/test/Transforms/Tapir/tre-two-syncs.ll | 256 + llvm/test/Transforms/Tapir/tre.ll | 122 + .../Transforms/Tapir/tsan-detach-invoke.ll | 75 + .../Transforms/Tapir/tsan-task-unreachable.ll | 70 + .../Transforms/Tapir/unroll-task-exits.ll | 125 + llvm/test/Transforms/Tapir/vector-outline.ll | 192 + llvm/test/Transforms/Tapir/wls-licm.ll | 116 + llvm/test/Verifier/invoke.ll | 2 +- llvm/test/lit.cfg.py | 1 + .../tools/llvm-reduce/remove-bbs-tapir.ll | 188 + llvm/tools/bugpoint/CMakeLists.txt | 1 + llvm/tools/bugpoint/bugpoint.cpp | 1 + llvm/tools/gold/gold-plugin.cpp | 28 + llvm/tools/llvm-diff/lib/DifferenceEngine.cpp | 47 + .../llvm-reduce/deltas/ReduceBasicBlocks.cpp | 3 + llvm/tools/opt/CMakeLists.txt | 1 + llvm/tools/opt/opt.cpp | 2 + .../Transforms/Scalar/LoopPassManagerTest.cpp | 2 + .../Utils/LoopRotationUtilsTest.cpp | 4 +- llvm/utils/TableGen/CodeGenIntrinsics.cpp | 18 + llvm/utils/TableGen/CodeGenIntrinsics.h | 10 + llvm/utils/TableGen/IntrinsicEmitter.cpp | 35 +- llvm/utils/emacs/llvm-mode.el | 8 +- runtimes/CMakeLists.txt | 2 +- 819 files changed, 306671 insertions(+), 868 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 MIT_LICENSE.TXT create mode 100644 README_LLVM.md create mode 100644 clang/README.md create mode 100644 clang/include/clang/AST/ExprCilk.h create 
mode 100644 clang/include/clang/AST/StmtCilk.h create mode 100644 clang/include/clang/Basic/Cilk.h create mode 100644 clang/include/clang/Basic/Tapir.h create mode 100644 clang/include/clang/Driver/Tapir.h create mode 100644 clang/lib/CodeGen/CGCilk.cpp create mode 100644 clang/lib/Driver/Tapir.cpp create mode 100644 clang/lib/Parse/ParseCilk.cpp create mode 100644 clang/lib/Sema/SemaCilk.cpp create mode 100644 clang/test/Cilk/Inputs/libopencilk-abi.bc create mode 100644 clang/test/Cilk/addressof.cpp create mode 100644 clang/test/Cilk/cilk-exceptions.cpp create mode 100644 clang/test/Cilk/cilkfor-bad-input.c create mode 100644 clang/test/Cilk/cilkfor-bounds.cpp create mode 100644 clang/test/Cilk/cilkfor-continue.c create mode 100644 clang/test/Cilk/cilkfor-detach-unwind-rewrite.cpp create mode 100644 clang/test/Cilk/cilkfor-pgo.cpp create mode 100644 clang/test/Cilk/cilkfor-pointer.c create mode 100644 clang/test/Cilk/cilksan-O0.c create mode 100644 clang/test/Cilk/cilkscope-checks.c create mode 100644 clang/test/Cilk/cilkscope.c create mode 100644 clang/test/Cilk/clangchecks.cpp create mode 100644 clang/test/Cilk/constructors.cpp create mode 100644 clang/test/Cilk/early-return-while.c create mode 100644 clang/test/Cilk/early-return.c create mode 100644 clang/test/Cilk/early-return.cpp create mode 100644 clang/test/Cilk/hyper-address.c create mode 100644 clang/test/Cilk/hyper-alias-ctor.cpp create mode 100644 clang/test/Cilk/hyper-array-extern-1.cpp create mode 100644 clang/test/Cilk/hyper-array-extern-2.cpp create mode 100644 clang/test/Cilk/hyper-array-global.cpp create mode 100644 clang/test/Cilk/hyper-array-local.cpp create mode 100644 clang/test/Cilk/hyper-assign.c create mode 100644 clang/test/Cilk/hyper-autoincr.c create mode 100644 clang/test/Cilk/hyper-bare.cpp create mode 100644 clang/test/Cilk/hyper-cast-bad.c create mode 100644 clang/test/Cilk/hyper-complex.c create mode 100644 clang/test/Cilk/hyper-copy.c create mode 100644 
clang/test/Cilk/hyper-destruct.cpp create mode 100644 clang/test/Cilk/hyper-errors.c create mode 100644 clang/test/Cilk/hyper-expand1.cpp create mode 100644 clang/test/Cilk/hyper-expand2.cpp create mode 100644 clang/test/Cilk/hyper-expand3.cpp create mode 100644 clang/test/Cilk/hyper-generic.c create mode 100644 clang/test/Cilk/hyper-global-c.c create mode 100644 clang/test/Cilk/hyper-global-ctor-dtor.cpp create mode 100644 clang/test/Cilk/hyper-global-ctor-only.cpp create mode 100644 clang/test/Cilk/hyper-global-dtor-only.cpp create mode 100644 clang/test/Cilk/hyper-new-final.cpp create mode 100644 clang/test/Cilk/hyper-new.cpp create mode 100644 clang/test/Cilk/hyper-no-dtor.cpp create mode 100644 clang/test/Cilk/hyper-nocilk.c create mode 100644 clang/test/Cilk/hyper-overload.cpp create mode 100644 clang/test/Cilk/hyper-param-bad.c create mode 100644 clang/test/Cilk/hyper-param-bad.cpp create mode 100644 clang/test/Cilk/hyper-pointer.c create mode 100644 clang/test/Cilk/hyper-reference.cpp create mode 100644 clang/test/Cilk/hyper-register.c create mode 100644 clang/test/Cilk/hyper-struct-assign.c create mode 100644 clang/test/Cilk/hyper-template-errors.cpp create mode 100644 clang/test/Cilk/hyper-template.cpp create mode 100644 clang/test/Cilk/hyper-template2.cpp create mode 100644 clang/test/Cilk/hyper-unary.c create mode 100644 clang/test/Cilk/hyper-unique.c create mode 100644 clang/test/Cilk/hyper-zero.c create mode 100644 clang/test/Cilk/implicit-sync-scopes.cpp create mode 100644 clang/test/Cilk/implicit-sync.c create mode 100644 clang/test/Cilk/looptest.cpp create mode 100644 clang/test/Cilk/multiple-spawn-args-check.c create mode 100644 clang/test/Cilk/nested-trycatch.cpp create mode 100644 clang/test/Cilk/opencilk-spawn.cpp create mode 100644 clang/test/Cilk/reducer-skip-init.c create mode 100644 clang/test/Cilk/regiontest.c create mode 100644 clang/test/Cilk/spawn-atomic.c create mode 100644 clang/test/Cilk/spawn-builtin.c create mode 100644 
clang/test/Cilk/spawn-call-arg.c create mode 100644 clang/test/Cilk/spawn-decl-with-constructors.cpp create mode 100644 clang/test/Cilk/spawn-expr.c create mode 100644 clang/test/Cilk/spawn-in-cilk-for.c create mode 100644 clang/test/Cilk/spawn-return.cpp create mode 100644 clang/test/Cilk/spawn-template.cpp create mode 100644 clang/test/Cilk/spawntest.cpp create mode 100644 clang/test/Cilk/stream-compat.cpp create mode 100644 clang/test/Cilk/syncregion-debug-info.c create mode 100644 clang/test/Cilk/tapirloopattrs.c create mode 100644 clang/test/Cilk/taskframe-always-inline.cpp create mode 100644 clang/test/Cilk/trivial-assign-op.cpp create mode 100644 clang/test/Cilk/unreachable-sync.cpp create mode 100644 clang/test/Cilk/vla-of-hyper.c create mode 100644 clang/test/Cilk/worker-load-test.c create mode 100644 clang/test/Lexer/has_feature_comprehensive_static_instrumentation.cpp create mode 100644 llvm/.gitmodules create mode 100644 llvm/README.md delete mode 100644 llvm/README.txt create mode 100644 llvm/WORKSPACE create mode 100644 llvm/bindings/ocaml/transforms/tapir_opts/CMakeLists.txt create mode 100644 llvm/bindings/ocaml/transforms/tapir_opts/llvm_tapir_opts.ml create mode 100644 llvm/bindings/ocaml/transforms/tapir_opts/llvm_tapir_opts.mli create mode 100644 llvm/bindings/ocaml/transforms/tapir_opts/tapir_opts_ocaml.c create mode 100644 llvm/examples/Kaleidoscope/Tapir/CMakeLists.txt create mode 100644 llvm/examples/Kaleidoscope/Tapir/KaleidoscopeJIT.h create mode 100644 llvm/examples/Kaleidoscope/Tapir/toy.cpp create mode 100644 llvm/examples/Kaleidoscope/lib/toylib.c create mode 100644 llvm/include/llvm-c/Transforms/Tapir.h create mode 100644 llvm/include/llvm/Analysis/DataRaceFreeAliasAnalysis.h create mode 100644 llvm/include/llvm/Analysis/TapirRaceDetect.h create mode 100644 llvm/include/llvm/Analysis/TapirTargetFuncs.def create mode 100644 llvm/include/llvm/Analysis/TapirTaskInfo.h create mode 100644 llvm/include/llvm/Analysis/WorkSpanAnalysis.h 
create mode 100644 llvm/include/llvm/Transforms/Instrumentation/CSI.h create mode 100644 llvm/include/llvm/Transforms/Instrumentation/CilkSanitizer.h create mode 100644 llvm/include/llvm/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.h create mode 100644 llvm/include/llvm/Transforms/Instrumentation/SurgicalInstrumentationConfig.h create mode 100644 llvm/include/llvm/Transforms/Tapir.h create mode 100644 llvm/include/llvm/Transforms/Tapir/CilkABI.h create mode 100644 llvm/include/llvm/Transforms/Tapir/CilkRTSCilkFor.h create mode 100644 llvm/include/llvm/Transforms/Tapir/CudaABI.h create mode 100644 llvm/include/llvm/Transforms/Tapir/DRFScopedNoAliasAA.h create mode 100644 llvm/include/llvm/Transforms/Tapir/LambdaABI.h create mode 100644 llvm/include/llvm/Transforms/Tapir/LoopSpawningTI.h create mode 100644 llvm/include/llvm/Transforms/Tapir/LoopStripMine.h create mode 100644 llvm/include/llvm/Transforms/Tapir/LoopStripMinePass.h create mode 100644 llvm/include/llvm/Transforms/Tapir/LoweringUtils.h create mode 100644 llvm/include/llvm/Transforms/Tapir/OMPTaskABI.h create mode 100644 llvm/include/llvm/Transforms/Tapir/OpenCilkABI.h create mode 100644 llvm/include/llvm/Transforms/Tapir/OpenMPABI.h create mode 100644 llvm/include/llvm/Transforms/Tapir/Outline.h create mode 100644 llvm/include/llvm/Transforms/Tapir/QthreadsABI.h create mode 100644 llvm/include/llvm/Transforms/Tapir/SerialABI.h create mode 100644 llvm/include/llvm/Transforms/Tapir/SerializeSmallTasks.h create mode 100644 llvm/include/llvm/Transforms/Tapir/TapirLoopInfo.h create mode 100644 llvm/include/llvm/Transforms/Tapir/TapirTargetIDs.h create mode 100644 llvm/include/llvm/Transforms/Tapir/TapirToTarget.h create mode 100644 llvm/include/llvm/Transforms/Utils/TapirUtils.h create mode 100644 llvm/include/llvm/Transforms/Utils/TaskCanonicalize.h create mode 100644 llvm/include/llvm/Transforms/Utils/TaskSimplify.h create mode 100644 llvm/lib/Analysis/DataRaceFreeAliasAnalysis.cpp create 
mode 100644 llvm/lib/Analysis/TapirRaceDetect.cpp create mode 100644 llvm/lib/Analysis/TapirTaskInfo.cpp create mode 100644 llvm/lib/Analysis/WorkSpanAnalysis.cpp create mode 100644 llvm/lib/CodeGen/TapirCleanup.cpp create mode 100644 llvm/lib/Transforms/Instrumentation/CilkSanitizer.cpp create mode 100644 llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp create mode 100644 llvm/lib/Transforms/Instrumentation/SurgicalInstrumentationConfig.cpp create mode 100644 llvm/lib/Transforms/Tapir/CMakeLists.txt create mode 100644 llvm/lib/Transforms/Tapir/CilkABI.cpp create mode 100644 llvm/lib/Transforms/Tapir/CilkRTSCilkFor.cpp create mode 100644 llvm/lib/Transforms/Tapir/DRFScopedNoAliasAA.cpp create mode 100644 llvm/lib/Transforms/Tapir/LambdaABI.cpp create mode 100644 llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp create mode 100644 llvm/lib/Transforms/Tapir/LoopStripMine.cpp create mode 100644 llvm/lib/Transforms/Tapir/LoopStripMinePass.cpp create mode 100644 llvm/lib/Transforms/Tapir/LoweringUtils.cpp create mode 100644 llvm/lib/Transforms/Tapir/OMPTaskABI.cpp create mode 100644 llvm/lib/Transforms/Tapir/OpenCilkABI.cpp create mode 100644 llvm/lib/Transforms/Tapir/Outline.cpp create mode 100644 llvm/lib/Transforms/Tapir/QthreadsABI.cpp create mode 100644 llvm/lib/Transforms/Tapir/SerialABI.cpp create mode 100644 llvm/lib/Transforms/Tapir/SerializeSmallTasks.cpp create mode 100644 llvm/lib/Transforms/Tapir/Tapir.cpp create mode 100644 llvm/lib/Transforms/Tapir/TapirLoopInfo.cpp create mode 100644 llvm/lib/Transforms/Tapir/TapirToTarget.cpp create mode 100644 llvm/lib/Transforms/Utils/TapirUtils.cpp create mode 100644 llvm/lib/Transforms/Utils/TaskCanonicalize.cpp create mode 100644 llvm/lib/Transforms/Utils/TaskSimplify.cpp create mode 100644 llvm/test/Analysis/TapirRaceDetect/bitcast-function.ll create mode 100644 llvm/test/Analysis/TapirRaceDetect/check-pointer-with-casts.ll create mode 100644 
llvm/test/Analysis/TapirRaceDetect/tapir-rd-objects.ll create mode 100644 llvm/test/Analysis/TapirTaskInfo/detach-continue-loop.ll create mode 100644 llvm/test/Analysis/TapirTaskInfo/drf-aa-calls.ll create mode 100644 llvm/test/Analysis/TapirTaskInfo/interleaved-sync-region.ll create mode 100644 llvm/test/Analysis/TapirTaskInfo/memssa-drf.ll create mode 100644 llvm/test/Analysis/TapirTaskInfo/shared-eh-spindles.ll create mode 100644 llvm/test/Analysis/TapirTaskInfo/simple-spawn.ll create mode 100644 llvm/test/Analysis/TapirTaskInfo/spindle-loop.ll create mode 100644 llvm/test/Bitcode/tapir.ll create mode 100644 llvm/test/CodeGen/AArch64/setjmp-straighten.ll create mode 100644 llvm/test/CodeGen/X86/pr48533.ll create mode 100644 llvm/test/CodeGen/X86/setjmp-straighten.ll create mode 100644 llvm/test/Examples/Kaleidoscope/Tapir.test create mode 100644 llvm/test/Transforms/CSI/bb-cost.ll create mode 100644 llvm/test/Transforms/CSI/csi-global-names.ll create mode 100644 llvm/test/Transforms/Tapir/157.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/allocfreefn-nobuiltin.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/cilksan-aarch64-neon.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/cilksan-capture-ptr-inbounds-gep.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/cilksan-instr-checking.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/cilksan-loop-exit-loop.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/cilksan-split-block-preds.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/cilksan-taskframe-use.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/cilksan-unreachable-sharedeh.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/constantexpr.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/csi-global_ctor-zeroinit.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/da-gcd-different-loops.ll create mode 100644 
llvm/test/Transforms/Tapir/CilkSanitizer/fib.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/fib_racy.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/finddbgvalues.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/loop-hoist.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/loop-spawning-move-instrumentation.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/nqueens.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/nqueens_racy.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/null-task-for-bb.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/ptr-not-stripped.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/setup-blocks.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/split-unreachable-predecessors.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/task-analysis-unreachable.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/unreachable-in-loop.ll create mode 100644 llvm/test/Transforms/Tapir/aarch64-threadpointer-opt.ll create mode 100644 llvm/test/Transforms/Tapir/arm64-stealable-setjmp-base-pointer.ll create mode 100644 llvm/test/Transforms/Tapir/asan-parallel-promotable-alloca-check.ll create mode 100644 llvm/test/Transforms/Tapir/atomic-bool-sroa.ll create mode 100644 llvm/test/Transforms/Tapir/canonical-iv-size.ll create mode 100644 llvm/test/Transforms/Tapir/cilk-gxx-personality-inline.ll create mode 100644 llvm/test/Transforms/Tapir/cilk-lowering-complex.ll create mode 100644 llvm/test/Transforms/Tapir/cilk-stackframe-flags-load.ll create mode 100644 llvm/test/Transforms/Tapir/cilkrts-lowering-test.ll create mode 100644 llvm/test/Transforms/Tapir/cilkrts-spawn-large.ll create mode 100644 llvm/test/Transforms/Tapir/cilksan-tapirlower-asan.ll create mode 100644 llvm/test/Transforms/Tapir/codegen-task-frameaddress.ll create mode 100644 llvm/test/Transforms/Tapir/csenorhino.ll create mode 100644 
llvm/test/Transforms/Tapir/csi-detach-continue-split.ll create mode 100644 llvm/test/Transforms/Tapir/csi-detach-unwind.ll create mode 100644 llvm/test/Transforms/Tapir/csi-tapir-lowering.ll create mode 100644 llvm/test/Transforms/Tapir/dac-loopspawning-ivs.ll create mode 100644 llvm/test/Transforms/Tapir/dac-loopspawning-simple.ll create mode 100644 llvm/test/Transforms/Tapir/dead-tapir-intrinsics.ll create mode 100644 llvm/test/Transforms/Tapir/declare-main.ll create mode 100644 llvm/test/Transforms/Tapir/detach-gcov.ll create mode 100644 llvm/test/Transforms/Tapir/detach-phi-lowering.ll create mode 100644 llvm/test/Transforms/Tapir/detach-unreachable-with-exceptions.ll create mode 100644 llvm/test/Transforms/Tapir/detach-unwind-cleanup.ll create mode 100644 llvm/test/Transforms/Tapir/empty-scope.ll create mode 100644 llvm/test/Transforms/Tapir/empty.ll create mode 100644 llvm/test/Transforms/Tapir/equal-tripcounts.ll create mode 100644 llvm/test/Transforms/Tapir/exception-loop-spawning.ll create mode 100644 llvm/test/Transforms/Tapir/exception-lowering.ll create mode 100644 llvm/test/Transforms/Tapir/exception-spawn-in-parfor-loop-spawning.ll create mode 100644 llvm/test/Transforms/Tapir/exit.ll create mode 100644 llvm/test/Transforms/Tapir/functionattrs.ll create mode 100644 llvm/test/Transforms/Tapir/get-worker-number-lowering.ll create mode 100644 llvm/test/Transforms/Tapir/gvn-detach-continue.ll create mode 100644 llvm/test/Transforms/Tapir/hyperlookup-opts.ll create mode 100644 llvm/test/Transforms/Tapir/inclusive-range-tapir-loop.ll create mode 100644 llvm/test/Transforms/Tapir/indvar-high-cost-expansion.ll create mode 100644 llvm/test/Transforms/Tapir/inline-after-lowering.ll create mode 100644 llvm/test/Transforms/Tapir/inline-byval.ll create mode 100644 llvm/test/Transforms/Tapir/inline-detachedrethrow-phi.ll create mode 100644 llvm/test/Transforms/Tapir/inline-nounwind-detach-into-invoked-taskframe.ll create mode 100644 
llvm/test/Transforms/Tapir/inline-spawn-lpads.ll create mode 100644 llvm/test/Transforms/Tapir/inline-tapir-loop-in-continuation.ll create mode 100644 llvm/test/Transforms/Tapir/inline-task-update-phi.ll create mode 100644 llvm/test/Transforms/Tapir/inline-taskframe-resume.ll create mode 100644 llvm/test/Transforms/Tapir/inline-taskframe-split.ll create mode 100644 llvm/test/Transforms/Tapir/inline-unify-resume.ll create mode 100644 llvm/test/Transforms/Tapir/inlining-personality-2.ll create mode 100644 llvm/test/Transforms/Tapir/inlining-personality.ll create mode 100644 llvm/test/Transforms/Tapir/instcombine-sink-past-sync.ll create mode 100644 llvm/test/Transforms/Tapir/iv-canonicalize.ll create mode 100644 llvm/test/Transforms/Tapir/iv-use-after-loop.ll create mode 100644 llvm/test/Transforms/Tapir/jump-threading-detach-continue-2.ll create mode 100644 llvm/test/Transforms/Tapir/jump-threading-detach-continue.ll create mode 100644 llvm/test/Transforms/Tapir/jump-threading-tapir-vh.ll create mode 100644 llvm/test/Transforms/Tapir/lcssa-loop-task-exit.ll create mode 100644 llvm/test/Transforms/Tapir/libopencilk-abi.bc create mode 100644 llvm/test/Transforms/Tapir/licm-loop-task-exits.ll create mode 100644 llvm/test/Transforms/Tapir/loop-analysis.ll create mode 100644 llvm/test/Transforms/Tapir/loop-control-uses-parameter.ll create mode 100644 llvm/test/Transforms/Tapir/loop-grainsize.ll create mode 100644 llvm/test/Transforms/Tapir/loop-remark-iv.ll create mode 100644 llvm/test/Transforms/Tapir/loop-spawning-alloca-hoist.ll create mode 100644 llvm/test/Transforms/Tapir/loop-spawning-iv-tripcount-type-mismatch.ll create mode 100644 llvm/test/Transforms/Tapir/loop-spawning-iv-types.ll create mode 100644 llvm/test/Transforms/Tapir/loop-spawning-unusual-backedge.ll create mode 100644 llvm/test/Transforms/Tapir/loop-spawning-variant-condition.ll create mode 100644 llvm/test/Transforms/Tapir/loop-stripmine-attrs.ll create mode 100644 
llvm/test/Transforms/Tapir/loop-stripmine-clone-sharedeh.ll create mode 100644 llvm/test/Transforms/Tapir/loop-stripmine-epilog-taskframe.ll create mode 100644 llvm/test/Transforms/Tapir/loop-stripmine-iv-tripcount-type-mismatch.ll create mode 100644 llvm/test/Transforms/Tapir/loop-stripmine-unwind.ll create mode 100644 llvm/test/Transforms/Tapir/loop-stripmine.ll create mode 100644 llvm/test/Transforms/Tapir/loop-unknown-tripcount.ll create mode 100644 llvm/test/Transforms/Tapir/loop-unswitch-lcssa.ll create mode 100644 llvm/test/Transforms/Tapir/loop-unswitch.ll create mode 100644 llvm/test/Transforms/Tapir/looplimit.ll create mode 100644 llvm/test/Transforms/Tapir/loops-complex.ll create mode 100644 llvm/test/Transforms/Tapir/loops-with-lcssa-lpads.ll create mode 100644 llvm/test/Transforms/Tapir/loopstripmine-parepil.ll create mode 100644 llvm/test/Transforms/Tapir/lower-eh-unlink.ll create mode 100644 llvm/test/Transforms/Tapir/lower-static-allocas.ll create mode 100644 llvm/test/Transforms/Tapir/lower-unwind-phis.ll create mode 100644 llvm/test/Transforms/Tapir/lowering-arg-struct-attributes.ll create mode 100644 llvm/test/Transforms/Tapir/lowering-nested-detaches-with-exceptions.ll create mode 100644 llvm/test/Transforms/Tapir/lowering-nested-detaches.ll create mode 100644 llvm/test/Transforms/Tapir/lowering-taskframe-resume-critical.ll create mode 100644 llvm/test/Transforms/Tapir/lowering-taskframe-shared-eh.ll create mode 100644 llvm/test/Transforms/Tapir/machine-sink-loop-peel.ll create mode 100644 llvm/test/Transforms/Tapir/machine-sink.ll create mode 100644 llvm/test/Transforms/Tapir/memoryssa-sync.ll create mode 100644 llvm/test/Transforms/Tapir/missed-loop-opts-test.ll create mode 100644 llvm/test/Transforms/Tapir/nested-loop-spawning-with-exceptions.ll create mode 100644 llvm/test/Transforms/Tapir/nested-loop-spawning.ll create mode 100644 llvm/test/Transforms/Tapir/nested-outline-vector-width.ll create mode 100644 
llvm/test/Transforms/Tapir/nested-trycatch.ll create mode 100644 llvm/test/Transforms/Tapir/opencilk-lowering-debuginfo.ll create mode 100644 llvm/test/Transforms/Tapir/opencilk-no-bitcode.ll create mode 100644 llvm/test/Transforms/Tapir/outline-ancestor-shared-eh-spindle.ll create mode 100644 llvm/test/Transforms/Tapir/outline-helper-debug.ll create mode 100644 llvm/test/Transforms/Tapir/outline-helper-drop-attrs.ll create mode 100644 llvm/test/Transforms/Tapir/outline-prologue-data.ll create mode 100644 llvm/test/Transforms/Tapir/outline-remap-debug-md.ll create mode 100644 llvm/test/Transforms/Tapir/parallel-licm-norhino.ll create mode 100644 llvm/test/Transforms/Tapir/phi-in-continuation.ll create mode 100644 llvm/test/Transforms/Tapir/preheader-sync-split-analyses.ll create mode 100644 llvm/test/Transforms/Tapir/returned-param.ll create mode 100644 llvm/test/Transforms/Tapir/sentinel-test.ll create mode 100644 llvm/test/Transforms/Tapir/serial-tt-cfg-analysis.ll create mode 100644 llvm/test/Transforms/Tapir/serialize-detach-taskframe-simple.ll create mode 100644 llvm/test/Transforms/Tapir/serialize-small-loop.ll create mode 100644 llvm/test/Transforms/Tapir/setjmp-hoisting.ll create mode 100644 llvm/test/Transforms/Tapir/simple-loop-unswitch.ll create mode 100644 llvm/test/Transforms/Tapir/simplifycfg-syncunwind.ll create mode 100644 llvm/test/Transforms/Tapir/slp-vectorize-long-bb.ll create mode 100644 llvm/test/Transforms/Tapir/spawn-pfor.ll create mode 100644 llvm/test/Transforms/Tapir/sret-param.ll create mode 100644 llvm/test/Transforms/Tapir/sroa-detached-alloca.ll create mode 100644 llvm/test/Transforms/Tapir/sroa-preserve-task-info.ll create mode 100644 llvm/test/Transforms/Tapir/sroa-racy.ll create mode 100644 llvm/test/Transforms/Tapir/ssa-update-multiple-reattach-predecessors.ll create mode 100644 llvm/test/Transforms/Tapir/stealable-attribute.ll create mode 100644 llvm/test/Transforms/Tapir/strandpure-licm.ll create mode 100644 
llvm/test/Transforms/Tapir/stripmine-inclusive-range.ll create mode 100644 llvm/test/Transforms/Tapir/sync-exception.ll create mode 100644 llvm/test/Transforms/Tapir/sync-merge.ll create mode 100644 llvm/test/Transforms/Tapir/sync-simplify.ll create mode 100644 llvm/test/Transforms/Tapir/syncreg-debuginfo.ll create mode 100644 llvm/test/Transforms/Tapir/syncregs.ll create mode 100644 llvm/test/Transforms/Tapir/tapir-cleanup.ll create mode 100644 llvm/test/Transforms/Tapir/tapir-licm.ll create mode 100644 llvm/test/Transforms/Tapir/tapir-loop-metadata-valnotdom.ll create mode 100644 llvm/test/Transforms/Tapir/tapir-loop-metadata.ll create mode 100644 llvm/test/Transforms/Tapir/tapir-lower-phi.ll create mode 100644 llvm/test/Transforms/Tapir/tapir-runtime-merge.ll create mode 100644 llvm/test/Transforms/Tapir/task-in-loop-task-exit.ll create mode 100644 llvm/test/Transforms/Tapir/task-inline-through-invoke.ll create mode 100644 llvm/test/Transforms/Tapir/task-simplify-domtree-update.ll create mode 100644 llvm/test/Transforms/Tapir/taskframe-csan-csi.ll create mode 100644 llvm/test/Transforms/Tapir/taskframe-end-debuginfo.ll create mode 100644 llvm/test/Transforms/Tapir/taskframe-one-block-promote.ll create mode 100644 llvm/test/Transforms/Tapir/taskframe-remove.ll create mode 100644 llvm/test/Transforms/Tapir/taskframe-resume-in-loop.ll create mode 100644 llvm/test/Transforms/Tapir/taskframe-shared-eh-spindle.ll create mode 100644 llvm/test/Transforms/Tapir/taskframe-shared-exit.ll create mode 100644 llvm/test/Transforms/Tapir/tre-middle-sync.ll create mode 100644 llvm/test/Transforms/Tapir/tre-syncunwind-lpad.ll create mode 100644 llvm/test/Transforms/Tapir/tre-syncunwind.ll create mode 100644 llvm/test/Transforms/Tapir/tre-tapir-runtime.ll create mode 100644 llvm/test/Transforms/Tapir/tre-two-syncs.ll create mode 100644 llvm/test/Transforms/Tapir/tre.ll create mode 100644 llvm/test/Transforms/Tapir/tsan-detach-invoke.ll create mode 100644 
llvm/test/Transforms/Tapir/tsan-task-unreachable.ll create mode 100644 llvm/test/Transforms/Tapir/unroll-task-exits.ll create mode 100644 llvm/test/Transforms/Tapir/vector-outline.ll create mode 100644 llvm/test/Transforms/Tapir/wls-licm.ll create mode 100644 llvm/test/tools/llvm-reduce/remove-bbs-tapir.ll diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000000000..1f3393bbbd87c1 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,53 @@ +--- +name: Bug report +about: Create a report to help us improve OpenCilk +title: '' +labels: bug +assignees: '' + +--- + +### Describe the bug + +A clear and concise description of the bug. + +### Expected behavior + +What you expected to happen. + +### OpenCilk version + +- Release version: _[e.g., 1.0]_ +- Distribution method: _[e.g., `.sh` archive, Docker image, ...]_ + +_-OR-_ + +Built from source: +- `opencilk-project`: `branch` commit# (or tag) +- `cheetah`: `branch` commit# (or tag) +- `productivity-tools`: `branch` commit# (or tag) +- `infrastructure`: `branch` commit# (or tag) _(if applicable)_ + +### System information + +- OS: distribution, version _[e.g., Ubuntu 20.04]_ +- CPU: _[e.g., Intel Xeon Platinum 8260]_ + +### Steps to reproduce (include relevant output) + +1. _[E.g., clone repo X]_ + +2. _[E.g., build with parameters XYZ]_ + + Pass the `-v` flag to OpenCilk `clang`/`clang++` to show verbose compilation + commands and output. + +3. _[E.g., compiler crash output, runtime failure...]_ + +### Working example code + +If applicable, include a working code example which triggers the bug. + +### Additional comments + +Add any other comments about the issue here. 
diff --git a/.github/workflows/clang-tests.yml b/.github/workflows/clang-tests.yml index 1c85aad64f22d6..e1023e2b025cfa 100644 --- a/.github/workflows/clang-tests.yml +++ b/.github/workflows/clang-tests.yml @@ -9,6 +9,8 @@ on: ignore-forks: true branches: - 'release/**' + - 'dev/**' + - 'ci/**' paths: - 'clang/**' - '.github/workflows/clang-tests.yml' @@ -18,6 +20,8 @@ on: ignore-forks: true branches: - 'release/**' + - 'dev/**' + - 'ci/**' paths: - 'clang/**' - '.github/workflows/clang-tests.yml' @@ -32,7 +36,6 @@ concurrency: jobs: check_clang: - if: github.repository_owner == 'llvm' name: Test clang,lldb,libclc uses: ./.github/workflows/llvm-project-tests.yml with: diff --git a/.github/workflows/libclc-tests.yml b/.github/workflows/libclc-tests.yml index 1a29d3284f4f4c..acfdd6dbadda70 100644 --- a/.github/workflows/libclc-tests.yml +++ b/.github/workflows/libclc-tests.yml @@ -9,6 +9,8 @@ on: ignore-forks: true branches: - 'release/**' + - 'dev/**' + - 'ci/**' paths: - 'libclc/**' - '.github/workflows/libclc-tests.yml' @@ -19,6 +21,8 @@ on: ignore-forks: true branches: - 'release/**' + - 'dev/**' + - 'ci/**' paths: - 'libclc/**' - '.github/workflows/libclc-tests.yml' @@ -34,7 +38,6 @@ concurrency: jobs: check_libclc: - if: github.repository_owner == 'llvm' name: Test libclc uses: ./.github/workflows/llvm-project-tests.yml with: diff --git a/.github/workflows/lld-tests.yml b/.github/workflows/lld-tests.yml index e806c77df28724..af3bef7cebc9f7 100644 --- a/.github/workflows/lld-tests.yml +++ b/.github/workflows/lld-tests.yml @@ -9,6 +9,8 @@ on: ignore-forks: true branches: - 'release/**' + - 'dev/**' + - 'ci/**' paths: - 'lld/**' - '.github/workflows/lld-tests.yml' @@ -18,6 +20,8 @@ on: ignore-forks: true branches: - 'release/**' + - 'dev/**' + - 'ci/**' paths: - 'lld/**' - '.github/workflows/lld-tests.yml' @@ -32,7 +36,6 @@ concurrency: jobs: check_lld: - if: github.repository_owner == 'llvm' name: Test lld uses: ./.github/workflows/llvm-project-tests.yml with: 
diff --git a/.github/workflows/lldb-tests.yml b/.github/workflows/lldb-tests.yml index 4d96fa501b8629..b6ae85ec40987b 100644 --- a/.github/workflows/lldb-tests.yml +++ b/.github/workflows/lldb-tests.yml @@ -9,6 +9,8 @@ on: ignore-forks: true branches: - 'release/**' + - 'dev/**' + - 'ci/**' paths: - 'lldb/**' - '.github/workflows/lldb-tests.yml' @@ -19,6 +21,8 @@ on: ignore-forks: true branches: - 'release/**' + - 'dev/**' + - 'ci/**' paths: - 'lldb/**' - '.github/workflows/lldb-tests.yml' @@ -34,7 +38,6 @@ concurrency: jobs: build_lldb: - if: github.repository_owner == 'llvm' name: Build lldb uses: ./.github/workflows/llvm-project-tests.yml with: diff --git a/.github/workflows/llvm-bugs.yml b/.github/workflows/llvm-bugs.yml index f592dd6ccd9033..181abfbbc1bd47 100644 --- a/.github/workflows/llvm-bugs.yml +++ b/.github/workflows/llvm-bugs.yml @@ -12,7 +12,7 @@ on: jobs: auto-subscribe: runs-on: ubuntu-latest - if: github.repository == 'llvm/llvm-project' + if: github.repository_owner == 'llvm' steps: - uses: actions/setup-node@v3 with: diff --git a/.github/workflows/llvm-project-tests.yml b/.github/workflows/llvm-project-tests.yml index 26a08a70f3db5f..e1af79062d83fa 100644 --- a/.github/workflows/llvm-project-tests.yml +++ b/.github/workflows/llvm-project-tests.yml @@ -28,6 +28,11 @@ concurrency: group: llvm-project-${{ github.workflow }}-${{ inputs.projects }}${{ github.ref }} cancel-in-progress: ${{ startsWith(github.ref, 'refs/pull/') }} +env: + # Workaround for https://github.com/actions/virtual-environments/issues/5900. 
+ # This should be a no-op for non-mac OSes + CPLUS_INCLUDE_PATH: /usr/local/Cellar/llvm@15/15.0.7/include/c++/v1:/usr/local/Cellar/llvm/15.0.7_1/include/c++/v1:/Library/Developer/CommandLineTools/SDKs/MacOSX12.1.sdk/usr/include + jobs: lit-tests: name: Lit Tests @@ -37,9 +42,9 @@ jobs: matrix: os: - ubuntu-latest - # Use windows-2019 due to: - # https://developercommunity.visualstudio.com/t/Prev-Issue---with-__assume-isnan-/1597317 - - windows-2019 + # # Use windows-2019 due to: + # # https://developercommunity.visualstudio.com/t/Prev-Issue---with-__assume-isnan-/1597317 + # - windows-2019 # We're using a specific version of macOS due to: # https://github.com/actions/virtual-environments/issues/5900 - macOS-11 @@ -49,6 +54,11 @@ jobs: uses: llvm/actions/setup-windows@main with: arch: amd64 + - name: Check macOS (for debugging) + if: startsWith(matrix.os, 'macos') + run: | + ls /Library/Developer/CommandLineTools/SDKs + find /usr/local -name "cmath*" # On Windows, starting with win19/20220814.1, cmake choose the 32-bit # python3.10.6 libraries instead of the 64-bit libraries when building # lldb. Using this setup-python action to make 3.10 the default diff --git a/.github/workflows/llvm-tests.yml b/.github/workflows/llvm-tests.yml index 5e93f073787196..5bb422aa639b5d 100644 --- a/.github/workflows/llvm-tests.yml +++ b/.github/workflows/llvm-tests.yml @@ -9,6 +9,8 @@ on: ignore-forks: true branches: - 'release/**' + - 'dev/**' + - 'ci/**' paths: - 'llvm/**' - '.github/workflows/llvm-tests.yml' @@ -17,6 +19,8 @@ on: ignore-forks: true branches: - 'release/**' + - 'dev/**' + - 'ci/**' paths: - 'llvm/**' - '.github/workflows/llvm-tests.yml' @@ -30,7 +34,6 @@ concurrency: jobs: check_all: - if: github.repository_owner == 'llvm' name: Test llvm,clang,libclc uses: ./.github/workflows/llvm-project-tests.yml with: @@ -40,7 +43,6 @@ jobs: # These need to be separate from the check_all job, becuase there is not enough disk # space to build all these projects on Windows. 
build_lldb: - if: github.repository_owner == 'llvm' name: Build lldb uses: ./.github/workflows/llvm-project-tests.yml with: @@ -48,153 +50,152 @@ jobs: projects: clang;lldb check_lld: - if: github.repository_owner == 'llvm' name: Test lld uses: ./.github/workflows/llvm-project-tests.yml with: build_target: check-lld projects: lld - abi-dump-setup: - if: github.repository_owner == 'llvm' - runs-on: ubuntu-latest - outputs: - BASELINE_REF: ${{ steps.vars.outputs.BASELINE_REF }} - ABI_HEADERS: ${{ steps.vars.outputs.ABI_HEADERS }} - BASELINE_VERSION_MAJOR: ${{ steps.vars.outputs.BASELINE_VERSION_MAJOR }} - LLVM_VERSION_MAJOR: ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} - LLVM_VERSION_MINOR: ${{ steps.version.outputs.LLVM_VERSION_MINOR }} - LLVM_VERSION_PATCH: ${{ steps.version.outputs.LLVM_VERSION_PATCH }} - steps: - - name: Checkout source - uses: actions/checkout@v3 - with: - fetch-depth: 250 + # abi-dump-setup: + # if: github.repository_owner == 'llvm' + # runs-on: ubuntu-latest + # outputs: + # BASELINE_REF: ${{ steps.vars.outputs.BASELINE_REF }} + # ABI_HEADERS: ${{ steps.vars.outputs.ABI_HEADERS }} + # BASELINE_VERSION_MAJOR: ${{ steps.vars.outputs.BASELINE_VERSION_MAJOR }} + # LLVM_VERSION_MAJOR: ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} + # LLVM_VERSION_MINOR: ${{ steps.version.outputs.LLVM_VERSION_MINOR }} + # LLVM_VERSION_PATCH: ${{ steps.version.outputs.LLVM_VERSION_PATCH }} + # steps: + # - name: Checkout source + # uses: actions/checkout@v3 + # with: + # fetch-depth: 250 - - name: Get LLVM version - id: version - uses: llvm/actions/get-llvm-version@main + # - name: Get LLVM version + # id: version + # uses: llvm/actions/get-llvm-version@main - - name: Setup Variables - id: vars - run: | - if [ ${{ steps.version.outputs.LLVM_VERSION_MINOR }} -ne 0 -o ${{ steps.version.outputs.LLVM_VERSION_PATCH }} -eq 0 ]; then - echo "BASELINE_VERSION_MAJOR=$(( ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} - 1))" >> $GITHUB_OUTPUT - echo "ABI_HEADERS=llvm-c" 
>> $GITHUB_OUTPUT - else - echo "BASELINE_VERSION_MAJOR=${{ steps.version.outputs.LLVM_VERSION_MAJOR }}" >> $GITHUB_OUTPUT - echo "ABI_HEADERS=." >> $GITHUB_OUTPUT - fi + # - name: Setup Variables + # id: vars + # run: | + # if [ ${{ steps.version.outputs.LLVM_VERSION_MINOR }} -ne 0 -o ${{ steps.version.outputs.LLVM_VERSION_PATCH }} -eq 0 ]; then + # echo "BASELINE_VERSION_MAJOR=$(( ${{ steps.version.outputs.LLVM_VERSION_MAJOR }} - 1))" >> $GITHUB_OUTPUT + # echo "ABI_HEADERS=llvm-c" >> $GITHUB_OUTPUT + # else + # echo "BASELINE_VERSION_MAJOR=${{ steps.version.outputs.LLVM_VERSION_MAJOR }}" >> $GITHUB_OUTPUT + # echo "ABI_HEADERS=." >> $GITHUB_OUTPUT + # fi - abi-dump: - if: github.repository_owner == 'llvm' - needs: abi-dump-setup - runs-on: ubuntu-latest - strategy: - matrix: - name: - - build-baseline - - build-latest - include: - - name: build-baseline - llvm_version_major: ${{ needs.abi-dump-setup.outputs.BASELINE_VERSION_MAJOR }} - ref: llvmorg-${{ needs.abi-dump-setup.outputs.BASELINE_VERSION_MAJOR }}.0.0 - repo: llvm/llvm-project - - name: build-latest - llvm_version_major: ${{ needs.abi-dump-setup.outputs.LLVM_VERSION_MAJOR }} - ref: ${{ github.sha }} - repo: ${{ github.repository }} - steps: - - name: Install Ninja - uses: llvm/actions/install-ninja@main - - name: Install abi-compliance-checker - run: | - sudo apt-get install abi-dumper autoconf pkg-config - - name: Install universal-ctags - run: | - git clone https://github.com/universal-ctags/ctags.git - cd ctags - ./autogen.sh - ./configure - sudo make install - - name: Download source code - uses: llvm/actions/get-llvm-project-src@main - with: - ref: ${{ matrix.ref }} - repo: ${{ matrix.repo }} - - name: Configure - run: | - mkdir install - cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="" -DLLVM_BUILD_LLVM_DYLIB=ON -DCMAKE_C_FLAGS_DEBUG="-g1 -Og" -DCMAKE_CXX_FLAGS_DEBUG="-g1 -Og" -DCMAKE_INSTALL_PREFIX=$(pwd)/install llvm - - name: Build - # Need to run install-LLVM twice to 
ensure the symlink is installed (this is a bug). - run: | - ninja -C build install-LLVM - ninja -C build install-LLVM - ninja -C build install-llvm-headers - - name: Dump ABI - run: | - if [ "${{ needs.abi-dump-setup.outputs.ABI_HEADERS }}" = "llvm-c" ]; then - nm ./install/lib/libLLVM.so | awk "/T _LLVM/ || /T LLVM/ { print $3 }" | sort -u | sed -e "s/^_//g" | cut -d ' ' -f 3 > llvm.symbols - # Even though the -symbols-list option doesn't seem to filter out the symbols, I believe it speeds up processing, so I'm leaving it in. - export EXTRA_ARGS="-symbols-list llvm.symbols" - else - touch llvm.symbols - fi - abi-dumper $EXTRA_ARGS -lver ${{ matrix.ref }} -skip-cxx -public-headers ./install/include/${{ needs.abi-dump-setup.outputs.ABI_HEADERS }} -o ${{ matrix.ref }}.abi ./install/lib/libLLVM.so - # Remove symbol versioning from dumps, so we can compare across major versions. - sed -i 's/LLVM_${{ matrix.llvm_version_major }}/LLVM_NOVERSION/' ${{ matrix.ref }}.abi - - name: Upload ABI file - uses: actions/upload-artifact@v3 - with: - name: ${{ matrix.name }} - path: ${{ matrix.ref }}.abi + # abi-dump: + # if: github.repository_owner == 'llvm' + # needs: abi-dump-setup + # runs-on: ubuntu-latest + # strategy: + # matrix: + # name: + # - build-baseline + # - build-latest + # include: + # - name: build-baseline + # llvm_version_major: ${{ needs.abi-dump-setup.outputs.BASELINE_VERSION_MAJOR }} + # ref: llvmorg-${{ needs.abi-dump-setup.outputs.BASELINE_VERSION_MAJOR }}.0.0 + # repo: llvm/llvm-project + # - name: build-latest + # llvm_version_major: ${{ needs.abi-dump-setup.outputs.LLVM_VERSION_MAJOR }} + # ref: ${{ github.sha }} + # repo: ${{ github.repository }} + # steps: + # - name: Install Ninja + # uses: llvm/actions/install-ninja@main + # - name: Install abi-compliance-checker + # run: | + # sudo apt-get install abi-dumper autoconf pkg-config + # - name: Install universal-ctags + # run: | + # git clone https://github.com/universal-ctags/ctags.git + # cd ctags + # 
./autogen.sh + # ./configure + # sudo make install + # - name: Download source code + # uses: llvm/actions/get-llvm-project-src@main + # with: + # ref: ${{ matrix.ref }} + # repo: ${{ matrix.repo }} + # - name: Configure + # run: | + # mkdir install + # cmake -B build -G Ninja -DCMAKE_BUILD_TYPE=Debug -DLLVM_TARGETS_TO_BUILD="" -DLLVM_BUILD_LLVM_DYLIB=ON -DCMAKE_C_FLAGS_DEBUG="-g1 -Og" -DCMAKE_CXX_FLAGS_DEBUG="-g1 -Og" -DCMAKE_INSTALL_PREFIX=$(pwd)/install llvm + # - name: Build + # # Need to run install-LLVM twice to ensure the symlink is installed (this is a bug). + # run: | + # ninja -C build install-LLVM + # ninja -C build install-LLVM + # ninja -C build install-llvm-headers + # - name: Dump ABI + # run: | + # if [ "${{ needs.abi-dump-setup.outputs.ABI_HEADERS }}" = "llvm-c" ]; then + # nm ./install/lib/libLLVM.so | awk "/T _LLVM/ || /T LLVM/ { print $3 }" | sort -u | sed -e "s/^_//g" | cut -d ' ' -f 3 > llvm.symbols + # # Even though the -symbols-list option doesn't seem to filter out the symbols, I believe it speeds up processing, so I'm leaving it in. + # export EXTRA_ARGS="-symbols-list llvm.symbols" + # else + # touch llvm.symbols + # fi + # abi-dumper $EXTRA_ARGS -lver ${{ matrix.ref }} -skip-cxx -public-headers ./install/include/${{ needs.abi-dump-setup.outputs.ABI_HEADERS }} -o ${{ matrix.ref }}.abi ./install/lib/libLLVM.so + # # Remove symbol versioning from dumps, so we can compare across major versions. 
+ # sed -i 's/LLVM_${{ matrix.llvm_version_major }}/LLVM_NOVERSION/' ${{ matrix.ref }}.abi + # - name: Upload ABI file + # uses: actions/upload-artifact@v3 + # with: + # name: ${{ matrix.name }} + # path: ${{ matrix.ref }}.abi - - name: Upload symbol list file - if: matrix.name == 'build-baseline' - uses: actions/upload-artifact@v3 - with: - name: symbol-list - path: llvm.symbols + # - name: Upload symbol list file + # if: matrix.name == 'build-baseline' + # uses: actions/upload-artifact@v3 + # with: + # name: symbol-list + # path: llvm.symbols - abi-compare: - if: github.repository_owner == 'llvm' - runs-on: ubuntu-latest - needs: - - abi-dump-setup - - abi-dump - steps: - - name: Download baseline - uses: actions/download-artifact@v3 - with: - name: build-baseline - path: build-baseline - - name: Download latest - uses: actions/download-artifact@v3 - with: - name: build-latest - path: build-latest - - name: Download symbol list - uses: actions/download-artifact@v3 - with: - name: symbol-list - path: symbol-list + # abi-compare: + # if: github.repository_owner == 'llvm' + # runs-on: ubuntu-latest + # needs: + # - abi-dump-setup + # - abi-dump + # steps: + # - name: Download baseline + # uses: actions/download-artifact@v3 + # with: + # name: build-baseline + # path: build-baseline + # - name: Download latest + # uses: actions/download-artifact@v3 + # with: + # name: build-latest + # path: build-latest + # - name: Download symbol list + # uses: actions/download-artifact@v3 + # with: + # name: symbol-list + # path: symbol-list - - name: Install abi-compliance-checker - run: sudo apt-get install abi-compliance-checker - - name: Compare ABI - run: | - if [ -s symbol-list/llvm.symbols ]; then - # This option doesn't seem to work with the ABI dumper, so passing it here. 
- export EXTRA_ARGS="-symbols-list symbol-list/llvm.symbols" - fi - # FIXME: Reading of gzip'd abi files on the GitHub runners stop - # working some time in March of 2021, likely due to a change in the - # runner's environment. - abi-compliance-checker $EXTRA_ARGS -l libLLVM.so -old build-baseline/*.abi -new build-latest/*.abi || test "${{ needs.abi-dump-setup.outputs.ABI_HEADERS }}" = "llvm-c" - - name: Upload ABI Comparison - if: always() - uses: actions/upload-artifact@v3 - with: - name: compat-report-${{ github.sha }} - path: compat_reports/ + # - name: Install abi-compliance-checker + # run: sudo apt-get install abi-compliance-checker + # - name: Compare ABI + # run: | + # if [ -s symbol-list/llvm.symbols ]; then + # # This option doesn't seem to work with the ABI dumper, so passing it here. + # export EXTRA_ARGS="-symbols-list symbol-list/llvm.symbols" + # fi + # # FIXME: Reading of gzip'd abi files on the GitHub runners stop + # # working some time in March of 2021, likely due to a change in the + # # runner's environment. 
+ # abi-compliance-checker $EXTRA_ARGS -l libLLVM.so -old build-baseline/*.abi -new build-latest/*.abi || test "${{ needs.abi-dump-setup.outputs.ABI_HEADERS }}" = "llvm-c" + # - name: Upload ABI Comparison + # if: always() + # uses: actions/upload-artifact@v3 + # with: + # name: compat-report-${{ github.sha }} + # path: compat_reports/ diff --git a/MIT_LICENSE.TXT b/MIT_LICENSE.TXT new file mode 100644 index 00000000000000..902549ed26ab28 --- /dev/null +++ b/MIT_LICENSE.TXT @@ -0,0 +1,29 @@ +=============================================================== +Modifications to the LLVM Project for OpenCilk are licensed under the +MIT License with the OpenCilk Addendum: +=============================================================== + +Copyright (c) 2020 Massachusetts Institute of Technology + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal with the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
+ +---- OpenCilk Addendum to the MIT License ---- + +As an alternative to distributing the Software under this license, you may +distribute the Software under the LLVM license. diff --git a/README.md b/README.md index eb8d624d75cecd..e71e0eb9cf3e79 100644 --- a/README.md +++ b/README.md @@ -1,39 +1,604 @@ -# The LLVM Compiler Infrastructure +# The OpenCilk project -Welcome to the LLVM project! +Welcome to the OpenCilk project! -This repository contains the source code for LLVM, a toolkit for the -construction of highly optimized compilers, optimizers, and run-time -environments. +[***OpenCilk***][SchardlLe23] is a state-of-the-art open-source +implementation of the Cilk task-parallel programming platform. +OpenCilk supports writing fast parallel programs using the Cilk +task-parallel language extensions to C/C++. In addition, OpenCilk +provides a platform to develop compilers, runtime systems, and +program-analysis tools for task-parallel code. -The LLVM project has multiple components. The core of the project is -itself called "LLVM". This contains all of the tools, libraries, and header -files needed to process intermediate representations and convert them into -object files. Tools include an assembler, disassembler, bitcode analyzer, and -bitcode optimizer. +This repository contains the source code for the OpenCilk compiler, +which is based on the [LLVM compiler infrastructure](https://llvm.org/) +and implements the latest official version of +[***Tapir***][SchardlMoLe17], a compiler intermediate representation +(IR) for task parallelism. By using Tapir, the OpenCilk compiler is +able to optimize task-parallel programs more effectively than +mainstream compilers. 
OpenCilk also contains an efficient +[parallel runtime library](https://github.com/OpenCilk/cheetah), +that automatically schedules and load-balances the Cilk computation, +and a [suite of tools](https://github.com/OpenCilk/productivity-tools), +for Cilk programs, including a race detector and a scalability analyzer. -C-like languages use the [Clang](http://clang.llvm.org/) frontend. This -component compiles C, C++, Objective-C, and Objective-C++ code into LLVM bitcode --- and from there into object files, using LLVM. +This README provides a brief, noncomprehensive overview of how to get +and use OpenCilk. This overview aims to help you get started using +OpenCilk to write fast parallel programs in Cilk. For more information +about OpenCilk, including installation guides, user guides, tutorials, +and references, please see the +[OpenCilk website](https://www.opencilk.org/). -Other components include: -the [libc++ C++ standard library](https://libcxx.llvm.org), -the [LLD linker](https://lld.llvm.org), and more. +## Getting OpenCilk -## Getting the Source Code and Building LLVM +**Supported systems:** OpenCilk has been tested on a variety of +modern x86-64 and 64-bit ARM processors, on recent versions of macOS and +FreeBSD, and on a variety of modern Linux distributions. -Consult the -[Getting Started with LLVM](https://llvm.org/docs/GettingStarted.html#getting-the-source-code-and-building-llvm) -page for information on building and running LLVM. +Releases of OpenCilk are available from the +[OpenCilk releases page](https://github.com/OpenCilk/opencilk-project/releases) +on GitHub. Precompiled builds of OpenCilk for some releases and target +systems can be found on the same page. Instructions to install OpenCilk +from a precompiled binary can be found on the +[install page](https://www.opencilk.org/doc/users-guide/install/) on the +OpenCilk website. 
-For information on how to contribute to the LLVM project, please take a look at -the [Contributing to LLVM](https://llvm.org/docs/Contributing.html) guide. +The scripts in the +[OpenCilk infrastructure](https://github.com/OpenCilk/infrastructure) +repository make it easy to build a particular release of OpenCilk from +source. For example, the following steps will download the +OpenCilk release tagged `` into the `opencilk` +subdirectory in the current working directory and then build OpenCilk +into the `build` subdirectory of the current working directory: +```console +git clone https://github.com/OpenCilk/infrastructure +infrastructure/tools/get -t $(pwd)/opencilk +infrastructure/tools/build $(pwd)/opencilk $(pwd)/build +``` -## Getting in touch +For more instructions on building OpenCilk from source, see the +[Build OpenCilk from source](https://www.opencilk.org/doc/users-guide/build-opencilk-from-source/) +guide. -Join the [LLVM Discourse forums](https://discourse.llvm.org/), [Discord -chat](https://discord.gg/xS7Z362), or #llvm IRC channel on -[OFTC](https://oftc.net/). +## Building and running Cilk programs -The LLVM project has adopted a [code of conduct](https://llvm.org/docs/CodeOfConduct.html) for -participants to all modes of communication within the project. +To use OpenCilk to build and run Cilk programs, include the header file +`cilk/cilk.h` in your program's source code, and then compile and link +your program as you would an ordinary C/C++ program using OpenCilk's +`clang` or `clang++` binary and the additional `-fopencilk` flag. 
+ +For example, on Linux, the following command will build an optimized Cilk +executable `fib` from `fib.c` using OpenCilk, assuming that OpenCilk is +installed at `/opt/opencilk-2`: +```console +/opt/opencilk-2/bin/clang fib.c -o fib -O3 -fopencilk +``` +On macOS, you will need XCode or Command Line Tools installed, to provide +the necessary system headers and libraries, and to preface your compile +and link commands with `xcrun`: +```console +xcrun /opt/opencilk-2/bin/clang fib.c -o fib -O3 -fopencilk +``` + +To run your Cilk program, simply run the resulting executable. +For example: +```console +./fib 40 +``` +You can specify the number of Cilk workers to use by setting the +`CILK_NWORKERS` environment variable. For example, the following +command will run `fib` using 4 Cilk worker threads: +```console +CILK_NWORKERS=4 ./fib 40 +``` + +## A brief introduction to Cilk programming + +Cilk extends C and C++ with a few keywords to expose logical parallelism +in a program. These keywords create parallel subcomputations, or +***tasks***, that are allowed to be scheduled and run simultaneously. +OpenCilk's runtime system automatically schedules and load-balances the +parallel tasks onto parallel processor cores in a shared-memory multicore +using randomized work stealing. + +### Spawning and synchronizing tasks + +The two most primitive Cilk keywords are `cilk_spawn` and `cilk_scope`. +A `cilk_spawn` can be inserted before a function call to allow that call +to execute in parallel with its continuation. A `cilk_scope` defines a +lexical scope in which all spawned subcomputations must complete before +program execution leaves the scope. Cilk supports ***recursive*** +spawning of tasks, in which a task may itself spawn and synchronize +subtasks. + +For example, the following Cilk program shows how one can +parallelize the simple exponential-time algorithm to compute the nth +Fibonacci number using `cilk_spawn` and `cilk_scope`. 
+```c +#include + +int fib(int n) { + if (n < 2) + return n; + int x, y; + cilk_scope { + x = cilk_spawn fib(n-1); + y = fib(n-2); + } + return x+y; +} +``` +The return value of a `cilk_spawn` is simply the return value of the +spawned function. Accessing the return value of a spawned function +before synchronizing that spawn results in a race. + +One can also spawn functions that do not return a value, as in the +following example: +```cpp +#include +#include + +constexpr std::ptrdiff_t BASE_CASE_LENGTH = 32; + +template void sample_qsort(T* begin, T* end) { + if (end - begin < BASE_CASE_LENGTH) { + std::sort(begin, end); // Base case: Serial sort + } else { + --end; // Exclude last element (pivot) from partition + T* middle = std::partition(begin, end, [pivot=*end](T a) { return a < pivot; }); + std::swap(*end, *middle); // Move pivot to middle + cilk_scope { + cilk_spawn sample_qsort(begin, middle); + sample_qsort(++middle, ++end); // Exclude pivot and restore end + } + } +} +``` + +> [!NOTE] +> OpenCilk also continues to support the `cilk_sync` statement from +> previous versions of Cilk for synchronizing spawned tasks in a function +> without encapsulating those tasks in a lexical scope. + +> [!NOTE] +> The OpenCilk runtime system assumes that all spawned children of any +> function are synchronized before the function returns. The `-fopencilk` +> flag ensures an implicit synchronization at the end of every +> function of that function's spawned children. + +### Parallel loops + +The `cilk_for` keyword can be used to define a parallel loop, in which all +iterations of the loop are allowed to execute simultaneously. 
In Cilk, +`cilk_for` loops are safe and efficient to nest, as the following example +shows: +```c +#include + +void square_matmul(double *C, const double *A, const double *B, size_t n) { + cilk_for (size_t i = 0; i < n; ++i) { + cilk_for (size_t j = 0; j < n; ++j) { + C[i * n + j] = 0.0; + for (size_t k = 0; k < n; ++k) { + C[i * n + j] += A[i * n + k] * B[k * n + j]; + } + } + } +} +``` + +Internally, the OpenCilk runtime system implements `cilk_for` using +`cilk_spawn` and `cilk_scope` to spawn the `cilk_for` loop iterations +efficiently using a parallel divide-and-conquer algorithm. This efficient +implementation of `cilk_for` requires `-O1`-level compiler optimizations or +higher. + +### The serial projection + +The semantics of a Cilk program can often be understood based on its +***serial projection***, which is the serial program derived by transforming +the Cilk code to replace all of Cilk's task-parallel language constructs with +serial equivalents. Roughly speaking, one can derive the serial projection +of a Cilk program by replacing all `cilk_for`s with ordinary `for`s and +removing all other Cilk language constructs. The serial projection +corresponds with the execution of a Cilk program on a single worker, that is, +with `CILK_NWORKERS=1`. If a Cilk program is deterministic, then all +parallel executions of a Cilk program have the same behavior as its serial +projection. + +## Using OpenCilk's tools + +OpenCilk provides two Cilk-specific tools to check and analyze Cilk programs. +The Cilksan race detector checks Cilk programs dynamically for determinacy +races. The Cilkscale scalability analyzer measures a Cilk program's parallel +scalability. + +In addition, OpenCilk integrates standard tools packaged with LLVM for +analyzing C/C++ programs, including +[Google's Sanitizers](https://github.com/google/sanitizers). You can use +those tools with Cilk programs in the same way that you use them for regular +C/C++ programs. 
For example, to check your Cilk program for memory errors +using AddressSanitizer, compile and link your Cilk program with +the additional flag `-fsanitize=address`, and then run it normally. + +### Checking for races using Cilksan + +For a given Cilk program and input, Cilksan is guaranteed to either detect a +determinacy race, if one exists, or certify that the program is +determinacy-race free. Cilksan is therefore useful for debugging and +regression-testing race bugs in Cilk programs. + +For each race that Cilksan detects, it will produce a race report that +includes the memory address being raced on and the call stacks of the two +instructions involved in the race. Cilksan will avoid reporting races where +both racing instructions are atomic operations or protected by a common lock. + +To use Cilksan, compile and link the Cilk program with the additional +flag `-fsanitize=cilk`, and then run it normally. It is also recommended +that you compile the Cilk program with debug symbols, by adding the `-g` flag, +to improve the readability of any race reports.
+ +As an example, here is a Cilksan race report from building and running the +`nqueens` program in the +[OpenCilk tutorial](https://github.com/OpenCilk/tutorial) with Cilksan: +``` +Race detected on location 1112ffd41 +* Read 100ffeb84 nqueens nqueens.c:64:3 +| `-to variable a (declared at nqueens.c:50) ++ Call 100fffb80 nqueens nqueens.c:70:31 ++ Spawn 100ffec8c nqueens nqueens.c:70:31 +|* Write 100ffed14 nqueens nqueens.c:68:12 +|| `-to variable a (declared at nqueens.c:33) +\| Common calling context + + Call 100fffb80 nqueens nqueens.c:70:31 + + Spawn 100ffec8c nqueens nqueens.c:70:31 + + Call 100fff428 main nqueens.c:103:9 + Allocation context + Stack object a (declared at nqueens.c:33) + Alloc 100ffeb60 in nqueens nqueens.c:63:16 + Call 100fffb80 nqueens nqueens.c:70:31 + Spawn 100ffec8c nqueens nqueens.c:70:31 + Call 100fff428 main nqueens.c:103:9 +``` + +> [!NOTE] +> Cilksan is compatible with compiler optimizations. Be advised, however, +> that compiler optimizations can affect debug symbols, which can in turn +> affect Cilksan's race reports. +> +> In addition, the OpenCilk compiler can choose to optimize some parallel +> computation by serializing it, which may eliminate races in the original +> program. The OpenCilk compiler is not allowed to introduce new +> determinacy races into a program through optimizations. + +### Analyzing parallel scalability using Cilkscale + +The Cilkscale scalability analyzer measures the parallel scalability of +a Cilk program. Cilkscale measures the parallel performance of a Cilk +program in terms of ***work*** --- total computation --- and ***span*** +--- length of a longest path of dependencies. Cilkscale uses these +measures to evaluate the program's ***parallelism***, which bounds the +maximum possible parallel speedup the program can achieve on any number +of parallel processors. Cilkscale also produces "burdened" span and +parallelism measurements, which estimate the performance impact of +scheduling overhead. 
+ +To use Cilkscale, compile and link the Cilk program with the additional +flag `-fcilktool=cilkscale`, and then run the program normally. + +By default, Cilkscale reports these measurements in CSV format. Here +is an example of Cilkscale's output. +``` +tag,work (seconds),span (seconds),parallelism,burdened_span (seconds),burdened_parallelism +,2.07768,0.195024,10.6535,0.195386,10.6337 +``` +You can redirect Cilkscale's output to a file by setting the +`CILKSCALE_OUT` environment variable to that filename. + +By default, Cilkscale measures the whole program execution. Cilkscale also +provides a library API, similar to `clock_gettime()`, to measure specific +regions of the program. To measure a particular region in a Cilk program: +1. Include the Cilkscale header file, `cilk/cilkscale.h`, in the source + program. +2. Insert calls to the `wsp_getworkspan()` probe function around the region + of interest. For instance: + ```c + wsp_t start = wsp_getworkspan(); + // Region to measure + wsp_t end = wsp_getworkspan(); + ``` +3. Compute the difference between these probes and output the result, using + the `wsp_sub()` method (or using the `-` operator on the `wsp_t` type in C++) + and the `wsp_dump()` method. For example: + ```c + wsp_t elapsed = wsp_sub(end, start); + wsp_dump(elapsed, "my region tag"); + ``` + +The `wsp_dump()` function will add a line to the CSV output for the measured +region, tagged with the tag string passed to `wsp_dump()`. For example: +``` +tag,work (seconds),span (seconds),parallelism,burdened_span (seconds),burdened_parallelism +my region tag,1.94387,0.0868964,22.37,0.0871339,22.309 +,2.05014,0.19316,10.6137,0.193398,10.6006 +``` + +Cilkscale can also be used to automatically benchmark a Cilk program on +a range of processor counts and plot those performance results. For more +information on Cilkscale's automatic benchmarking facility, see the +[Cilkscale user guide](https://www.opencilk.org/doc/users-guide/cilkscale/).
+ +## Advanced Cilk programming features + +OpenCilk supports several advanced parallel-programming features, including +reducer hyperobjects and deterministic parallel random-number generation. + +### Reducer hyperobjects + +OpenCilk supports +[***reducer hyperobjects***](https://dl.acm.org/doi/10.1145/1583991.1584017) +(or ***reducers*** for short) to coordinate parallel modifications to shared +variables. + +Reducers provide a flexible parallel reduction mechanism. When a Cilk +program runs, the OpenCilk runtime system automatically creates new +***views*** of a reducer, each initialized to an ***identity*** value, and +applies parallel modifications to the reducer to these independent views. +As parallel subcomputations complete, the runtime system automatically +combines these views in parallel using a binary ***reduction*** operator. + +A Cilk reducer produces a deterministic result, regardless of how the +program is scheduled at runtime, as long as its identity and reduction +operator define a monoid. In particular, an *associative* reduction +is all that's needed to obtain a deterministic result; the reduction need not +be commutative. + +With OpenCilk, you can define a variable to be a reducer by adding the +keyword `cilk_reducer(I,R)` to its type, where `I` identifies a function +that sets the identity value, and `R` defines the binary reduction. For +example, the following code defines the `sum` variable to be a reducer +by adding `cilk_reducer(zero_i, plus_i)` to its type: +```c +#include <cilk/cilk.h> + +void zero_i(void *v) { *(int *)v = 0; } +void plus_i(void *l, void *r) { *(int *)l += *(int *)r; } + +int sum_array(int *array, size_t n) { + int cilk_reducer(zero_i, plus_i) sum = 0; + cilk_for (size_t i = 0; i < n; ++i) + sum += array[i]; + return sum; +} +``` +In this example, the function `zero_i` sets the identity value to be the +integer `0`, and `plus_i` defines a binary reduction of adding two +integers.
+ +### Deterministic parallel random-number generation + +OpenCilk supports deterministic parallel (pseudo)random-number +generation. A deterministic parallel random-number generator (DPRNG) +produces repeatable results across multiple executions of a Cilk +program on a given input, regardless of parallel scheduling. + +OpenCilk provides optimized support for a fast DPRNG. This fast DPRNG +implements the +[DotMix](https://dl.acm.org/doi/10.1145/2145816.2145841) algorithm, which +produces 2-independent pseudorandom numbers. This fast DPRNG provides +two functions: +- The `__cilkrts_dprand_set_seed()` function seeds the DPRNG using a given +64-bit integer seed. +- The `__cilkrts_get_dprand()` function, which returns a 64-bit +pseudorandom value on each call. + +To use this fast DPRNG, include the `cilk/cilk_api.h` header file and +link the program with `-lopencilk-pedigrees`. + +For example, the following Cilk program uses this fast DPRNG to +implement a parallel Monte Carlo algorithm for estimating pi: +```cpp +#include <cilk/cilk.h> +#include <cilk/cilk_api.h> +#include <cstdint> +#include <limits> + +template <typename T> void zero(void *v) { + *static_cast<T *>(v) = static_cast<T>(0); +} +template <typename T> void plus(void *l, void *r) { + *static_cast<T *>(l) += *static_cast<T *>(r); +} + +double estimatePi(int64_t n) { + int64_t cilk_reducer(zero<int64_t>, plus<int64_t>) inside = 0; + + cilk_for (int64_t i = 0; i < n; ++i) { + const double maxValue = static_cast<double>(std::numeric_limits<uint64_t>::max()); + + // Get two samples + uint64_t xSample = __cilkrts_get_dprand(); + uint64_t ySample = __cilkrts_get_dprand(); + + double x = static_cast<double>(xSample) / maxValue; + double y = static_cast<double>(ySample) / maxValue; + double m = (x * x) + (y * y); + + // Check if sample is inside of the circle + if (m <= 1) + ++inside; + } + + return 4.0 * static_cast<double>(inside) / static_cast<double>(n); +} +``` + +OpenCilk also supports the +[pedigree runtime mechanism](https://dl.acm.org/doi/10.1145/2145816.2145841) +for user-defined DPRNGs, using the same `cilk/cilk_api.h` header and +`-lopencilk-pedigrees` library.
At any point in a Cilk program, the +`__cilkrts_get_pedigree()` function returns the current pedigree in the +form of a singly linked list of `__cilkrts_pedigree` nodes. + +## OpenCilk's system architecture + +The OpenCilk system has three core components: a compiler, a runtime-system +library, and a suite of Cilk tools. + +The OpenCilk compiler (this repository) is based on the +[LLVM compiler infrastructure](https://llvm.org/). +The OpenCilk compiler extends LLVM with [Tapir][SchardlMoLe19], a compiler +IR for task parallelism that enables effective compiler analysis and +optimization of task-parallel programs. Tapir provides a generic +representation of task-parallel control flow that is independent of the +Cilk language and the runtime implementation. + +The OpenCilk [runtime library](https://github.com/OpenCilk/cheetah) +is based on the Cheetah runtime system. This runtime system schedules +and load-balances the Cilk computation using an efficient randomized +work-stealing scheduler. The scheduler offers a +[mathematical guarantee](https://dl.acm.org/doi/10.1145/324133.324234) +to schedule efficiently on the available parallel processors on a +shared-memory multicore. Furthermore, the OpenCilk runtime system +ensures that this theoretical efficiency is borne out in practice. + +The OpenCilk [tool suite](https://github.com/OpenCilk/productivity-tools) +includes two tools for analyzing Cilk programs. The Cilksan +race detector implements an extension of the +[SP-bags algorithm](https://dl.acm.org/doi/10.1145/258492.258493) to +check a Cilk program's execution on a given input for determinacy races. +The Cilkscale scalability analyzer implements a parallel version of +the [Cilkview algorithm](https://dl.acm.org/doi/10.1145/1810479.1810509) +to analyze the parallel scalability of a Cilk program. + +Although all OpenCilk components are integrated with each other, +OpenCilk's system architecture aims to make it easy to modify and extend +individual components. 
OpenCilk's tools use compiler-inserted +instrumentation hooks that instrument LLVM's IR and Tapir instructions. +Furthermore, the OpenCilk compiler implements a general Tapir-lowering +infrastructure that makes use of LLVM bitcode — a binary representation of +LLVM IR — to make it easy to compile Cilk programs to use different +parallel runtime systems. For more information, see the +[OpenCilk paper][SchardlLe23]. + +## How to cite OpenCilk + +For the OpenCilk system as a whole, cite the +[OpenCilk conference paper][SchardlLe23] at ACM PPoPP 2023: +> Tao B. Schardl and I-Ting Angelina Lee. 2023. OpenCilk: A Modular +> and Extensible Software Infrastructure for Fast Task-Parallel Code. +> In Proceedings of the 28th ACM SIGPLAN Annual Symposium on Principles +> and Practice of Parallel Programming (PPoPP '23). 189–203. +> https://doi.org/10.1145/3572848.3577509 + +BibTeX: +```bibtex +@inproceedings{SchardlLe23, +author = {Schardl, Tao B. and Lee, I-Ting Angelina}, +title = {OpenCilk: A Modular and Extensible Software Infrastructure for Fast Task-Parallel Code}, +year = {2023}, +isbn = {9798400700156}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +url = {https://doi.org/10.1145/3572848.3577509}, +doi = {10.1145/3572848.3577509}, +booktitle = {Proceedings of the 28th ACM SIGPLAN Annual Symposium on Principles and Practice of Parallel Programming}, +pages = {189--203}, +numpages = {15}, +keywords = {bitcode, parallel runtime system, cilk, productivity tools, compiler-inserted instrumentation, tapir, compiling, task parallelism, fork-join parallelism, OpenCilk, oneTBB, OpenMP, parallel computing}, +location = {Montreal, QC, Canada}, +series = {PPoPP '23} +} +``` + +For the Tapir compiler IR, cite either the +[Tapir conference paper][SchardlMoLe17] at ACM PPoPP 2017 conference +paper or the [Tapir journal paper][SchardlMoLe19] in ACM TOPC 2019. + +Tapir conference paper, ACM PPoPP 2017: +> Tao B. Schardl, William S.
Moses, and Charles E. Leiserson. 2017. +> Tapir: Embedding Fork-Join Parallelism into LLVM's Intermediate +> Representation. In Proceedings of the 22nd ACM SIGPLAN Symposium +> on Principles and Practice of Parallel Programming (PPoPP '17). +> 249–265. https://doi.org/10.1145/3018743.3018758 + +BibTeX: +```bibtex +@inproceedings{SchardlMoLe17, +author = {Schardl, Tao B. and Moses, William S. and Leiserson, Charles E.}, +title = {Tapir: Embedding Fork-Join Parallelism into LLVM's Intermediate Representation}, +year = {2017}, +isbn = {9781450344937}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +url = {https://doi.org/10.1145/3018743.3018758}, +doi = {10.1145/3018743.3018758}, +booktitle = {Proceedings of the 22nd ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming}, +pages = {249--265}, +numpages = {17}, +keywords = {control-flow graph, multicore, tapir, openmp, fork-join parallelism, cilk, optimization, serial semantics, llvm, parallel computing, compiling}, +location = {Austin, Texas, USA}, +series = {PPoPP '17} +} +``` + +Journal article about Tapir, ACM TOPC 2019: +> Tao B. Schardl, William S. Moses, and Charles E. Leiserson. 2019. +> Tapir: Embedding Recursive Fork-join Parallelism into LLVM’s +> Intermediate Representation. ACM Transactions on Parallel Computing 6, +> 4, Article 19 (December 2019), 33 pages. https://doi.org/10.1145/3365655 + +BibTeX: +```bibtex +@article{SchardlMoLe19, +author = {Schardl, Tao B. and Moses, William S.
and Leiserson, Charles E.}, +title = {Tapir: Embedding Recursive Fork-Join Parallelism into LLVM’s Intermediate Representation}, +year = {2019}, +issue_date = {December 2019}, +publisher = {Association for Computing Machinery}, +address = {New York, NY, USA}, +volume = {6}, +number = {4}, +issn = {2329-4949}, +url = {https://doi.org/10.1145/3365655}, +doi = {10.1145/3365655}, +journal = {ACM Transactions on Parallel Computing}, +month = {dec}, +articleno = {19}, +numpages = {33}, +keywords = {compiling, fork-join parallelism, Tapir, control-flow graph, optimization, parallel computing, OpenMP, multicore, Cilk, serial-projection property, LLVM} +} +``` + +## How to reach us + +Found a bug in OpenCilk? Please report it on the +[issue tracker](https://github.com/OpenCilk/opencilk-project/issues). + +Have a question or comment? Start a thread on the +[Discussions page](https://github.com/orgs/OpenCilk/discussions) or send us +an email at [contact@opencilk.org](mailto:contact@opencilk.org). + +Want to contribute to the OpenCilk project? We welcome your +contributions! Check out the +[contribute page](https://www.opencilk.org/contribute/) on the OpenCilk +website for more information. + +## Acknowledgments + +OpenCilk is supported in part by the National Science Foundation, +under grant number CCRI-1925609, and in part by the +[USAF-MIT AI Accelerator](https://aia.mit.edu/), which is sponsored by the +United States Air Force Research Laboratory under Cooperative Agreement +Number FA8750-19-2-1000. + +Any opinions, findings, and conclusions or recommendations expressed +in this material are those of the author(s) and should not be +interpreted as representing the official policies or views, either +expressed or implied, of the United States Air Force, the +U.S. Government, or the National Science Foundation. The +U.S. Government is authorized to reproduce and distribute reprints for +Government purposes notwithstanding any copyright notation herein.
+ +[SchardlLe23]: https://dl.acm.org/doi/10.1145/3572848.3577509 +[SchardlMoLe17]: https://dl.acm.org/doi/10.1145/3155284.3018758 +[SchardlMoLe19]: https://dl.acm.org/doi/10.1145/3365655 diff --git a/README_LLVM.md b/README_LLVM.md new file mode 100644 index 00000000000000..eb8d624d75cecd --- /dev/null +++ b/README_LLVM.md @@ -0,0 +1,39 @@ +# The LLVM Compiler Infrastructure + +Welcome to the LLVM project! + +This repository contains the source code for LLVM, a toolkit for the +construction of highly optimized compilers, optimizers, and run-time +environments. + +The LLVM project has multiple components. The core of the project is +itself called "LLVM". This contains all of the tools, libraries, and header +files needed to process intermediate representations and convert them into +object files. Tools include an assembler, disassembler, bitcode analyzer, and +bitcode optimizer. + +C-like languages use the [Clang](http://clang.llvm.org/) frontend. This +component compiles C, C++, Objective-C, and Objective-C++ code into LLVM bitcode +-- and from there into object files, using LLVM. + +Other components include: +the [libc++ C++ standard library](https://libcxx.llvm.org), +the [LLD linker](https://lld.llvm.org), and more. + +## Getting the Source Code and Building LLVM + +Consult the +[Getting Started with LLVM](https://llvm.org/docs/GettingStarted.html#getting-the-source-code-and-building-llvm) +page for information on building and running LLVM. + +For information on how to contribute to the LLVM project, please take a look at +the [Contributing to LLVM](https://llvm.org/docs/Contributing.html) guide. + +## Getting in touch + +Join the [LLVM Discourse forums](https://discourse.llvm.org/), [Discord +chat](https://discord.gg/xS7Z362), or #llvm IRC channel on +[OFTC](https://oftc.net/). + +The LLVM project has adopted a [code of conduct](https://llvm.org/docs/CodeOfConduct.html) for +participants to all modes of communication within the project. 
diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index f7936d72e08827..713d7095ef1991 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -238,6 +238,9 @@ set(CLANG_DEFAULT_OPENMP_RUNTIME "libomp" CACHE STRING set(CLANG_SYSTEMZ_DEFAULT_ARCH "z10" CACHE STRING "SystemZ Default Arch") +set(CLANG_DEFAULT_TAPIR_RUNTIME "cilk" CACHE STRING + "Default Tapir runtime used by -ftapir.") + set(CLANG_VENDOR ${PACKAGE_VENDOR} CACHE STRING "Vendor-specific text for showing with version information.") diff --git a/clang/CodeOwners.rst b/clang/CodeOwners.rst index d7f6b4e56a440f..6ea0da881da384 100644 --- a/clang/CodeOwners.rst +++ b/clang/CodeOwners.rst @@ -255,6 +255,10 @@ SYCL conformance | Alexey Bader | alexey.bader\@intel.com (email), bader (Phabricator), bader (GitHub) +Cilk frontend +~~~~~~~~~~~~~~~~ +| Tao B. Schardl +| neboat\@mit.edu (email), neboat (GitHub) Former Code Owners ================== diff --git a/clang/README.md b/clang/README.md new file mode 100644 index 00000000000000..836f2fcd0295f2 --- /dev/null +++ b/clang/README.md @@ -0,0 +1,38 @@ +Cilk-Clang +================================ + +This version of Clang supports the `_Cilk_spawn`, `_Cilk_sync`, and +`_Cilk_for` keywords from Cilk. In particular, this version of Clang +supports the use of _Cilk_spawn before a function call in a statement, +an assignment, or a declaration, as in the following examples: + +``` +_Cilk_spawn foo(n); +``` + +``` +x = _Cilk_spawn foo(n); +``` + +``` +int x = _Cilk_spawn foo(n); +``` + +When spawning a function call, the call arguments and function +arguments are evaluated before the spawn occurs. When spawning an +assignment or declaration, the LHS is also evaluated before the spawn +occurs. + +For convenience, this version of Clang allows `_Cilk_spawn` to spawn an +arbitrary statement, as follows: + +``` +_Cilk_spawn { x = foo(n); } +``` + +Please use this syntax with caution! 
When spawning an arbitrary +statement, the spawn occurs before the evaluation of any part of the +spawned statement. Furthermore, some statements, such as `goto`, are +not legal to spawn. In the future, we will add checks to catch +illegal uses of `_Cilk_spawn`. + diff --git a/clang/bindings/python/clang/cindex.py b/clang/bindings/python/clang/cindex.py index ff386d2094a0b8..62176059c4c90f 100644 --- a/clang/bindings/python/clang/cindex.py +++ b/clang/bindings/python/clang/cindex.py @@ -1343,6 +1343,21 @@ def __repr__(self): # OpenMP teams distribute directive. CursorKind.OMP_TEAMS_DISTRIBUTE_DIRECTIVE = CursorKind(271) +# Cilk cilk_spawn statement. +CursorKind.CILK_SPAWN_STMT = CursorKind(296) + +# Cilk wrapper for an expression preceded by cilk_spawn. +CursorKind.CILK_SPAWN_EXPR = CursorKind(297) + +# Cilk cilk_sync statement. +CursorKind.CILK_SYNC_STMT = CursorKind(298) + +# Cilk cilk_for statement. +CursorKind.CILK_FOR_STMT = CursorKind(299) + +# Cilk cilk_scope statement. +CursorKind.CILK_SCOPE_STMT = CursorKind(300) + ### # Other Kinds diff --git a/clang/bindings/python/tests/cindex/test_diagnostics.py b/clang/bindings/python/tests/cindex/test_diagnostics.py index 57c41baaa25419..8e2159c6c2bc71 100644 --- a/clang/bindings/python/tests/cindex/test_diagnostics.py +++ b/clang/bindings/python/tests/cindex/test_diagnostics.py @@ -78,7 +78,7 @@ def test_diagnostic_category(self): self.assertEqual(d.location.line, 1) self.assertEqual(d.location.column, 11) - self.assertEqual(d.category_number, 2) + self.assertEqual(d.category_number, 3) self.assertEqual(d.category_name, "Semantic Issue") def test_diagnostic_option(self): diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index 601b91f67d6588..6042737761aff2 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -2136,7 +2136,27 @@ enum CXCursorKind { */ CXCursor_OMPErrorDirective = 305, - CXCursor_LastStmt = CXCursor_OMPErrorDirective, + /** A _Cilk_spawn statement. 
+ */ + CXCursor_CilkSpawnStmt = 306, + + /** Wrapper for an expression preceded by _Cilk_spawn. + */ + CXCursor_CilkSpawnExpr = 307, + + /** A _Cilk_sync statement. + */ + CXCursor_CilkSyncStmt = 308, + + /** A _Cilk_for statement. + */ + CXCursor_CilkForStmt = 309, + + /** A _Cilk_scope statement. + */ + CXCursor_CilkScopeStmt = 310, + + CXCursor_LastStmt = CXCursor_CilkScopeStmt, /** * Cursor that represents the translation unit itself. @@ -2948,7 +2968,9 @@ enum CXTypeKind { CXType_ExtVector = 176, CXType_Atomic = 177, - CXType_BTFTagAttributed = 178 + CXType_BTFTagAttributed = 178, + + CXType_Hyperobject = 179 }; /** diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 40cadd93158c68..1a0175b5e17118 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -185,6 +185,7 @@ class ASTContext : public RefCountedBase { mutable SmallVector Types; mutable llvm::FoldingSet ExtQualNodes; mutable llvm::FoldingSet ComplexTypes; + mutable llvm::FoldingSet HyperobjectTypes; mutable llvm::FoldingSet PointerTypes{GeneralTypesLog2InitSize}; mutable llvm::FoldingSet AdjustedTypes; mutable llvm::FoldingSet BlockPointerTypes; @@ -1317,6 +1318,11 @@ class ASTContext : public RefCountedBase { return CanQualType::CreateUnsafe(getComplexType((QualType) T)); } + QualType getHyperobjectType(QualType T, Expr *R, Expr *I) const; + CanQualType getHyperobjectType(CanQualType T, Expr *R, Expr *I) const { + return CanQualType::CreateUnsafe(getHyperobjectType((QualType) T, R, I)); + } + /// Return the uniqued reference to the type for a pointer to /// the specified type. 
QualType getPointerType(QualType T) const; diff --git a/clang/include/clang/AST/ASTNodeTraverser.h b/clang/include/clang/AST/ASTNodeTraverser.h index d649ef6816ca42..6839fb87d51239 100644 --- a/clang/include/clang/AST/ASTNodeTraverser.h +++ b/clang/include/clang/AST/ASTNodeTraverser.h @@ -339,6 +339,13 @@ class ASTNodeTraverser } } + void VisitHyperobjectType(const HyperobjectType *T) { + Visit(T->getElementType()); + if (T->getIdentity()) + Visit(T->getIdentity()); + if (T->getReduce()) + Visit(T->getReduce()); + } void VisitComplexType(const ComplexType *T) { Visit(T->getElementType()); } void VisitLocInfoType(const LocInfoType *T) { Visit(T->getTypeSourceInfo()->getType()); diff --git a/clang/include/clang/AST/ComputeDependence.h b/clang/include/clang/AST/ComputeDependence.h index f62611cb4c3cf7..eec0a0e0e6669e 100644 --- a/clang/include/clang/AST/ComputeDependence.h +++ b/clang/include/clang/AST/ComputeDependence.h @@ -106,6 +106,7 @@ class ObjCSubscriptRefExpr; class ObjCIsaExpr; class ObjCIndirectCopyRestoreExpr; class ObjCMessageExpr; +class CilkSpawnExpr; // The following functions are called from constructors of `Expr`, so they // should not access anything beyond basic @@ -202,5 +203,7 @@ ExprDependence computeDependence(ObjCIsaExpr *E); ExprDependence computeDependence(ObjCIndirectCopyRestoreExpr *E); ExprDependence computeDependence(ObjCMessageExpr *E); +ExprDependence computeDependence(CilkSpawnExpr *E); + } // namespace clang #endif diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index 788f6ab97b1bbf..78ed157640dde6 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -1362,6 +1362,8 @@ class VarDecl : public DeclaratorDecl, public Redeclarable { /// Returns a pointer to the value if evaluation succeeded, 0 otherwise. 
APValue *evaluateValue() const; + bool isReducer() const; + private: APValue *evaluateValueImpl(SmallVectorImpl &Notes, bool IsConstantInitialization) const; diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index f9795b6386c46f..2a17289a39db4a 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -299,6 +299,7 @@ class Expr : public ValueStmt { MLV_ConstQualified, MLV_ConstQualifiedField, MLV_ConstAddrSpace, + MLV_HyperobjectField, MLV_ArrayType, MLV_NoSetterProperty, MLV_MemberFunction, @@ -350,6 +351,7 @@ class Expr : public ValueStmt { CM_ConstQualifiedField, CM_ConstAddrSpace, CM_ArrayType, + CM_HyperobjectField, CM_IncompleteType }; diff --git a/clang/include/clang/AST/ExprCilk.h b/clang/include/clang/AST/ExprCilk.h new file mode 100644 index 00000000000000..3ed5441f5cf21f --- /dev/null +++ b/clang/include/clang/AST/ExprCilk.h @@ -0,0 +1,71 @@ +//===--- ExprCilk.h - Classes for representing Cilk expressions -*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// Defines the clang::Expr interface and subclasses for Cilk expressions. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_EXPRCILK_H +#define LLVM_CLANG_AST_EXPRCILK_H + +#include "clang/AST/Expr.h" + +namespace clang { + +/// CilkSpawnExpr - Wrapper for expressions whose evaluation is spawned. 
+/// +class CilkSpawnExpr : public Expr { + Stmt *SpawnedExpr; + SourceLocation SpawnLoc; + +public: + CilkSpawnExpr(SourceLocation SpawnLoc, Expr *SpawnedExpr) + : Expr(CilkSpawnExprClass, SpawnedExpr->getType(), + SpawnedExpr->getValueKind(), SpawnedExpr->getObjectKind()), + SpawnedExpr(SpawnedExpr), SpawnLoc(SpawnLoc) { + setDependence(computeDependence(this)); + } + + explicit CilkSpawnExpr(EmptyShell Empty) + : Expr(CilkSpawnExprClass, Empty) { } + + const Expr *getSpawnedExpr() const { return cast(SpawnedExpr); } + Expr *getSpawnedExpr() { return cast(SpawnedExpr); } + void setSpawnedExpr(Expr *E) { SpawnedExpr = E; } + + /// \brief Retrieve the location of this expression. + SourceLocation getSpawnLoc() const { return SpawnLoc; } + void setSpawnLoc(SourceLocation L) { SpawnLoc = L; } + + SourceLocation getBeginLoc() const LLVM_READONLY { + return SpawnedExpr->getBeginLoc(); + } + SourceLocation getEndLoc() const LLVM_READONLY { + return SpawnedExpr->getEndLoc(); + } + SourceLocation getExprLoc() const LLVM_READONLY { + return cast(SpawnedExpr)->getExprLoc(); + } + + // Iterators + child_range children() { + return child_range(&SpawnedExpr, &SpawnedExpr+1); + } + + const_child_range children() const { + return const_child_range(&SpawnedExpr, &SpawnedExpr+1); + } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == CilkSpawnExprClass; + } +}; + +} // end namespace clang + +#endif diff --git a/clang/include/clang/AST/NonTrivialTypeVisitor.h b/clang/include/clang/AST/NonTrivialTypeVisitor.h index cf320c8a478af3..9d0724843dec55 100644 --- a/clang/include/clang/AST/NonTrivialTypeVisitor.h +++ b/clang/include/clang/AST/NonTrivialTypeVisitor.h @@ -34,6 +34,8 @@ template struct DestructedTypeVisitor { return asDerived().visitStruct(FT, std::forward(Args)...); case QualType::DK_none: return asDerived().visitTrivial(FT, std::forward(Args)...); + case QualType::DK_hyperobject: + llvm_unreachable("hyperobject destruction not implemented"); case 
QualType::DK_cxx_destructor: return asDerived().visitCXXDestructor(FT, std::forward(Args)...); case QualType::DK_objc_weak_lifetime: diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 604875cd6337a4..cb20d64f19f077 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -25,6 +25,7 @@ #include "clang/AST/DeclarationName.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" +#include "clang/AST/ExprCilk.h" #include "clang/AST/ExprConcepts.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/ExprOpenMP.h" @@ -32,6 +33,7 @@ #include "clang/AST/NestedNameSpecifier.h" #include "clang/AST/OpenMPClause.h" #include "clang/AST/Stmt.h" +#include "clang/AST/StmtCilk.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtObjC.h" #include "clang/AST/StmtOpenMP.h" @@ -969,6 +971,14 @@ DEF_TRAVERSE_TYPE(BuiltinType, {}) DEF_TRAVERSE_TYPE(ComplexType, { TRY_TO(TraverseType(T->getElementType())); }) +DEF_TRAVERSE_TYPE(HyperobjectType, { + TRY_TO(TraverseType(T->getElementType())); + if (Stmt *I = T->getIdentity()) + TRY_TO(TraverseStmt(I)); + if (Stmt *R = T->getReduce()) + TRY_TO(TraverseStmt(R)); + }) + DEF_TRAVERSE_TYPE(PointerType, { TRY_TO(TraverseType(T->getPointeeType())); }) DEF_TRAVERSE_TYPE(BlockPointerType, @@ -1211,6 +1221,15 @@ DEF_TRAVERSE_TYPELOC(ComplexType, { TRY_TO(TraverseType(TL.getTypePtr()->getElementType())); }) +DEF_TRAVERSE_TYPELOC(HyperobjectType, { + const HyperobjectType *H = TL.getTypePtr(); + TRY_TO(TraverseType(H->getElementType())); + if (Stmt *I = H->getIdentity()) + TRY_TO(TraverseStmt(I)); + if (Stmt *R = H->getReduce()) + TRY_TO(TraverseStmt(R)); +}) + DEF_TRAVERSE_TYPELOC(PointerType, { TRY_TO(TraverseTypeLoc(TL.getPointeeLoc())); }) @@ -2845,6 +2864,12 @@ DEF_TRAVERSE_STMT(TypoExpr, {}) DEF_TRAVERSE_STMT(RecoveryExpr, {}) DEF_TRAVERSE_STMT(CUDAKernelCallExpr, {}) +DEF_TRAVERSE_STMT(CilkSpawnStmt, {}) 
+DEF_TRAVERSE_STMT(CilkSpawnExpr, {}) +DEF_TRAVERSE_STMT(CilkSyncStmt, {}) +DEF_TRAVERSE_STMT(CilkForStmt, {}) +DEF_TRAVERSE_STMT(CilkScopeStmt, {}) + // These operators (all of them) do not need any action except // iterating over the children. DEF_TRAVERSE_STMT(BinaryConditionalOperator, {}) diff --git a/clang/include/clang/AST/StmtCilk.h b/clang/include/clang/AST/StmtCilk.h new file mode 100644 index 00000000000000..8814d22f318bec --- /dev/null +++ b/clang/include/clang/AST/StmtCilk.h @@ -0,0 +1,240 @@ +//===- StmtCilk.h - Classes for Cilk statements -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// This file defines Cilk AST classes for executable statements and clauses. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_AST_STMTCILK_H +#define LLVM_CLANG_AST_STMTCILK_H + +#include "clang/AST/Stmt.h" +#include "clang/Basic/SourceLocation.h" + +namespace clang { + +/// CilkSpawnStmt - This represents a _Cilk_spawn. +/// +class CilkSpawnStmt : public Stmt { + SourceLocation SpawnLoc; + Stmt *SpawnedStmt; + +public: + explicit CilkSpawnStmt(SourceLocation SL) : CilkSpawnStmt(SL, nullptr) {} + + CilkSpawnStmt(SourceLocation SL, Stmt *S) + : Stmt(CilkSpawnStmtClass), SpawnLoc(SL), SpawnedStmt(S) { } + + // Build an empty _Cilk_spawn statement. 
+ explicit CilkSpawnStmt(EmptyShell Empty) : Stmt(CilkSpawnStmtClass, Empty) {} + + const Stmt *getSpawnedStmt() const { return SpawnedStmt; } + Stmt *getSpawnedStmt() { return SpawnedStmt; } + void setSpawnedStmt(Stmt *S) { SpawnedStmt = S; } + + SourceLocation getSpawnLoc() const { return SpawnLoc; } + void setSpawnLoc(SourceLocation L) { SpawnLoc = L; } + + SourceLocation getBeginLoc() const LLVM_READONLY { return SpawnLoc; } + SourceLocation getEndLoc() const LLVM_READONLY { + return SpawnedStmt->getEndLoc(); + } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == CilkSpawnStmtClass; + } + + // Iterators + child_range children() { + return child_range(&SpawnedStmt, &SpawnedStmt+1); + } + + const_child_range children() const { + return const_child_range(&SpawnedStmt, &SpawnedStmt+1); + } +}; + +/// CilkSyncStmt - This represents a _Cilk_sync. +/// +class CilkSyncStmt : public Stmt { + SourceLocation SyncLoc; + +public: + CilkSyncStmt(SourceLocation SL) : Stmt(CilkSyncStmtClass) { + setSyncLoc(SL); + } + + // Build an empty _Cilk_sync statement. + explicit CilkSyncStmt(EmptyShell Empty) : Stmt(CilkSyncStmtClass, Empty) { } + + SourceLocation getSyncLoc() const { return SyncLoc; } + void setSyncLoc(SourceLocation L) { SyncLoc = L; } + + SourceLocation getBeginLoc() const LLVM_READONLY { return getSyncLoc(); } + SourceLocation getEndLoc() const LLVM_READONLY { return getSyncLoc(); } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == CilkSyncStmtClass; + } + + // Iterators + child_range children() { + return child_range(child_iterator(), child_iterator()); + } + + const_child_range children() const { + return const_child_range(const_child_iterator(), const_child_iterator()); + } +}; + +/// CilkForStmt - This represents a '_Cilk_for(init;cond;inc)' stmt. 
+class CilkForStmt : public Stmt { + SourceLocation CilkForLoc; + enum { INIT, LIMIT, INITCOND, BEGINSTMT, ENDSTMT, COND, INC, LOOPVAR, BODY, END }; + Stmt* SubExprs[END]; // SubExprs[INIT] is an expression or declstmt. + SourceLocation LParenLoc, RParenLoc; + +public: + CilkForStmt(Stmt *Init, DeclStmt *Limit, Expr *InitCond, DeclStmt *Begin, + DeclStmt *End, Expr *Cond, Expr *Inc, DeclStmt *LoopVar, + Stmt *Body, SourceLocation CFL, SourceLocation LP, + SourceLocation RP); + + /// Build an empty _Cilk_for statement. + explicit CilkForStmt(EmptyShell Empty) : Stmt(CilkForStmtClass, Empty) { } + + Stmt *getInit() { return SubExprs[INIT]; } + + // /// Retrieve the variable declared in this "for" statement, if any. + // /// + // /// In the following example, "y" is the condition variable. + // /// \code + // /// for (int x = random(); int y = mangle(x); ++x) { + // /// // ... + // /// } + // /// \endcode + // VarDecl *getConditionVariable() const; + // void setConditionVariable(const ASTContext &C, VarDecl *V); + + // /// If this CilkForStmt has a condition variable, return the faux DeclStmt + // /// associated with the creation of that condition variable. 
+ // const DeclStmt *getConditionVariableDeclStmt() const { + // return reinterpret_cast(SubExprs[CONDVAR]); + // } + + DeclStmt *getLimitStmt() { + return cast_or_null(SubExprs[LIMIT]); + } + Expr *getInitCond() { return cast_or_null(SubExprs[INITCOND]); } + DeclStmt *getBeginStmt() { + return cast_or_null(SubExprs[BEGINSTMT]); + } + DeclStmt *getEndStmt() { return cast_or_null(SubExprs[ENDSTMT]); } + Expr *getCond() { return reinterpret_cast(SubExprs[COND]); } + Expr *getInc() { return reinterpret_cast(SubExprs[INC]); } + DeclStmt *getLoopVarStmt() { + return cast_or_null(SubExprs[LOOPVAR]); + } + Stmt *getBody() { return SubExprs[BODY]; } + + const Stmt *getInit() const { return SubExprs[INIT]; } + const DeclStmt *getLimitStmt() const { + return cast_or_null(SubExprs[LIMIT]); + } + const Expr *getInitCond() const { + return cast_or_null(SubExprs[INITCOND]); + } + const DeclStmt *getBeginStmt() const { + return cast_or_null(SubExprs[BEGINSTMT]); + } + const DeclStmt *getEndStmt() const { + return cast_or_null(SubExprs[ENDSTMT]); + } + const Expr *getCond() const { return reinterpret_cast(SubExprs[COND]);} + const Expr *getInc() const { return reinterpret_cast(SubExprs[INC]); } + const DeclStmt *getLoopVarStmt() const { + return cast_or_null(SubExprs[LOOPVAR]); + } + const Stmt *getBody() const { return SubExprs[BODY]; } + + void setInit(Stmt *S) { SubExprs[INIT] = S; } + void setLimitStmt(Stmt *S) { SubExprs[LIMIT] = S; } + void setInitCond(Expr *E) { SubExprs[INITCOND] = reinterpret_cast(E); } + void setBeginStmt(Stmt *S) { SubExprs[BEGINSTMT] = S; } + void setEndStmt(Stmt *S) { SubExprs[ENDSTMT] = S; } + void setCond(Expr *E) { SubExprs[COND] = reinterpret_cast(E); } + void setInc(Expr *E) { SubExprs[INC] = reinterpret_cast(E); } + void setLoopVarStmt(Stmt *S) { SubExprs[LOOPVAR] = S; } + void setBody(Stmt *S) { SubExprs[BODY] = S; } + + SourceLocation getCilkForLoc() const { return CilkForLoc; } + void setCilkForLoc(SourceLocation L) { CilkForLoc = L; } + 
SourceLocation getLParenLoc() const { return LParenLoc; } + void setLParenLoc(SourceLocation L) { LParenLoc = L; } + SourceLocation getRParenLoc() const { return RParenLoc; } + void setRParenLoc(SourceLocation L) { RParenLoc = L; } + + SourceLocation getBeginLoc() const LLVM_READONLY { return getCilkForLoc(); } + SourceLocation getEndLoc() const LLVM_READONLY { + return getBody()->getEndLoc(); + } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == CilkForStmtClass; + } + + // Iterators + child_range children() { + return child_range(&SubExprs[0], &SubExprs[END]); + } + + const_child_range children() const { + return const_child_range(&SubExprs[0], &SubExprs[END]); + } +}; + +/// CilkScopeStmt - This represents a _Cilk_scope. +/// +class CilkScopeStmt : public Stmt { + SourceLocation ScopeLoc; + Stmt *Body; + +public: + explicit CilkScopeStmt(SourceLocation SL) : CilkScopeStmt(SL, nullptr) {} + + CilkScopeStmt(SourceLocation SL, Stmt *S) + : Stmt(CilkScopeStmtClass), ScopeLoc(SL), Body(S) {} + + // Build an empty _Cilk_scope statement. 
+ explicit CilkScopeStmt(EmptyShell Empty) : Stmt(CilkScopeStmtClass, Empty) {} + + const Stmt *getBody() const { return Body; } + Stmt *getBody() { return Body; } + void setBody(Stmt *S) { Body = S; } + + SourceLocation getScopeLoc() const { return ScopeLoc; } + void setScopeLoc(SourceLocation L) { ScopeLoc = L; } + + SourceLocation getBeginLoc() const LLVM_READONLY { return ScopeLoc; } + SourceLocation getEndLoc() const LLVM_READONLY { + return Body->getEndLoc(); + } + + static bool classof(const Stmt *T) { + return T->getStmtClass() == CilkScopeStmtClass; + } + + // Iterators + child_range children() { + return child_range(&Body, &Body+1); + } +}; + +} // end namespace clang + +#endif diff --git a/clang/include/clang/AST/StmtVisitor.h b/clang/include/clang/AST/StmtVisitor.h index 3e5155199eace3..8e294b9736e0ac 100644 --- a/clang/include/clang/AST/StmtVisitor.h +++ b/clang/include/clang/AST/StmtVisitor.h @@ -13,11 +13,13 @@ #ifndef LLVM_CLANG_AST_STMTVISITOR_H #define LLVM_CLANG_AST_STMTVISITOR_H +#include "clang/AST/ExprCilk.h" #include "clang/AST/ExprConcepts.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/ExprOpenMP.h" #include "clang/AST/Stmt.h" +#include "clang/AST/StmtCilk.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtObjC.h" #include "clang/AST/StmtOpenMP.h" diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index 8d20d088bb63c4..27e48c26d4fb39 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -1289,7 +1289,8 @@ class QualType { DK_cxx_destructor, DK_objc_strong_lifetime, DK_objc_weak_lifetime, - DK_nontrivial_c_struct + DK_nontrivial_c_struct, + DK_hyperobject }; /// Returns a nonzero value if objects of this type require @@ -1376,6 +1377,8 @@ class QualType { /// Remove all qualifiers including _Atomic. 
QualType getAtomicUnqualifiedType() const; + QualType stripHyperobject() const; + private: // These methods are implemented in a separate translation unit; // "static"-ize them to avoid creating temporary QualTypes in the @@ -2184,6 +2187,7 @@ class alignas(8) Type : public ExtQualsTypeCommonBase { bool isAnyPointerType() const; // Any C pointer or ObjC object pointer bool isBlockPointerType() const; bool isVoidPointerType() const; + bool isHyperobjectType() const; bool isReferenceType() const; bool isLValueReferenceType() const; bool isRValueReferenceType() const; @@ -4884,6 +4888,7 @@ class RecordType : public TagType { /// Recursively check all fields in the record for const-ness. If any field /// is declared const, return true. Otherwise, return false. bool hasConstFields() const; + bool hasHyperobjectFields() const; bool isSugared() const { return false; } QualType desugar() const { return QualType(this, 0); } @@ -6620,6 +6625,41 @@ class DependentBitIntType final : public Type, public llvm::FoldingSetNode { } }; +class HyperobjectType final : public Type, public llvm::FoldingSetNode { + friend class ASTContext; + + QualType ElementType; + Expr *Identity, *Reduce; + const FunctionDecl *IdentityID, *ReduceID; + + HyperobjectType(QualType Element, QualType CanonicalPtr, + Expr *i, const FunctionDecl *ifn, + Expr *r, const FunctionDecl *rfn); + +public: + QualType getElementType() const { return ElementType; } + + static bool isNullish(Expr *); + + Expr *getIdentity() const { return Identity; } + Expr *getReduce() const { return Reduce; } + + bool hasCallbacks() const; + + bool isSugared() const { return false; } + QualType desugar() const { return QualType(this, 0); } + + void Profile(llvm::FoldingSetNodeID &ID) const; + + static void Profile(llvm::FoldingSetNodeID &ID, QualType Pointee, + const FunctionDecl *I, + const FunctionDecl *R); + + static bool classof(const Type *T) { + return T->getTypeClass() == Hyperobject; + } +}; + /// A qualifier set is used to 
build a set of qualifiers. class QualifierCollector : public Qualifiers { public: @@ -6945,6 +6985,10 @@ inline bool Type::isBlockPointerType() const { return isa(CanonicalType); } +inline bool Type::isHyperobjectType() const { + return isa(CanonicalType); +} + inline bool Type::isReferenceType() const { return isa(CanonicalType); } diff --git a/clang/include/clang/AST/TypeLoc.h b/clang/include/clang/AST/TypeLoc.h index 27f714b7c983af..afa6b4eaeca8fb 100644 --- a/clang/include/clang/AST/TypeLoc.h +++ b/clang/include/clang/AST/TypeLoc.h @@ -1930,6 +1930,26 @@ class ComplexTypeLoc : public InheritingConcreteTypeLoc { }; +class HyperobjectTypeLoc : + public PointerLikeTypeLoc { +public: + SourceLocation getHyperLoc() const { + return getSigilLoc(); + } + + void setHyperLoc(SourceLocation Loc) { + setSigilLoc(Loc); + } + + void initializeLocal(ASTContext &Context, SourceLocation Loc) { + setSigilLoc(Loc); + } + + QualType getInnerType() const { + return getTypePtr()->getElementType(); + } +}; + struct TypeofLocInfo { SourceLocation TypeofLoc; SourceLocation LParenLoc; diff --git a/clang/include/clang/AST/TypeProperties.td b/clang/include/clang/AST/TypeProperties.td index 3cc826c1463a9f..74fbe744b7975c 100644 --- a/clang/include/clang/AST/TypeProperties.td +++ b/clang/include/clang/AST/TypeProperties.td @@ -17,6 +17,22 @@ let Class = ComplexType in { def : Creator<[{ return ctx.getComplexType(elementType); }]>; } +let Class = HyperobjectType in { + def : Property<"elementType", QualType> { + let Read = [{ node->getElementType() }]; + } + def : Property<"reduce", ExprRef> { + let Read = [{ node->getReduce() }]; + } + def : Property<"identity", ExprRef> { + let Read = [{ node->getIdentity() }]; + } + + def : Creator<[{ + return ctx.getHyperobjectType(elementType, reduce, identity); + }]>; +} + let Class = PointerType in { def : Property<"pointeeType", QualType> { let Read = [{ node->getPointeeType() }]; diff --git a/clang/include/clang/Basic/Attr.td 
b/clang/include/clang/Basic/Attr.td index d5204b28696672..b6060d9a1e3e86 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -387,6 +387,7 @@ def BlocksSupported : LangOpt<"Blocks">; def ObjCAutoRefCount : LangOpt<"ObjCAutoRefCount">; def ObjCNonFragileRuntime : LangOpt<"", "LangOpts.ObjCRuntime.allowsClassStubs()">; +def Cilk : LangOpt<"Cilk", "LangOpts.getCilk() != LangOptions::Cilk_none">; def HLSL : LangOpt<"HLSL">; @@ -3703,20 +3704,23 @@ def LoopHint : Attr { /// boolean: fully unrolls loop if State == Enable. /// expression: unrolls loop 'Value' times. + /// #pragma cilk grainsize directive + /// expression: coarsens the loop with grainsize 'Value'. + let Spellings = [Pragma<"clang", "loop">, Pragma<"", "unroll">, Pragma<"", "nounroll">, Pragma<"", "unroll_and_jam">, - Pragma<"", "nounroll_and_jam">]; + Pragma<"", "nounroll_and_jam">, Pragma<"", "cilk">]; /// State of the loop optimization specified by the spelling. let Args = [EnumArgument<"Option", "OptionType", ["vectorize", "vectorize_width", "interleave", "interleave_count", "unroll", "unroll_count", "unroll_and_jam", "unroll_and_jam_count", "pipeline", "pipeline_initiation_interval", "distribute", - "vectorize_predicate"], + "vectorize_predicate", "grainsize"], ["Vectorize", "VectorizeWidth", "Interleave", "InterleaveCount", "Unroll", "UnrollCount", "UnrollAndJam", "UnrollAndJamCount", "PipelineDisabled", "PipelineInitiationInterval", "Distribute", - "VectorizePredicate"]>, + "VectorizePredicate", "TapirGrainsize"]>, EnumArgument<"State", "LoopHintState", ["enable", "disable", "numeric", "fixed_width", "scalable_width", "assume_safety", "full"], @@ -3739,6 +3743,7 @@ def LoopHint : Attr { case PipelineInitiationInterval: return "pipeline_initiation_interval"; case Distribute: return "distribute"; case VectorizePredicate: return "vectorize_predicate"; + case TapirGrainsize: return "grainsize"; } llvm_unreachable("Unhandled LoopHint option."); } @@ -4199,3 +4204,55 
@@ def AvailableOnlyInDefaultEvalMethod : InheritableAttr { let Documentation = [Undocumented]; } +// Cilk attributes + +// TODO: Add docs to these attributes + +def Stealable : InheritableAttr { + let Spellings = [Clang<"stealable">]; + let Subjects = SubjectList<[FunctionLike]>; + let Documentation = [StealableDocs]; +} + +def StrandPure : InheritableAttr { + let Spellings = [Clang<"strand_pure">]; + let Subjects = SubjectList<[FunctionLike]>; + let Documentation = [StrandPureDocs]; +} + +def StrandMalloc : InheritableAttr { + let Spellings = [Clang<"strand_malloc">]; + let Subjects = SubjectList<[Function]>; + let Documentation = [StrandMallocDocs]; +} + +def Injective : InheritableAttr { + // TODO: Associate this with a single argument, not the function. + let Spellings = [Clang<"injective">]; + let Subjects = SubjectList<[Function]>; + let Documentation = [Undocumented]; +} + +def HyperView : InheritableAttr { + let Spellings = [Clang<"hyper_view">]; + let Subjects = SubjectList<[FunctionLike]>; + let Documentation = [StrandMallocDocs]; +} + +def HyperToken : InheritableAttr { + let Spellings = [Clang<"hyper_token">]; + let Subjects = SubjectList<[FunctionLike]>; + let Documentation = [StrandMallocDocs]; +} + +def ReducerRegister : InheritableAttr { + let Spellings = [Clang<"reducer_register">]; + let Subjects = SubjectList<[FunctionLike]>; + let Documentation = [StrandMallocDocs]; +} + +def ReducerUnregister : InheritableAttr { + let Spellings = [Clang<"reducer_unregister">]; + let Subjects = SubjectList<[FunctionLike]>; + let Documentation = [StrandMallocDocs]; +} diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 2c950231255d7f..fd825d1f4c5698 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -7077,3 +7077,28 @@ the variables were declared in. It is not possible to check the return value (if any) of these ``cleanup`` callback functions. 
}]; } + +def StrandPureDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +The ``strand_pure`` attribute denotes that the function acts like a pure function +when called multiple times within the same strand. + }]; +} + +def StealableDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +The ``stealable`` attribute denotes that the function contains a continuation that +can be stolen by a work-stealing scheduler. + }]; +} + +def StrandMallocDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +The ``strand_malloc`` attribute denotes that the function pointer acts +like a system memory allocation function from the perspective of +memory operations within the same strand. +}]; +} diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index 6dad8b512bd2df..e29fe8124d856b 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -1758,6 +1758,10 @@ BUILTIN(__builtin_ms_va_copy, "vc*&c*&", "n") // Arithmetic Fence: to prevent FP reordering and reassociation optimizations LANGBUILTIN(__arithmetic_fence, "v.", "tE", ALL_LANGUAGES) +// Tapir. Rewriting of reducer references happens during sema +// and needs a builtin to carry the information to codegen. +BUILTIN(__hyper_lookup, "v*vC*z.", "nU") + #undef BUILTIN #undef LIBBUILTIN #undef LANGBUILTIN diff --git a/clang/include/clang/Basic/Cilk.h b/clang/include/clang/Basic/Cilk.h new file mode 100644 index 00000000000000..ff7055dee0d648 --- /dev/null +++ b/clang/include/clang/Basic/Cilk.h @@ -0,0 +1,49 @@ +//===- Cilk.h - Cilk Language Family Options --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_CILK_H +#define LLVM_CLANG_BASIC_CILK_H + +#include "clang/Basic/LLVM.h" + +namespace clang { + +using CilkOptionMask = uint64_t; +enum CilkOption : uint64_t { + // Each Cilk option is mapped to a distinct bit. Currently we support at most + // 64 Cilk options. + CilkOpt_Pedigrees = 1ULL << 0, +}; + +struct CilkOptionSet { +private: + static bool isPowerOf2(CilkOptionMask CO) { + return (CO & -CO) == CO; + } + +public: + // Check if a given single Cilk option is enabled. + bool has(CilkOptionMask CO) const { + assert(isPowerOf2(CO) && "Must be a single Cilk option."); + return static_cast(Mask & CO); + } + + // Enable or disable a particular Cilk option. + void set(CilkOptionMask CO, bool Value) { + // Ensure that CO is a power of 2. + assert(isPowerOf2(CO) && "Must be a single Cilk option."); + Mask = Value ? (Mask | CO) : (Mask & ~CO); + } + + // Bitmask of enabled Cilk options. + CilkOptionMask Mask = 0; +}; + +} // namespace clang + +#endif // LLVM_CLANG_BASIC_CILK_H diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index d492b8681c5da8..b89bd0400ca4a3 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -309,6 +309,7 @@ VALUE_CODEGENOPT(TimeTraceGranularity, 32, 500) ///< Minimum time granularity (i ///< traced by time profiler CODEGENOPT(UnrollLoops , 1, 0) ///< Control whether loops are unrolled. CODEGENOPT(RerollLoops , 1, 0) ///< Control whether loops are rerolled. +CODEGENOPT(StripmineLoop , 1, 0) ///< Run Tapir loop stripmining. CODEGENOPT(NoUseJumpTables , 1, 0) ///< Set when -fno-jump-tables is enabled. VALUE_CODEGENOPT(UnwindTables, 2, 0) ///< Unwind tables (1) or asynchronous unwind tables (2) CODEGENOPT(VectorizeLoop , 1, 0) ///< Run loop vectorizer. 
@@ -426,6 +427,9 @@ VALUE_CODEGENOPT(InlineMaxStackSize, 32, UINT_MAX) // Vector functions library to use. ENUM_CODEGENOPT(VecLib, VectorLibrary, 3, NoLibrary) +/// Tapir target runtime library to use. +ENUM_CODEGENOPT(TapirTarget, TapirTargetID, 8, TapirTargetID::Last_TapirTargetID) + /// The default TLS model to use. ENUM_CODEGENOPT(DefaultTLSModel, TLSModel, 2, GeneralDynamicTLSModel) diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h index 14fc94fe27f995..7dc7f53212eac7 100644 --- a/clang/include/clang/Basic/CodeGenOptions.h +++ b/clang/include/clang/Basic/CodeGenOptions.h @@ -18,6 +18,7 @@ #include "llvm/ADT/FloatingPointMode.h" #include "llvm/Frontend/Debug/Options.h" #include "llvm/Support/CodeGen.h" +#include "clang/Basic/Tapir.h" #include "llvm/Support/Regex.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/Instrumentation/AddressSanitizerOptions.h" @@ -437,6 +438,9 @@ class CodeGenOptions : public CodeGenOptionsBase { /// passed on the command line. std::string StackUsageOutput; + // Path to OpenCilk runtime bitcode file. + std::string OpenCilkABIBitcodeFile; + /// Executable and command-line used to create a given CompilerInvocation. /// Most of the time this will be the full -cc1 command. 
const char *Argv0 = nullptr; diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 1b69324d073ab5..78927a8eef9d0a 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -316,6 +316,18 @@ def err_drv_invalid_cf_runtime_abi "'objc', 'standalone', 'swift', 'swift-5.0', 'swift-4.2', 'swift-4.1'">; def err_drv_gnustep_objc_runtime_incompatible_binary : Error< "GNUstep Objective-C runtime version %0 incompatible with target binary format">; +def err_drv_double_cilk : Error< + "Conflicting Cilk versions">; +def err_drv_cilk_objc : Error< + "Cilk does not support Objective-C">; +def err_drv_cilk_unsupported: Error< + "Cilk not yet supported for this target">; +def err_drv_opencilk_missing_abi_bitcode: Error< + "Cannot find OpenCilk runtime ABI bitcode file: %0">; +def err_drv_opencilk_resource_dir_missing_include: Error< + "No include directory in OpenCilk resource directory: %0">; +def err_drv_opencilk_resource_dir_missing_lib: Error< + "No lib directory in OpenCilk resource directory: %0">; def err_drv_emit_llvm_link : Error< "-emit-llvm cannot be used when linking">; def err_drv_optimization_remark_pattern : Error< diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 26bc88a980e4f6..e79416b7d8a13f 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -988,6 +988,12 @@ def Move : DiagGroup<"move", [ SelfMove ]>; +// Cilk Plus warnings. 
+def SourceUsesCilkPlus : DiagGroup<"source-uses-cilk-plus">; +def CilkPlusLoopControlVarModification : DiagGroup<"cilk-loop-control-var-modification">; +def ReturnCilkSpawn : DiagGroup<"return-cilk-spawn">; +def CilkIgnored : DiagGroup<"cilk-ignored">; + def Extra : DiagGroup<"extra", [ DeprecatedCopy, MissingFieldInitializers, diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index 8d729c31641ed8..c000d3846121bd 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1342,6 +1342,34 @@ def err_opencl_logical_exclusive_or : Error< def err_openclcxx_virtual_function : Error< "virtual functions are not supported in C++ for OpenCL">; +// Cilk Plus +def err_cilkplus_disable: Error< + "Cilk Plus support disabled - compile with -fcilkplus">; +def err_cilk_for_missing_control_variable: Error< + "missing control variable declaration and initialization in '_Cilk_for'">; +def err_cilk_for_missing_condition: Error< + "missing loop condition expression in '_Cilk_for'">; +def err_cilk_for_missing_increment: Error< + "missing loop increment expression in '_Cilk_for'">; +def err_cilk_for_missing_semi: Error< + "expected ';' in '_Cilk_for'">; +def err_cilk_for_forrange_loop_not_supported: Error< + "'_Cilk_for' not supported on for-range loops">; +def err_cilk_for_foreach_loop_not_supported: Error< + "'_Cilk_for' not supported on for-each loops">; +def err_pragma_cilk_invalid_option : Error< + "%select{invalid|missing}0 option%select{ %1|}0; expected grainsize">; +def err_pragma_cilk_grainsize_missing_argument : Error< + "missing argument; expected an integer value">; +def warn_cilk_for_following_grainsize: Warning< + "'#pragma cilk' ignored, because it is not followed by a '_Cilk_for' loop">, + InGroup; +def warn_pragma_cilk_grainsize_equals: Warning< + "'#pragma cilk grainsize' no longer requires '='">, + InGroup; +def error_hyperobject_arguments: 
Error< + "hyperobject must have 0 or 2 callbacks">; + // OpenMP support. def warn_pragma_omp_ignored : Warning< "unexpected '#pragma omp ...' in program">, InGroup, DefaultIgnore; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index c88f25209fc0fa..1ab8fcc46c648b 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -4483,7 +4483,7 @@ def err_param_default_argument_on_parameter_pack : Error< "parameter pack cannot have a default argument">; def err_uninitialized_member_for_assign : Error< "cannot define the implicit copy assignment operator for %0, because " - "non-static %select{reference|const}1 member %2 cannot use copy " + "non-static %select{reference|const|hyperobject}1 member %2 cannot use copy " "assignment operator">; def err_uninitialized_member_in_ctor : Error< "%select{constructor for %1|" @@ -10513,6 +10513,185 @@ def warn_imp_cast_drops_unaligned : Warning< } // end of sema category +// Cilk Plus warnings and errors +let CategoryName = "Cilk Issue" in { +def err_cilk_unevaluated_context : Error< + "'%0' cannot be used in an unevaluated context">; +def err_cilk_outside_function : Error< + "'%0' cannot be used outside a function">; +def err_cilk_spawn_invalid_func_context : Error< + "'%1' cannot be used in %select{a constructor|a destructor" + "|a copy assignment operator|a move assignment operator|the 'main' function" + "|a constexpr function|a function with a deduced return type" + "|a varargs function}0">; +def err_pragma_cilk_precedes_noncilk : Error< + "expected a Cilk keyword to follow '%0'">; + +// cilk_scope +def note_protected_by_cilk_scope: Note< + "jump bypasses '_Cilk_scope'">; + +// cilk_spawn +def err_spawn_invalid_scope : Error< + "'_Cilk_spawn' not allowed in this scope">; +def err_spawn_invalid_decl : Error< + "_Cilk_spawn not supported in a '%0Decl'">; +def err_spawn_spawn : Error< + "consecutive 
_Cilk_spawn tokens not allowed">; +def err_invalid_spawn_expr : Error< + "invalid _Cilk_spawn in expression">; +def err_cannot_spawn_builtin: Error< + "builtin function cannot be spawned">; +def err_cannot_spawn_function: Error< + "function cannot be spawned">; +def err_cannot_init_static_variable: Error< + "cannot spawn initialize a 'static' variable">; +def err_multiple_spawns: Error< + "multiple spawns among call arguments">; +def note_multiple_spawns: Note< + "another spawn here">; +def note_protected_by_spawn: Note< + "jump bypasses '_Cilk_spawn'">; +def note_exits_spawn : Note< + "jump exits the scope of a '_Cilk_spawn'">; +def err_jump_out_of_spawn : Error< + "cannot jump out of '_Cilk_spawn' statement">; +def err_cilk_spawn_cannot_return: Error< + "cannot return from within a '_Cilk_spawn' statement">; +def warn_return_cilk_spawn : Warning< + "no parallelism from a '_Cilk_spawn' in a return statement">, InGroup; + +// cilk_for +def err_cilk_for_initializer_expected_variable : Error< + "expected a variable for control variable in '_Cilk_for'">; +def err_cilk_for_initializer_expected_decl : Error< + "expected control variable declaration in initializer in '_Cilk_for'">; +def err_cilk_for_decl_multiple_variables : Error< + "cannot declare more than one loop control variable in '_Cilk_for'">; +def err_cilk_for_init_multiple_variables : Error< + "cannot initialize more than one loop control variable in '_Cilk_for'">; +def err_cilk_for_control_variable_not_initialized : Error< + "_Cilk_for loop control variable must be initialized">; +def err_cilk_for_initialization_must_be_decl : Error< + "loop initialization must be a declaration in '_Cilk_for'">; + +def err_cilk_for_control_variable_storage_class : Error< + "loop control variable cannot have storage class '%0' in '_Cilk_for'">; +def err_cilk_for_control_variable_qualifier : Error< + "loop control variable cannot be '%0' in '_Cilk_for'">; +def err_cilk_for_control_variable_not_local : Error< + "non-local loop 
control variable in '_Cilk_for'">; +def err_cilk_for_control_variable_type : Error< + "loop control variable must have an integral, pointer, or class type " + "in '_Cilk_for'">; + +def err_cilk_for_invalid_cond_expr: Error< + "expected binary comparison operator in '_Cilk_for' loop condition">; +def err_cilk_for_invalid_cond_operator: Error< + "loop condition operator must be one of '<', '<=', '>', '>=', or '!=' " + "in '_Cilk_for'">; +def err_cilk_for_cond_test_control_var: Error< + "loop condition does not test control variable %0 in '_Cilk_for'">; +def note_cilk_for_cond_allowed: Note< + "allowed forms are %0 OP expr, and expr OP %0">; +def err_cilk_for_difference_ill_formed: Error< + "end - begin must be well-formed in '_Cilk_for'">; +def err_non_integral_cilk_for_difference_type: Error< + "end - begin must have integral type in '_Cilk_for' - got %0">; + +def err_cilk_for_increment_not_control_var: Error< + "loop increment does not modify control variable %0 in '_Cilk_for'">; +def err_cilk_for_invalid_increment : Error< + "loop increment operator must be one of operators '++', '--', '+=', or '-=' in '_Cilk_for'">; +def err_cilk_for_invalid_increment_rhs : Error< + "right-hand side of '%0' must have integral or enum type in '_Cilk_for' increment">; +def err_cilk_for_increment_inconsistent : Error< + "loop increment is inconsistent with condition in '_Cilk_for': expected %select{negative|positive}0 stride">; +def err_cilk_for_increment_zero : Error< + "loop increment must be non-zero in '_Cilk_for'">; +def err_cilk_for_loop_modifies_control_var : Error< + "Modifying the loop control variable inside a '_Cilk_for' has undefined behavior">; +def warn_cilk_for_loop_control_var_func : Warning< + "Modifying the loop control variable inside a '_Cilk_for' using a function call has undefined behavior">, InGroup, DefaultIgnore; +def warn_cilk_for_loop_control_var_aliased: Warning< + "Modifying the loop control variable '%0' through an alias in '_Cilk_for' has undefined 
behavior">, InGroup, DefaultIgnore; +def note_cilk_for_loop_control_var_declared_here: Note< + "'_Cilk_for' loop control variable declared here">; +def warn_empty_cilk_for_body : Warning< + "Cilk for loop has empty body">, InGroup; + +def note_constant_stride: Note< + "constant stride is %0">; +def warn_cilk_for_cond_user_defined_conv: Warning< + "user-defined conversion from %0 to %1 will not be used when calculating the " + "number of iterations in '_Cilk_for'">, InGroup, DefaultWarn; +def note_cilk_for_conversion_here : Note< + "conversion to type %0 declared here">; +def err_cilk_for_cannot_return: Error< + "cannot return from within a '_Cilk_for' loop">; +def err_cilk_for_cannot_break: Error< + "cannot break from a '_Cilk_for' loop">; +def warn_cilk_for_loop_count_downcast: Warning< + "implicit loop count downcast from %0 to %1 in '_Cilk_for'">, + InGroup, DefaultWarn; + +def err_cilk_for_grainsize_negative: Error< + "the behavior of Cilk for is unspecified for a negative grainsize">; +def note_cilk_for_grainsize_conversion : Note< + "grainsize must evaluate to a type convertible to %0">; + +def warn_cilk_for_wraparound: Warning< + "%0 stride causes %1 wraparound">, InGroup, DefaultWarn; + +def note_cilk_for_wraparound_undefined: Note< + "wraparounds cause undefined behavior in Cilk for">; + +def note_protected_by_cilk_for: Note< + "jump bypasses '_Cilk_for'">; +def note_exits_cilk_for : Note< + "jump exits the scope of a '_Cilk_for'">; +def err_jump_out_of_cilk_for : Error< + "cannot jump out of '_Cilk_for' statement">; + +def attribute_requires_cilk : Warning< + "%0 ignored when OpenCilk not in use">, + InGroup>; + +def variable_length_hyperobject : Error< + "variable length type %0 may not be a hyperobject">; + +def confusing_hyperobject : Error< + "type %0 may not be a hyperobject">; + +def incomplete_hyperobject : Error< + "incomplete type %0 may not be a hyperobject">; + +def nested_hyperobject : Error< + "type %0, which contains a hyperobject, may not 
be a hyperobject">; + +def reducer_callbacks_not_allowed: Warning< + "reducer callbacks not implemented for structure members">, + InGroup; + +def err_invalid_reducer_callback : Error< + "reducer callback must be function with %0 pointer parameter%s0" +>; + +def note_protected_by_reducer : Note< + "jump bypasses initialization of variable with __attribute__((reducer))">; + +def err_hyperobject_param : Error<"parameter is hyperobject">; + +def err_hyperobject_cast : Error<"cast to hyperobject">; + +def err_hyperobject_struct_assign : Error< + "unimplemented assignment to structure with hyperobject member">; + +def no_reducer_array : Warning< + "array of reducer not implemented">, + InGroup; +} // end of Cilk category + let CategoryName = "OpenMP Issue" in { // OpenMP support. def err_omp_expected_var_arg : Error< diff --git a/clang/include/clang/Basic/Features.def b/clang/include/clang/Basic/Features.def index e05ac46258272c..26dc7f65765e4e 100644 --- a/clang/include/clang/Basic/Features.def +++ b/clang/include/clang/Basic/Features.def @@ -99,6 +99,9 @@ FEATURE(memory_sanitizer, FEATURE(thread_sanitizer, LangOpts.Sanitize.has(SanitizerKind::Thread)) FEATURE(dataflow_sanitizer, LangOpts.Sanitize.has(SanitizerKind::DataFlow)) FEATURE(scudo, LangOpts.Sanitize.hasOneOf(SanitizerKind::Scudo)) +FEATURE(cilk_sanitizer, LangOpts.Sanitize.has(SanitizerKind::Cilk)) +FEATURE(comprehensive_static_instrumentation, + LangOpts.getComprehensiveStaticInstrumentation()) FEATURE(swiftasynccc, PP.getTargetInfo().checkCallingConvention(CC_SwiftAsync) == clang::TargetInfo::CCCR_OK) diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index f7ec0406f33e3f..551887bc942f3d 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -394,6 +394,11 @@ LANGOPT(ApplePragmaPack, 1, 0, "Apple gcc-compatible #pragma pack handling") LANGOPT(XLPragmaPack, 1, 0, "IBM XL #pragma pack handling") 
+ENUM_LANGOPT(Cilk, CilkVersion, 3, Cilk_none, "Enable Cilk language extensions") + +BENIGN_ENUM_LANGOPT(Cilktool, CilktoolKind, 3, + Cilktool_None, "turn on the specified Cilk tool") + LANGOPT(RetainCommentsFromSystemHeaders, 1, 0, "retain documentation comments from system headers in the AST") LANGOPT(SanitizeAddressFieldPadding, 2, 0, "controls how aggressive is ASan " @@ -402,6 +407,9 @@ LANGOPT(SanitizeAddressFieldPadding, 2, 0, "controls how aggressive is ASan " LANGOPT(Cmse, 1, 0, "ARM Security extensions support") +BENIGN_ENUM_LANGOPT(ComprehensiveStaticInstrumentation, CSIExtensionPoint, 3, + CSI_None, "turn on Comprehensive Static Instrumentation") + LANGOPT(XRayInstrument, 1, 0, "controls whether to do XRay instrumentation") LANGOPT(XRayAlwaysEmitCustomEvents, 1, 0, "controls whether to always emit intrinsic calls to " diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index 3ef68ca8af6685..473dbe40b10484 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -14,6 +14,7 @@ #ifndef LLVM_CLANG_BASIC_LANGOPTIONS_H #define LLVM_CLANG_BASIC_LANGOPTIONS_H +#include "clang/Basic/Cilk.h" #include "clang/Basic/CommentOptions.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/LangStandard.h" @@ -380,6 +381,32 @@ class LangOptions : public LangOptionsBase { IncompleteOnly = 3, }; + enum CSIExtensionPoint { + // Don't run CSI + CSI_None = 0, + // The following extension points should be consistent with the extension + // points allowed by the pass manager, except for EnabledOnOptLevel0. 
+ CSI_EarlyAsPossible, + CSI_ModuleOptimizerEarly, + CSI_OptimizerLast, + CSI_TapirLate, + CSI_TapirLoopEnd + }; + + enum CilktoolKind { + // No Cilktool + Cilktool_None = 0, + Cilktool_Cilkscale, + Cilktool_Cilkscale_InstructionCount, + Cilktool_Cilkscale_Benchmark + }; + + enum CilkVersion { + Cilk_none = 0, + Cilk_plus = 1, + Cilk_opencilk = 2 + }; + public: /// The used language standard. LangStandard::Kind LangStd; @@ -491,6 +518,9 @@ class LangOptions : public LangOptionsBase { // received as a result of a standard operator new (-fcheck-new) bool CheckNew = false; + /// Set of enabled Cilk options. + CilkOptionSet CilkOptions; + LangOptions(); /// Set language defaults for the given input language and diff --git a/clang/include/clang/Basic/Sanitizers.def b/clang/include/clang/Basic/Sanitizers.def index c2137e3f61f645..73b3c70158838b 100644 --- a/clang/include/clang/Basic/Sanitizers.def +++ b/clang/include/clang/Basic/Sanitizers.def @@ -76,6 +76,9 @@ SANITIZER("fuzzer-no-link", FuzzerNoLink) // ThreadSanitizer SANITIZER("thread", Thread) +// CilkSanitizer +SANITIZER("cilk", Cilk) + // LeakSanitizer SANITIZER("leak", Leak) diff --git a/clang/include/clang/Basic/StmtNodes.td b/clang/include/clang/Basic/StmtNodes.td index 4b31e06eb2cdb7..2b62294b4c0340 100644 --- a/clang/include/clang/Basic/StmtNodes.td +++ b/clang/include/clang/Basic/StmtNodes.td @@ -217,6 +217,13 @@ def MSDependentExistsStmt : StmtNode; // OpenCL Extensions. def AsTypeExpr : StmtNode; +// Cilk Plus Extensions. +def CilkSyncStmt : StmtNode; +def CilkSpawnStmt : StmtNode; +def CilkSpawnExpr : StmtNode; +def CilkForStmt : StmtNode; +def CilkScopeStmt : StmtNode; + // OpenMP Directives. 
def OMPCanonicalLoop : StmtNode; def OMPExecutableDirective : StmtNode; diff --git a/clang/include/clang/Basic/Tapir.h b/clang/include/clang/Basic/Tapir.h new file mode 100644 index 00000000000000..0d038af275d28d --- /dev/null +++ b/clang/include/clang/Basic/Tapir.h @@ -0,0 +1,24 @@ +//===--- Tapir.h - C Language Family Language Options -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// Defines helper functions for processing flags related to Tapir. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_BASIC_TAPIR_H +#define LLVM_CLANG_BASIC_TAPIR_H + +#include "llvm/Transforms/Tapir/TapirTargetIDs.h" + +namespace clang { + +using TapirTargetID = llvm::TapirTargetID; + +} // end namespace clang + +#endif diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index ef0dad0f2dcd96..7aec5d497fa989 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -325,6 +325,7 @@ KEYWORD(_Atomic , KEYALL|KEYNOOPENCL) KEYWORD(_Bool , KEYNOCXX) KEYWORD(_Complex , KEYALL) KEYWORD(_Generic , KEYALL) +KEYWORD(_Hyperobject , KEYALL) KEYWORD(_Imaginary , KEYALL) KEYWORD(_Noreturn , KEYALL) KEYWORD(_Static_assert , KEYALL) @@ -470,6 +471,12 @@ KEYWORD(L__FUNCSIG__ , KEYMS) TYPE_TRAIT_1(__is_interface_class, IsInterfaceClass, KEYMS) TYPE_TRAIT_1(__is_sealed, IsSealed, KEYMS) +// Cilk Extensions +KEYWORD(_Cilk_spawn , KEYALL) +KEYWORD(_Cilk_sync , KEYALL) +KEYWORD(_Cilk_for , KEYALL) +KEYWORD(_Cilk_scope , KEYALL) + // MSVC12.0 / VS2013 Type Traits TYPE_TRAIT_1(__is_destructible, IsDestructible, KEYALL) TYPE_TRAIT_1(__is_trivially_destructible, IsTriviallyDestructible, 
KEYCXX) @@ -665,6 +672,7 @@ ALIAS("__complex__" , _Complex , KEYALL) ALIAS("__const" , const , KEYALL) ALIAS("__const__" , const , KEYALL) ALIAS("__decltype" , decltype , KEYCXX) +ALIAS("__hyperobject", _Hyperobject, KEYALL) ALIAS("__imag__" , __imag , KEYALL) ALIAS("__inline" , inline , KEYALL) ALIAS("__inline__" , inline , KEYALL) diff --git a/clang/include/clang/Basic/TypeNodes.td b/clang/include/clang/Basic/TypeNodes.td index 649b071cebb940..4d58012bbecbbe 100644 --- a/clang/include/clang/Basic/TypeNodes.td +++ b/clang/include/clang/Basic/TypeNodes.td @@ -111,3 +111,4 @@ def PipeType : TypeNode; def AtomicType : TypeNode; def BitIntType : TypeNode; def DependentBitIntType : TypeNode, AlwaysDependent; +def HyperobjectType : TypeNode; diff --git a/clang/include/clang/Config/config.h.cmake b/clang/include/clang/Config/config.h.cmake index a54a26cd32ffe4..1ebaafbfb91eb2 100644 --- a/clang/include/clang/Config/config.h.cmake +++ b/clang/include/clang/Config/config.h.cmake @@ -35,6 +35,9 @@ /* Multilib basename for libdir. */ #define CLANG_INSTALL_LIBDIR_BASENAME "${CLANG_INSTALL_LIBDIR_BASENAME}" +/* Default Tapir runtime used by -ftapir. 
*/ +#define CLANG_DEFAULT_TAPIR_RUNTIME "${CLANG_DEFAULT_TAPIR_RUNTIME}" + /* Relative directory for resource files */ #define CLANG_RESOURCE_DIR "${CLANG_RESOURCE_DIR}" diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 37e8c56b2d295e..81b01dc51ce61e 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2041,6 +2041,21 @@ def fsanitize_undefined_strip_path_components_EQ : Joined<["-"], "fsanitize-unde HelpText<"Strip (or keep only, if negative) a given number of path components " "when emitting check metadata.">, MarshallingInfoInt, "0", "int">; +def fcsi : Flag<["-"], "fcsi">, Group, + MetaVarName<"">, + HelpText<"Turn on Comprehensive Static Instrumentation.">; +def fcsi_EQ : Joined<["-"], "fcsi=">, Group, + MetaVarName<"">, + HelpText<"Turn on Comprehensive Static Instrumentation at the " + "specified point in the optimizer: first, early, " + "tapirlate, aftertapirloops, last">; +def fcilktool_EQ : Joined<["-"], "fcilktool=">, Group, + MetaVarName<"">, + HelpText<"Turn on the designated Cilk tool">; +def shared_libcilktool : Flag<["-"], "shared-libcilktool">, + HelpText<"Dynamically link the cilktool runtime">; +def static_libcilktool : Flag<["-"], "static-libcilktool">, + HelpText<"Statically link the cilktool runtime">; } // end -f[no-]sanitize* flags @@ -2605,6 +2620,23 @@ def fno_knr_functions : Flag<["-"], "fno-knr-functions">, Group, HelpText<"Disable support for K&R C function declarations">, Flags<[CC1Option, CoreOption]>; +def ftapir_EQ : Joined<["-"], "ftapir=">, Group, Flags<[CC1Option]>, + HelpText<"Choose the backend parallel runtime for Tapir instructions">, + Values<"none,serial,cilkplus,cheetah,lambda,omptask,opencilk,qthreads">; +def fcilkplus : Flag<["-"], "fcilkplus">, Group, Flags<[CC1Option]>, + HelpText<"Enable Cilk Plus extensions">; +def fopencilk : Flag<["-"], "fopencilk">, Group, Flags<[CC1Option]>, + HelpText<"Enable OpenCilk extensions">; +def 
static_libopencilk : Flag<["-"], "static-libopencilk">; +def opencilk_resource_dir_EQ : Joined<["--"], "opencilk-resource-dir=">, + Flags<[NoXarchOption]>, + HelpText<"The directory that holds OpenCilk resource files">; +def opencilk_abi_bitcode_EQ : Joined<["--"], "opencilk-abi-bitcode=">, + Flags<[CC1Option]>, HelpText<"Path to OpenCilk ABI bitcode file">, + MarshallingInfoString>; +def fopencilk_enable_pedigrees : Flag<["-"], "fopencilk-enable-pedigrees">, + Group, Flags<[CC1Option]>, HelpText<"Enable OpenCilk pedigrees">; + def fmudflapth : Flag<["-"], "fmudflapth">, Group; def fmudflap : Flag<["-"], "fmudflap">, Group; def fnested_functions : Flag<["-"], "fnested-functions">, Group; @@ -3106,6 +3138,10 @@ def Wlarge_by_value_copy_def : Flag<["-"], "Wlarge-by-value-copy">, def Wlarge_by_value_copy_EQ : Joined<["-"], "Wlarge-by-value-copy=">, Flags<[CC1Option]>, MarshallingInfoInt>; +def fstripmine : Flag<["-"], "fstripmine">, Group, + HelpText<"Enable the Tapir loop stripmining passes">; +def fno_stripmine : Flag<["-"], "fno-stripmine">, Group; + // These "special" warning flags are effectively processed as f_Group flags by the driver: // Just silence warnings about -Wlarger-than for now. 
def Wlarger_than_EQ : Joined<["-"], "Wlarger-than=">, Group; @@ -5910,6 +5946,9 @@ def vectorize_loops : Flag<["-"], "vectorize-loops">, def vectorize_slp : Flag<["-"], "vectorize-slp">, HelpText<"Run the SLP vectorization passes">, MarshallingInfoFlag>; +def stripmine_loops : Flag<["-"], "stripmine-loops">, + HelpText<"Run the Tapir Loop stripmining passes">, + MarshallingInfoFlag>; def dependent_lib : Joined<["--"], "dependent-lib=">, HelpText<"Add dependent library">, MarshallingInfoStringVector>; diff --git a/clang/include/clang/Driver/SanitizerArgs.h b/clang/include/clang/Driver/SanitizerArgs.h index 047b50626c44c5..2d15c1da4d9dac 100644 --- a/clang/include/clang/Driver/SanitizerArgs.h +++ b/clang/include/clang/Driver/SanitizerArgs.h @@ -86,6 +86,7 @@ class SanitizerArgs { return needsHwasanRt() && HwasanUseAliases; } bool needsTsanRt() const { return Sanitizers.has(SanitizerKind::Thread); } + bool needsCilksanRt() const { return Sanitizers.has(SanitizerKind::Cilk); } bool needsMsanRt() const { return Sanitizers.has(SanitizerKind::Memory); } bool needsFuzzer() const { return Sanitizers.has(SanitizerKind::Fuzzer); } bool needsLsanRt() const { diff --git a/clang/include/clang/Driver/Tapir.h b/clang/include/clang/Driver/Tapir.h new file mode 100644 index 00000000000000..a512a0ef830a88 --- /dev/null +++ b/clang/include/clang/Driver/Tapir.h @@ -0,0 +1,31 @@ +//===--- Tapir.h - C Language Family Language Options -----------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// Defines helper functions for processing flags related to Tapir. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_DRIVER_TAPIR_H +#define LLVM_CLANG_DRIVER_TAPIR_H + +#include "clang/Basic/Tapir.h" + +namespace llvm { +namespace opt { + class ArgList; +} +} + +namespace clang { + +TapirTargetID parseTapirTarget(const llvm::opt::ArgList &Args); +std::optional serializeTapirTarget(TapirTargetID Target); + +} // end namespace clang + +#endif diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index 2e74507f71267c..8b41a49a15caa3 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -793,6 +793,39 @@ class ToolChain { } return TT; } + + /// Check the specified OpenCilk resource directory is valid. + virtual void AddOpenCilkIncludeDir(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; + + /// Get the OpenCilk library path if it exists. + virtual path_list + getOpenCilkRuntimePaths(const llvm::opt::ArgList &Args) const; + + virtual std::string getOpenCilkBCBasename(const llvm::opt::ArgList &Args, + StringRef Component, + bool AddArch) const; + + virtual std::optional + getOpenCilkBC(const llvm::opt::ArgList &Args, StringRef Component) const; + + virtual std::string getOpenCilkRTBasename(const llvm::opt::ArgList &Args, + StringRef Component, FileType Type, + bool AddArch) const; + + virtual std::string getOpenCilkRT(const llvm::opt::ArgList &Args, + StringRef Component, FileType Type) const; + + /// AddOpenCilkBitcodeABI - Add compiler arguments for linking against the + /// OpenCilk runtime ABI bitcode file. + virtual void AddOpenCilkABIBitcode(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs, + bool IsLTO = false) const; + + /// AddTapirRuntimeLibArgs - Add the specific linker arguments to use for the + /// given Tapir runtime library type. 
+ virtual void AddTapirRuntimeLibArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; }; /// Set a ToolChain's effective triple. Reset it when the registration object diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 475dfe845528d9..5bde1fb7f69db9 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -205,6 +205,7 @@ class Parser : public CodeCompletionHandler { std::unique_ptr MSFenvAccess; std::unique_ptr MSAllocText; std::unique_ptr CUDAForceHostDeviceHandler; + std::unique_ptr CilkHintHandler; std::unique_ptr OptimizeHandler; std::unique_ptr LoopHintHandler; std::unique_ptr UnrollHintHandler; @@ -2125,6 +2126,10 @@ class Parser : public CodeCompletionHandler { StmtResult ParseContinueStatement(); StmtResult ParseBreakStatement(); StmtResult ParseReturnStatement(); + StmtResult ParseCilkSpawnStatement(); + StmtResult ParseCilkSyncStatement(); + StmtResult ParseCilkForStatement(SourceLocation *TrailingElseLoc); + StmtResult ParseCilkScopeStatement(); StmtResult ParseAsmStatement(bool &msAsm); StmtResult ParseMicrosoftAsmStatement(SourceLocation AsmLoc); StmtResult ParsePragmaLoopHint(StmtVector &Stmts, ParsedStmtContext StmtCtx, diff --git a/clang/include/clang/Sema/DeclSpec.h b/clang/include/clang/Sema/DeclSpec.h index c63378c732908d..a4cc0e4c1920ed 100644 --- a/clang/include/clang/Sema/DeclSpec.h +++ b/clang/include/clang/Sema/DeclSpec.h @@ -1213,7 +1213,8 @@ struct DeclaratorChunk { DeclaratorChunk() {}; enum { - Pointer, Reference, Array, Function, BlockPointer, MemberPointer, Paren, Pipe + Pointer, Reference, Array, Function, BlockPointer, MemberPointer, + Paren, Pipe, Hyperobject } Kind; /// Loc - The place where this type was defined. 
@@ -1584,6 +1585,16 @@ struct DeclaratorChunk { void destroy() {} }; + struct HyperobjectTypeInfo { + SourceLocation LParenLoc; + SourceLocation RParenLoc; + Expr *Arg[2]; + void destroy() { + Arg[0] = nullptr; + Arg[1] = nullptr; + } + }; + union { PointerTypeInfo Ptr; ReferenceTypeInfo Ref; @@ -1592,6 +1603,7 @@ struct DeclaratorChunk { BlockPointerTypeInfo Cls; MemberPointerTypeInfo Mem; PipeTypeInfo PipeInfo; + HyperobjectTypeInfo Hyper; }; void destroy() { @@ -1604,6 +1616,7 @@ struct DeclaratorChunk { case DeclaratorChunk::MemberPointer: return Mem.destroy(); case DeclaratorChunk::Paren: return; case DeclaratorChunk::Pipe: return PipeInfo.destroy(); + case DeclaratorChunk::Hyperobject: return Hyper.destroy(); } } @@ -1721,6 +1734,21 @@ struct DeclaratorChunk { return I; } + static DeclaratorChunk getHyperobject(unsigned TypeQuals, + SourceLocation Loc, + SourceLocation LParen, + SourceLocation RParen, + Expr *E0, Expr *E1) { + DeclaratorChunk I; + I.Kind = Hyperobject; + I.Loc = Loc; + I.Hyper.LParenLoc = LParen; + I.Hyper.RParenLoc = RParen; + I.Hyper.Arg[0] = E0; + I.Hyper.Arg[1] = E1; + return I; + } + /// Return a DeclaratorChunk for a paren. static DeclaratorChunk getParen(SourceLocation LParenLoc, SourceLocation RParenLoc) { @@ -2389,6 +2417,7 @@ class Declarator { case DeclaratorChunk::BlockPointer: case DeclaratorChunk::MemberPointer: case DeclaratorChunk::Pipe: + case DeclaratorChunk::Hyperobject: return false; } llvm_unreachable("Invalid type chunk"); diff --git a/clang/include/clang/Sema/Initialization.h b/clang/include/clang/Sema/Initialization.h index 2072cd8d1c3ef8..3ad8f71d513fd2 100644 --- a/clang/include/clang/Sema/Initialization.h +++ b/clang/include/clang/Sema/Initialization.h @@ -446,7 +446,7 @@ class alignas(8) InitializedEntity { const InitializedEntity *getParent() const { return Parent; } /// Retrieve type being initialized. 
- QualType getType() const { return Type; } + QualType getType() const { return Type.stripHyperobject(); } /// Retrieve complete type-source information for the object being /// constructed, if known. @@ -941,7 +941,10 @@ class InitializationSequence { /// Initialize an aggreagate with parenthesized list of values. /// This is a C++20 feature. - SK_ParenthesizedListInit + SK_ParenthesizedListInit, + + /// OpenCilk + SK_ViewLookup }; /// A single step in the initialization sequence. @@ -988,6 +991,10 @@ class InitializationSequence { /// The kind of initialization sequence computed. enum SequenceKind SequenceKind; + /// Whether this initialization sequence is spawned. + bool IsSpawned = false; + SourceLocation SpawnLoc; + /// Steps taken by this initialization. SmallVector Steps; @@ -1340,6 +1347,8 @@ class InitializationSequence { /// Add a zero-initialization step. void AddZeroInitializationStep(QualType T); + void AddViewLookup(QualType T); + /// Add a C assignment step. // // FIXME: It isn't clear whether this should ever be needed; diff --git a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h index a97968dc7b2096..3d27ef925eb860 100644 --- a/clang/include/clang/Sema/Overload.h +++ b/clang/include/clang/Sema/Overload.h @@ -101,6 +101,8 @@ class Sema; /// convert an argument to a parameter's type. The enumerator values /// match with the table titled 'Conversions' in [over.ics.scs] and are listed /// such that better conversion kinds have smaller values. + /// Changes to the next two enumerations require corresponding changes + /// to clang::GetConversionRank. 
enum ImplicitConversionKind { /// Identity conversion (no conversion) ICK_Identity = 0, @@ -114,6 +116,9 @@ class Sema; /// Function-to-pointer (C++ [conv.array]) ICK_Function_To_Pointer, + /// OpenCilk extension + ICK_Hyperobject_To_View, + /// Function pointer conversion (C++17 [conv.fctptr]) ICK_Function_Conversion, diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 3752a23faa85cb..2230e4c12bf428 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -2092,6 +2092,8 @@ class Sema final { SourceLocation AttrLoc); QualType BuildMatrixType(QualType T, Expr *NumRows, Expr *NumColumns, SourceLocation AttrLoc); + QualType BuildHyperobjectType(QualType Element, Expr *Identity, Expr *Reduce, + SourceLocation Loc); QualType BuildAddressSpaceAttr(QualType &T, LangAS ASIdx, Expr *AddrSpace, SourceLocation AttrLoc); @@ -5241,6 +5243,35 @@ class Sema final { const NamedReturnInfo &NRInfo, Expr *Value, bool SupressSimplerImplicitMoves = false); + void DiagnoseCilkSpawn(Stmt *S); + StmtResult ActOnCilkScopeStmt(SourceLocation ScopeLoc, Stmt *S); + StmtResult ActOnCilkSyncStmt(SourceLocation SyncLoc); + StmtResult ActOnCilkSpawnStmt(SourceLocation SpawnLoc, Stmt *S); + ExprResult ActOnCilkSpawnExpr(SourceLocation SpawnLoc, Expr *E); + StmtResult HandleSimpleCilkForStmt(SourceLocation CilkForLoc, + SourceLocation LParenLoc, + Stmt *First, + Expr *Condition, + Expr *Increment, + SourceLocation RParenLoc, + Stmt *Body); + StmtResult LiftCilkForLoopLimit(SourceLocation CilkForLoc, + Stmt *First, Expr **Second); + StmtResult ActOnCilkForStmt(SourceLocation CilkForLoc, + SourceLocation LParenLoc, Stmt *Init, + DeclStmt *Limit, ConditionResult InitCond, + DeclStmt *Begin, DeclStmt *End, + ConditionResult second, FullExprArg third, + SourceLocation RParenLoc, Stmt *Body, + DeclStmt *LoopVar = nullptr); + + StmtResult BuildCilkForStmt(SourceLocation CilkForLoc, + SourceLocation LParenLoc, + Stmt *Init, Expr *Cond, Expr 
*Inc, + SourceLocation RParenLoc, Stmt *Body, + Expr *LoopCount, Expr *Stride, + QualType SpanType); + StmtResult ActOnReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp, Scope *CurScope); StmtResult BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp, @@ -5651,6 +5682,9 @@ class Sema final { SourceLocation TemplateKWLoc = SourceLocation(), const TemplateArgumentListInfo *TemplateArgs = nullptr); + Expr *BuildHyperobjectLookup(Expr *, bool Pointer = false); + Expr *ValidateReducerCallback(Expr *E, unsigned NumArgs, SourceLocation Loc); + ExprResult BuildAnonymousStructUnionMemberReference( const CXXScopeSpec &SS, diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 2ae9e09998c4c1..cb319c0a401344 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -2023,6 +2023,13 @@ enum StmtCode { // SYCLUniqueStableNameExpr EXPR_SYCL_UNIQUE_STABLE_NAME, + + // Cilk Plus + STMT_CILKSPAWN, + EXPR_CILKSPAWN, + STMT_CILKSYNC, + STMT_CILKFOR, + STMT_CILKSCOPE, }; /// The kinds of designators that can occur in a diff --git a/clang/include/clang/Serialization/TypeBitCodes.def b/clang/include/clang/Serialization/TypeBitCodes.def index 89ae1a2fa39546..6acbc505e78cdb 100644 --- a/clang/include/clang/Serialization/TypeBitCodes.def +++ b/clang/include/clang/Serialization/TypeBitCodes.def @@ -64,5 +64,6 @@ TYPE_BIT_CODE(ConstantMatrix, CONSTANT_MATRIX, 52) TYPE_BIT_CODE(DependentSizedMatrix, DEPENDENT_SIZE_MATRIX, 53) TYPE_BIT_CODE(Using, USING, 54) TYPE_BIT_CODE(BTFTagAttributed, BTFTAG_ATTRIBUTED, 55) +TYPE_BIT_CODE(Hyperobject, HYPEROBJECT, 56) #undef TYPE_BIT_CODE diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 76000156fece7c..5f274737ebcc5d 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -2287,6 +2287,8 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const { Align = EltInfo.Align; 
break; } + case Type::Hyperobject: + return getTypeInfo(cast(T)->getElementType()); case Type::ObjCObject: return getTypeInfo(cast(T)->getBaseType().getTypePtr()); case Type::Adjusted: @@ -3311,6 +3313,64 @@ QualType ASTContext::getComplexType(QualType T) const { return QualType(New, 0); } +static const FunctionDecl *getFunction(Expr *E) { + if (!E || E->getType()->isDependentType()) + return nullptr; + E = E->IgnoreImpCasts(); + if (const UnaryOperator *U = dyn_cast(E)) { + if (U->getOpcode() != UO_AddrOf) + return nullptr; + E = U->getSubExpr(); + } + const DeclRefExpr *DR = dyn_cast(E->IgnoreImpCasts()); + if (!DR) + return nullptr; + const FunctionDecl *F = dyn_cast(DR->getDecl()); + if (!F) + return nullptr; + return F->getFirstDecl(); +} + +QualType ASTContext::getHyperobjectType(QualType T, Expr *I, Expr *R) const { + assert(I && R); + bool IN = HyperobjectType::isNullish(I); + bool RN = HyperobjectType::isNullish(R); + + const FunctionDecl *IF = getFunction(I); + const FunctionDecl *RF = getFunction(R); + bool Varies = (!IN && !IF) || (!RN && !RF); + + QualType Canonical; + if (!T.isCanonical()) + Canonical = getHyperobjectType(getCanonicalType(T), I, R); + + // Do not unique hyperobject types with variable expressions. + if (Varies) { + auto *New = + new (*this, TypeAlignment) + HyperobjectType(T, Canonical, I, IF, R, RF); + Types.push_back(New); + return QualType(New, 0); + } + + // Unique pointers, to guarantee there is only one pointer of a particular + // structure. + // TODO: 0 and nullptr are not properly treated as equivalent here. 
+ llvm::FoldingSetNodeID ID; + HyperobjectType::Profile(ID, T, IF, RF); + + void *InsertPos = nullptr; + if (HyperobjectType *HT = HyperobjectTypes.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(HT, 0); + + auto *New = + new (*this, TypeAlignment) + HyperobjectType(T, Canonical, I, IF, R, RF); + Types.push_back(New); + HyperobjectTypes.InsertNode(New, InsertPos); + return QualType(New, 0); +} + /// getPointerType - Return the uniqued reference to the type for a pointer to /// the specified type. QualType ASTContext::getPointerType(QualType T) const { @@ -3689,6 +3749,9 @@ QualType ASTContext::getVariableArrayDecayedType(QualType type) const { break; } + case Type::Hyperobject: + return getVariableArrayDecayedType(cast(ty)->getElementType()); + case Type::ConstantArray: { const auto *cat = cast(ty); result = getConstantArrayType( @@ -8223,6 +8286,10 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string &S, /*Field=*/nullptr); return; + case Type::Hyperobject: + llvm_unreachable("hyperobject not implemented"); + return; + case Type::Atomic: S += 'A'; getObjCEncodingForTypeImpl(T->castAs()->getValueType(), S, @@ -9411,6 +9478,21 @@ Qualifiers::GC ASTContext::getObjCGCAttrKind(QualType Ty) const { // Type Compatibility Testing //===----------------------------------------------------------------------===// +static QualType mergeHyperobjectTypes(QualType LQ, QualType RQ) { + const HyperobjectType *LH = LQ->castAs(); + const HyperobjectType *RH = RQ->castAs(); + if (LH->getElementType() != RH->getElementType()) + return {}; + bool LeftCallbacks = LH->hasCallbacks(), RightCallbacks = RH->hasCallbacks(); + if (LeftCallbacks && RightCallbacks) + return {}; + if (LeftCallbacks && !RightCallbacks) + return LQ; + if (RightCallbacks) + return RQ; + llvm_unreachable("hyperobjects not uniqued"); +} + /// areCompatVectorTypes - Return true if the two specified vector types are /// compatible. 
static bool areCompatVectorTypes(const VectorType *LHS, @@ -10846,6 +10928,8 @@ QualType ASTContext::mergeTypes(QualType LHS, QualType RHS, bool OfBlockPointer, case Type::Complex: // Distinct complex types are incompatible. return {}; + case Type::Hyperobject: + return mergeHyperobjectTypes(LHSCan, RHSCan); case Type::Vector: // FIXME: The merged type should be an ExtVector! if (areCompatVectorTypes(LHSCan->castAs(), @@ -12972,6 +13056,14 @@ static QualType getCommonNonSugarTypeNode(ASTContext &Ctx, const Type *X, TX->getDepth(), TX->getIndex(), TX->isParameterPack(), getCommonDecl(TX->getDecl(), TY->getDecl())); } + case Type::Hyperobject: { + const auto *HX = cast(X), *HY = cast(Y); + assert(Ctx.hasSameExpr(HX->getIdentity(), HY->getIdentity())); + assert(Ctx.hasSameExpr(HX->getReduce(), HY->getReduce())); + return Ctx.getHyperobjectType( + Ctx.getCommonSugaredType(HX->getElementType(), HY->getElementType()), + HX->getIdentity(), HX->getReduce()); + } } llvm_unreachable("Unknown Type Class"); } @@ -13002,6 +13094,7 @@ static QualType getCommonSugarTypeNode(ASTContext &Ctx, const Type *X, CANONICAL_TYPE(ExtVector) CANONICAL_TYPE(FunctionNoProto) CANONICAL_TYPE(FunctionProto) + CANONICAL_TYPE(Hyperobject) CANONICAL_TYPE(IncompleteArray) CANONICAL_TYPE(LValueReference) CANONICAL_TYPE(MemberPointer) diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 39c7a8fa397048..c1e1c7a3d30972 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -609,6 +609,10 @@ namespace clang { ExpectedStmt VisitObjCAtSynchronizedStmt(ObjCAtSynchronizedStmt *S); ExpectedStmt VisitObjCAtThrowStmt(ObjCAtThrowStmt *S); ExpectedStmt VisitObjCAutoreleasePoolStmt(ObjCAutoreleasePoolStmt *S); + ExpectedStmt VisitCilkSpawnStmt(CilkSpawnStmt *S); + ExpectedStmt VisitCilkSyncStmt(CilkSyncStmt *S); + ExpectedStmt VisitCilkForStmt(CilkForStmt *S); + ExpectedStmt VisitCilkScopeStmt(CilkScopeStmt *S); // Importing expressions ExpectedStmt 
VisitExpr(Expr *E); @@ -6953,6 +6957,57 @@ ExpectedStmt ASTNodeImporter::VisitObjCAutoreleasePoolStmt( *ToSubStmtOrErr); } +ExpectedStmt ASTNodeImporter::VisitCilkSpawnStmt(CilkSpawnStmt *S) { + ExpectedSLoc ToSpawnLocOrErr = import(S->getSpawnLoc()); + if (!ToSpawnLocOrErr) + return ToSpawnLocOrErr.takeError(); + ExpectedStmt ToChildOrErr = import(S->getSpawnedStmt()); + if (!ToChildOrErr) + return ToChildOrErr.takeError(); + return new (Importer.getToContext()) CilkSpawnStmt(*ToSpawnLocOrErr, + *ToChildOrErr); +} + +ExpectedStmt ASTNodeImporter::VisitCilkSyncStmt(CilkSyncStmt *S) { + ExpectedSLoc ToSyncLocOrErr = import(S->getSyncLoc()); + if (!ToSyncLocOrErr) + return ToSyncLocOrErr.takeError(); + return new (Importer.getToContext()) CilkSyncStmt(*ToSyncLocOrErr); +} + +ExpectedStmt ASTNodeImporter::VisitCilkForStmt(CilkForStmt *S) { + Error Err = Error::success(); + auto ToInit = importChecked(Err, S->getInit()); + auto ToLimitStmt = importChecked(Err, S->getLimitStmt()); + auto ToInitCond = importChecked(Err, S->getInitCond()); + auto ToBeginStmt = importChecked(Err, S->getBeginStmt()); + auto ToEndStmt = importChecked(Err, S->getEndStmt()); + auto ToCond = importChecked(Err, S->getCond()); + auto ToInc = importChecked(Err, S->getInc()); + auto ToLoopVarStmt = importChecked(Err, S->getLoopVarStmt()); + auto ToBody = importChecked(Err, S->getBody()); + auto ToCilkForLoc = importChecked(Err, S->getCilkForLoc()); + auto ToLParenLoc = importChecked(Err, S->getLParenLoc()); + auto ToRParenLoc = importChecked(Err, S->getRParenLoc()); + if (Err) + return std::move(Err); + + return new (Importer.getToContext()) CilkForStmt( + ToInit, ToLimitStmt, ToInitCond, ToBeginStmt, ToEndStmt, ToCond, ToInc, + ToLoopVarStmt, ToBody, ToCilkForLoc, ToLParenLoc, ToRParenLoc); +} + +ExpectedStmt ASTNodeImporter::VisitCilkScopeStmt(CilkScopeStmt *S) { + ExpectedSLoc ToScopeLocOrErr = import(S->getScopeLoc()); + if (!ToScopeLocOrErr) + return ToScopeLocOrErr.takeError(); + ExpectedStmt 
ToChildOrErr = import(S->getBody()); + if (!ToChildOrErr) + return ToChildOrErr.takeError(); + return new (Importer.getToContext()) CilkScopeStmt(*ToScopeLocOrErr, + *ToChildOrErr); +} + //---------------------------------------------------------------------------- // Import Expressions //---------------------------------------------------------------------------- diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp index f867b6bf84beb7..eff319d1a01a4e 100644 --- a/clang/lib/AST/ASTStructuralEquivalence.cpp +++ b/clang/lib/AST/ASTStructuralEquivalence.cpp @@ -69,10 +69,12 @@ #include "clang/AST/DeclOpenMP.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/ExprCXX.h" +#include "clang/AST/ExprCilk.h" #include "clang/AST/ExprConcepts.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/ExprOpenMP.h" #include "clang/AST/NestedNameSpecifier.h" +#include "clang/AST/StmtCilk.h" #include "clang/AST/StmtObjC.h" #include "clang/AST/StmtOpenMP.h" #include "clang/AST/TemplateBase.h" @@ -753,6 +755,24 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, return false; break; + case Type::Hyperobject: { + const HyperobjectType *H1 = cast(T1); + const HyperobjectType *H2 = cast(T2); + Expr *R1 = H1->getReduce(), *R2 = H2->getReduce(); + Expr *I1 = H1->getIdentity(), *I2 = H2->getIdentity(); + if (!!I1 != !!I2 || !!R1 != !!R2) + return false; + if (I1 && !IsStructurallyEquivalent(Context, I1, I2)) + return false; + if (R1 && !IsStructurallyEquivalent(Context, R1, R2)) + return false; + if (!IsStructurallyEquivalent(Context, + cast(T1)->getElementType(), + cast(T2)->getElementType())) + return false; + break; + } + case Type::Adjusted: case Type::Decayed: if (!IsStructurallyEquivalent(Context, diff --git a/clang/lib/AST/AttrImpl.cpp b/clang/lib/AST/AttrImpl.cpp index f198a9acf8481f..b134632595073b 100644 --- a/clang/lib/AST/AttrImpl.cpp +++ b/clang/lib/AST/AttrImpl.cpp @@ -29,6 +29,9 @@ void
LoopHintAttr::printPrettyPragma(raw_ostream &OS, SpellingIndex == Pragma_unroll_and_jam) { OS << ' ' << getValueString(Policy); return; + } else if (SpellingIndex == Pragma_cilk) { + OS << getOptionName(option) << getValueString(Policy); + return; } assert(SpellingIndex == Pragma_clang_loop && "Unexpected spelling"); @@ -78,6 +81,8 @@ LoopHintAttr::getDiagnosticName(const PrintingPolicy &Policy) const { else if (SpellingIndex == Pragma_unroll_and_jam) return "#pragma unroll_and_jam" + (option == UnrollAndJamCount ? getValueString(Policy) : ""); + else if (SpellingIndex == Pragma_cilk) + return getOptionName(option) + getValueString(Policy); assert(SpellingIndex == Pragma_clang_loop && "Unexpected spelling"); return getOptionName(option) + getValueString(Policy); diff --git a/clang/lib/AST/ComputeDependence.cpp b/clang/lib/AST/ComputeDependence.cpp index 09df5401d6693a..5453359ce35f1d 100644 --- a/clang/lib/AST/ComputeDependence.cpp +++ b/clang/lib/AST/ComputeDependence.cpp @@ -13,6 +13,7 @@ #include "clang/AST/DependenceFlags.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" +#include "clang/AST/ExprCilk.h" #include "clang/AST/ExprConcepts.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/ExprOpenMP.h" @@ -908,3 +909,7 @@ ExprDependence clang::computeDependence(ObjCMessageExpr *E) { D |= A->getDependence(); return D; } + +ExprDependence clang::computeDependence(CilkSpawnExpr *E) { + return E->getSpawnedExpr()->getDependence(); +} diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index fbc45fb6397fdc..d8d59f95eb1fad 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -2762,7 +2762,20 @@ VarDecl::needsDestruction(const ASTContext &Ctx) const { if (isNoDestroy(Ctx)) return QualType::DK_none; - return getType().isDestructedType(); + QualType Type = getType(); + + if (const HyperobjectType *H = Type->getAs()) { + // CodeGenFunction::destroyHyperobject will run the inner destructor. 
+ if (H->hasCallbacks()) + return QualType::DK_hyperobject; + Type = H->getElementType(); + } + + QualType::DestructionKind Kind = Type.isDestructedType(); + if (Kind != QualType::DK_none) + return Kind; + + return QualType::DK_none; } bool VarDecl::hasFlexibleArrayInit(const ASTContext &Ctx) const { @@ -2845,6 +2858,12 @@ VarDecl::setInstantiationOfStaticDataMember(VarDecl *VD, getASTContext().setInstantiatedFromStaticDataMember(this, VD, TSK); } +bool VarDecl::isReducer() const { + if (const HyperobjectType *H = getType()->getAs()) + return H->hasCallbacks(); + return false; +} + //===----------------------------------------------------------------------===// // ParmVarDecl Implementation //===----------------------------------------------------------------------===// diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 6164a419d213fd..50321ba1ae3a9a 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -74,6 +74,8 @@ const CXXRecordDecl *Expr::getBestDynamicClassType() const { if (DerivedType->isDependentType()) return nullptr; + DerivedType = DerivedType.stripHyperobject(); + const RecordType *Ty = DerivedType->castAs(); Decl *D = Ty->getDecl(); return cast(D); @@ -3687,6 +3689,7 @@ bool Expr::HasSideEffects(const ASTContext &Ctx, case ConvertVectorExprClass: case AsTypeExprClass: case CXXParenListInitExprClass: + case CilkSpawnExprClass: // These have a side-effect if any subexpression does. 
break; diff --git a/clang/lib/AST/ExprClassification.cpp b/clang/lib/AST/ExprClassification.cpp index 12193b7812f9bf..59b57adb4a1098 100644 --- a/clang/lib/AST/ExprClassification.cpp +++ b/clang/lib/AST/ExprClassification.cpp @@ -15,6 +15,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" +#include "clang/AST/ExprCilk.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "llvm/Support/ErrorHandling.h" @@ -303,6 +304,11 @@ static Cl::Kinds ClassifyInternal(ASTContext &Ctx, const Expr *E) { case Expr::ParenExprClass: return ClassifyInternal(Ctx, cast(E)->getSubExpr()); + // A _Cilk_spawn does not affect the classification of the spawned + // expression. + case Expr::CilkSpawnExprClass: + return ClassifyInternal(Ctx, cast(E)->getSpawnedExpr()); + // C11 6.5.1.1p4: [A generic selection] is an lvalue, a function designator, // or a void expression if its result expression is, respectively, an // lvalue, a function designator, or a void expression. @@ -671,10 +677,15 @@ static Cl::ModifiableType IsModifiable(ASTContext &Ctx, const Expr *E, if (CT->isIncompleteType()) return Cl::CM_IncompleteType; - // Records with any const fields (recursively) are not modifiable. - if (const RecordType *R = CT->getAs()) + if (const RecordType *R = CT->getAs()) { + // Records with any const fields (recursively) are not modifiable. if (R->hasConstFields()) return Cl::CM_ConstQualifiedField; + // Records with hyperobject fields are not assignable as records. + // This is an implementation restriction. 
+ if (R->hasHyperobjectFields()) + return Cl::CM_HyperobjectField; + } return Cl::CM_Modifiable; } @@ -730,6 +741,7 @@ Expr::isModifiableLvalue(ASTContext &Ctx, SourceLocation *Loc) const { case Cl::CM_ConstQualified: return MLV_ConstQualified; case Cl::CM_ConstQualifiedField: return MLV_ConstQualifiedField; case Cl::CM_ConstAddrSpace: return MLV_ConstAddrSpace; + case Cl::CM_HyperobjectField: return MLV_HyperobjectField; case Cl::CM_ArrayType: return MLV_ArrayType; case Cl::CM_IncompleteType: return MLV_IncompleteType; } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 99ae88a6cd6924..19d4b0aee14507 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11462,6 +11462,7 @@ EvaluateBuiltinClassifyType(QualType T, const LangOptions &LangOpts) { return EvaluateBuiltinClassifyType( CanTy->castAs()->getValueType(), LangOpts); + case Type::Hyperobject: case Type::BlockPointer: case Type::Vector: case Type::ExtVector: @@ -15510,7 +15511,7 @@ bool Expr::EvaluateAsInitializer(APValue &Value, const ASTContext &Ctx, } SourceLocation DeclLoc = VD->getLocation(); - QualType DeclTy = VD->getType(); + QualType DeclTy = VD->getType().stripHyperobject(); return CheckConstantExpression(Info, DeclLoc, DeclTy, Value, ConstantExprKind::Normal) && CheckMemoryLeaks(Info); @@ -15790,6 +15791,8 @@ static ICEDiag CheckICE(const Expr* E, const ASTContext &Ctx) { return CheckICE(cast(E)->getSubExpr(), Ctx); case Expr::GenericSelectionExprClass: return CheckICE(cast(E)->getResultExpr(), Ctx); + case Expr::CilkSpawnExprClass: + return CheckICE(cast(E)->getSpawnedExpr(), Ctx); case Expr::IntegerLiteralClass: case Expr::FixedPointLiteralClass: case Expr::CharacterLiteralClass: diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index f08286a0d4baef..fa8c135105d8a9 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -2294,6 +2294,7 @@ bool 
CXXNameMangler::mangleUnresolvedTypeOrSimpleId(QualType Ty, case Type::Adjusted: case Type::Decayed: case Type::Pointer: + case Type::Hyperobject: case Type::BlockPointer: case Type::LValueReference: case Type::RValueReference: @@ -3580,6 +3581,11 @@ void CXXNameMangler::mangleType(const ComplexType *T) { mangleType(T->getElementType()); } +void CXXNameMangler::mangleType(const HyperobjectType *H) { + Out << 'H'; + mangleType(H->getElementType()); +} + // ARM's ABI for Neon vector types specifies that they should be mangled as // if they are structs (to match ARM's initial implementation). The // vector type must be one of the special types predefined by ARM. @@ -4442,6 +4448,7 @@ void CXXNameMangler::mangleExpression(const Expr *E, unsigned Arity, case Expr::OMPIteratorExprClass: case Expr::CXXInheritedCtorInitExprClass: case Expr::CXXParenListInitExprClass: + case Expr::CilkSpawnExprClass: llvm_unreachable("unexpected statement kind"); case Expr::ConstantExprClass: diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index 3306d90dc85664..563a036f7becbc 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -3133,6 +3133,11 @@ void MicrosoftCXXNameMangler::mangleType(const ComplexType *T, Qualifiers, mangleArtificialTagType(TTK_Struct, TemplateMangling, {"__clang"}); } +void MicrosoftCXXNameMangler::mangleType(const HyperobjectType *H, Qualifiers, + SourceRange Range) { + llvm_unreachable("hyperobject not implemented for Microsoft ABI"); +} + // Returns true for types that mangleArtificialTagType() gets called for with // TTK_Union, TTK_Struct, TTK_Class and where compatibility with MSVC's // mangling matters. 
diff --git a/clang/lib/AST/Stmt.cpp b/clang/lib/AST/Stmt.cpp index c31fb48a2addfa..4d7ca1fa0d1ffc 100644 --- a/clang/lib/AST/Stmt.cpp +++ b/clang/lib/AST/Stmt.cpp @@ -18,9 +18,12 @@ #include "clang/AST/DeclGroup.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" +#include "clang/AST/ExprCilk.h" #include "clang/AST/ExprConcepts.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/ExprOpenMP.h" +#include "clang/AST/Stmt.h" +#include "clang/AST/StmtCilk.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtObjC.h" #include "clang/AST/StmtOpenMP.h" @@ -1436,3 +1439,21 @@ bool CapturedStmt::capturesVariable(const VarDecl *Var) const { return false; } + +// CilkForStmt +CilkForStmt::CilkForStmt(Stmt *Init, DeclStmt *Limit, Expr *InitCond, + DeclStmt *BeginStmt, DeclStmt *EndStmt, Expr *Cond, + Expr *Inc, DeclStmt *LoopVar, Stmt *Body, + SourceLocation CFL, SourceLocation LP, + SourceLocation RP) + : Stmt(CilkForStmtClass), CilkForLoc(CFL), LParenLoc(LP), RParenLoc(RP) { + SubExprs[INIT] = Init; + SubExprs[LIMIT] = Limit; + SubExprs[INITCOND] = InitCond; + SubExprs[BEGINSTMT] = BeginStmt; + SubExprs[ENDSTMT] = EndStmt; + SubExprs[COND] = Cond; + SubExprs[INC] = Inc; + SubExprs[LOOPVAR] = LoopVar; + SubExprs[BODY] = Body; +} diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp index c3db500d8a8def..234509b20ac10e 100644 --- a/clang/lib/AST/StmtPrinter.cpp +++ b/clang/lib/AST/StmtPrinter.cpp @@ -20,6 +20,7 @@ #include "clang/AST/DeclOpenMP.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/Expr.h" +#include "clang/AST/ExprCilk.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/ExprOpenMP.h" @@ -476,6 +477,18 @@ void StmtPrinter::VisitBreakStmt(BreakStmt *Node) { if (Policy.IncludeNewlines) OS << NL; } +void StmtPrinter::VisitCilkSpawnStmt(CilkSpawnStmt *Node) { + Indent() << "_Cilk_spawn "; + PrintStmt(Node->getSpawnedStmt()); + OS << ";"; + if (Policy.IncludeNewlines) OS << "\n"; +} + +void 
StmtPrinter::VisitCilkSpawnExpr(CilkSpawnExpr *Node) { + Indent() << "_Cilk_spawn "; + PrintExpr(Node->getSpawnedExpr()); +} + void StmtPrinter::VisitReturnStmt(ReturnStmt *Node) { Indent() << "return"; if (Node->getRetValue()) { @@ -2740,6 +2753,60 @@ void StmtPrinter::VisitAsTypeExpr(AsTypeExpr *Node) { OS << ")"; } +void StmtPrinter::VisitCilkSyncStmt(CilkSyncStmt *) { + Indent() << "_Cilk_sync;"; + if (Policy.IncludeNewlines) OS << "\n"; +} + +void StmtPrinter::VisitCilkForStmt(CilkForStmt *Node) { + Indent() << "_Cilk_for ("; + if (Node->getInit()) { + if (DeclStmt *DS = dyn_cast(Node->getInit())) + PrintRawDeclStmt(DS); + else + PrintExpr(cast(Node->getInit())); + } + OS << ";"; + if (Node->getCond()) { + OS << " "; + PrintExpr(Node->getCond()); + } + OS << ";"; + if (Node->getInc()) { + OS << " "; + PrintExpr(Node->getInc()); + } + OS << ") "; + + if (const DeclStmt *DS = Node->getLoopVarStmt()) { + OS << "{\n"; + PrintRawDeclStmt(DS); + } + + if (CompoundStmt *CS = dyn_cast(Node->getBody())) { + PrintRawCompoundStmt(CS); + OS << "\n"; + } else { + OS << "\n"; + PrintStmt(Node->getBody()); + } + + if (Node->getLoopVarStmt()) + Indent() << "}"; +} + +void StmtPrinter::VisitCilkScopeStmt(CilkScopeStmt *Node) { + Indent() << "_Cilk_scope "; + + if (CompoundStmt *CS = dyn_cast(Node->getBody())) { + PrintRawCompoundStmt(CS); + OS << "\n"; + } else { + OS << "\n"; + PrintStmt(Node->getBody()); + } +} + //===----------------------------------------------------------------------===// // Stmt method implementations //===----------------------------------------------------------------------===// diff --git a/clang/lib/AST/StmtProfile.cpp b/clang/lib/AST/StmtProfile.cpp index d8a667b2d0fdc4..bcd93cfc13554b 100644 --- a/clang/lib/AST/StmtProfile.cpp +++ b/clang/lib/AST/StmtProfile.cpp @@ -2257,6 +2257,26 @@ void StmtProfiler::VisitCoyieldExpr(const CoyieldExpr *S) { VisitExpr(S); } +void StmtProfiler::VisitCilkForStmt(const CilkForStmt *S) { + VisitStmt(S); +} + +void 
StmtProfiler::VisitCilkSpawnStmt(const CilkSpawnStmt *S) { + VisitStmt(S); +} + +void StmtProfiler::VisitCilkSpawnExpr(const CilkSpawnExpr *E) { + VisitExpr(E); +} + +void StmtProfiler::VisitCilkSyncStmt(const CilkSyncStmt *S) { + VisitStmt(S); +} + +void StmtProfiler::VisitCilkScopeStmt(const CilkScopeStmt *S) { + VisitStmt(S); +} + void StmtProfiler::VisitOpaqueValueExpr(const OpaqueValueExpr *E) { VisitExpr(E); } diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 99c859034423bb..f7e43046720cd1 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -3330,6 +3330,54 @@ QualType QualType::getNonLValueExprType(const ASTContext &Context) const { return *this; } +QualType QualType::stripHyperobject() const { + if (const auto *Hyperobject = getTypePtr()->getAs()) + return Hyperobject->getElementType(); + return *this; +} + +/// Check if the expression is nullptr, 0, or contains an error. +/// The more general isNullPointerConstant requires a non-const ASTContext. 
+bool HyperobjectType::isNullish(Expr *E) { + E = E->IgnoreParenCasts(); + switch (E->getStmtClass()) { + case Expr::CXXNullPtrLiteralExprClass: + return true; + case Expr::IntegerLiteralClass: + return cast(E)->getValue().isZero(); + case Expr::TypoExprClass: + return true; + default: + return E->getType()->containsErrors(); + } +} + +HyperobjectType::HyperobjectType(QualType Element, QualType CanonicalPtr, + Expr *i, const FunctionDecl *ifn, + Expr *r, const FunctionDecl *rfn) + : Type(Hyperobject, CanonicalPtr, Element->getDependence()), + ElementType(Element), Identity(i), Reduce(r), + IdentityID(ifn), ReduceID(rfn) { +} + +bool HyperobjectType::hasCallbacks() const { + return Identity && Reduce && !isNullish(Identity) && !isNullish(Reduce); +} + +void HyperobjectType::Profile(llvm::FoldingSetNodeID &ID) const { + Profile(ID, getElementType(), IdentityID, ReduceID); +} + +void HyperobjectType::Profile(llvm::FoldingSetNodeID &ID, QualType Pointee, + const FunctionDecl *I, const FunctionDecl *R) { + ID.AddPointer(Pointee.getAsOpaquePtr()); + // In normal use both I and R will be non-null or neither of them will be. + if (I) + ID.AddPointer(I); + if (R) + ID.AddPointer(R); +} + StringRef FunctionType::getNameForCallConv(CallingConv CC) { switch (CC) { case CC_C: return "cdecl"; @@ -3758,6 +3806,29 @@ bool RecordType::hasConstFields() const { return false; } +// This is cut and pasted from hasConstFields. 
+bool RecordType::hasHyperobjectFields() const { + std::vector RecordTypeList; + RecordTypeList.push_back(this); + unsigned NextToCheckIndex = 0; + + while (RecordTypeList.size() > NextToCheckIndex) { + for (FieldDecl *FD : + RecordTypeList[NextToCheckIndex]->getDecl()->fields()) { + QualType FieldTy = FD->getType(); + if (FieldTy->isHyperobjectType()) + return true; + FieldTy = FieldTy.getCanonicalType(); + if (const auto *FieldRecTy = FieldTy->getAs()) { + if (llvm::find(RecordTypeList, FieldRecTy) == RecordTypeList.end()) + RecordTypeList.push_back(FieldRecTy); + } + } + ++NextToCheckIndex; + } + return false; +} + bool AttributedType::isQualifier() const { // FIXME: Generate this with TableGen. switch (getAttrKind()) { @@ -4182,6 +4253,8 @@ static CachedProperties computeCachedProperties(const Type *T) { // compounded exclusively from types that have linkage; or case Type::Complex: return Cache::get(cast(T)->getElementType()); + case Type::Hyperobject: + return Cache::get(cast(T)->getElementType()); case Type::Pointer: return Cache::get(cast(T)->getPointeeType()); case Type::BlockPointer: @@ -4269,6 +4342,8 @@ LinkageInfo LinkageComputer::computeTypeLinkageInfo(const Type *T) { case Type::Complex: return computeTypeLinkageInfo(cast(T)->getElementType()); + case Type::Hyperobject: + return computeTypeLinkageInfo(cast(T)->getElementType()); case Type::Pointer: return computeTypeLinkageInfo(cast(T)->getPointeeType()); case Type::BlockPointer: @@ -4452,6 +4527,7 @@ bool Type::canHaveNullability(bool ResultIfUnknown) const { // Non-pointer types. 
case Type::Complex: + case Type::Hyperobject: case Type::LValueReference: case Type::RValueReference: case Type::ConstantArray: @@ -4669,6 +4745,16 @@ QualType::DestructionKind QualType::isDestructedTypeImpl(QualType type) { return DK_objc_weak_lifetime; } + if (const HyperobjectType *HT = type->getAs()) { + QualType Inner = HT->getElementType(); + QualType::DestructionKind DK_Inner = isDestructedTypeImpl(Inner); + if (DK_Inner != DK_none) + return DK_Inner; + if (HT->hasCallbacks()) + return DK_hyperobject; + return DK_none; + } + if (const auto *RT = type->getBaseElementTypeUnsafe()->getAs()) { const RecordDecl *RD = RT->getDecl(); diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index 1b62f6630928c1..7b58233beaaca5 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -250,6 +250,10 @@ bool TypePrinter::canPrefixQualifiers(const Type *T, CanPrefixQualifiers = true; break; + case Type::Hyperobject: + CanPrefixQualifiers = true; /* or maybe false */ + break; + case Type::ObjCObjectPointer: CanPrefixQualifiers = T->isObjCIdType() || T->isObjCClassType() || T->isObjCQualifiedIdType() || T->isObjCQualifiedClassType(); @@ -394,6 +398,27 @@ void TypePrinter::printComplexAfter(const ComplexType *T, raw_ostream &OS) { printAfter(T->getElementType(), OS); } +void TypePrinter::printHyperobjectBefore(const HyperobjectType *T, + raw_ostream &OS) { + SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); + printBefore(T->getElementType(), OS); + OS << "_Hyperobject"; + if (T->hasCallbacks()) { + Expr *I = T->getIdentity(); + Expr *R = T->getReduce(); + OS << '('; + I->printPretty(OS, nullptr, Policy); + OS << ", "; + R->printPretty(OS, nullptr, Policy); + OS << ")"; + } +} + +void TypePrinter::printHyperobjectAfter(const HyperobjectType *T, + raw_ostream &OS) { + printAfter(T->getElementType(), OS); +} + void TypePrinter::printPointerBefore(const PointerType *T, raw_ostream &OS) { IncludeStrongLifetimeRAII Strong(Policy); 
SaveAndRestore NonEmptyPH(HasEmptyPlaceHolder, false); diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index 4304693e473dee..def68aedd71cb2 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -113,6 +113,8 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo { return false; } + bool hasSjLjLowering() const override { return true; } + void setArchFeatures(); void getTargetDefinesARMV81A(const LangOptions &Opts, diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 483f3e787a7805..c5be0d5f5c9ea9 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -72,10 +72,13 @@ #include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h" #include "llvm/Transforms/Instrumentation/SanitizerCoverage.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" +#include "llvm/Transforms/Instrumentation/CilkSanitizer.h" +#include "llvm/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.h" #include "llvm/Transforms/ObjCARC.h" #include "llvm/Transforms/Scalar/EarlyCSE.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/JumpThreading.h" +#include "llvm/Transforms/Tapir/TapirToTarget.h" #include "llvm/Transforms/Utils/Debugify.h" #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -255,6 +258,38 @@ static bool asanUseGlobalsGC(const Triple &T, const CodeGenOptions &CGOpts) { return false; } +static CSIOptions getCSIOptionsForCilkscale(bool InstrumentBasicBlocks) { + CSIOptions Options; + // Disable CSI hooks that Cilkscale doesn't need. 
+ Options.InstrumentBasicBlocks = InstrumentBasicBlocks; + Options.InstrumentLoops = false; + Options.InstrumentMemoryAccesses = false; + Options.InstrumentCalls = false; + Options.InstrumentAtomics = false; + Options.InstrumentMemIntrinsics = false; + Options.InstrumentAllocas = false; + Options.InstrumentAllocFns = false; + return Options; +} + +static CSIOptions getCSIOptionsForCilkscaleBenchmark() { + CSIOptions Options; + // Disable CSI hooks that Cilkscale doesn't need. + Options.InstrumentFuncEntryExit = false; + Options.InstrumentBasicBlocks = false; + Options.InstrumentLoops = false; + Options.InstrumentMemoryAccesses = false; + Options.InstrumentCalls = false; + Options.InstrumentAtomics = false; + Options.InstrumentMemIntrinsics = false; + Options.InstrumentTapir = false; + Options.InstrumentAllocas = false; + Options.InstrumentAllocFns = false; + Options.CallsMayThrow = false; + Options.CallsTerminateBlocks = false; + return Options; +} + static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, const CodeGenOptions &CodeGenOpts) { TargetLibraryInfoImpl *TLII = new TargetLibraryInfoImpl(TargetTriple); @@ -291,6 +326,12 @@ static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, default: break; } + + TLII->setTapirTarget(CodeGenOpts.getTapirTarget()); + TLII->setTapirTargetOptions( + std::make_unique(CodeGenOpts.OpenCilkABIBitcodeFile)); + TLII->addTapirTargetLibraryFunctions(); + return TLII; } @@ -831,6 +872,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline( PTO.LoopInterleaving = CodeGenOpts.UnrollLoops; PTO.LoopVectorization = CodeGenOpts.VectorizeLoop; PTO.SLPVectorization = CodeGenOpts.VectorizeSLP; + PTO.LoopStripmine = CodeGenOpts.StripmineLoop; PTO.MergeFunctions = CodeGenOpts.MergeFunctions; // Only enable CGProfilePass when using integrated assembler, since // non-integrated assemblers don't recognize .cgprofile section. 
@@ -1014,13 +1056,96 @@ void EmitAssemblyHelper::RunOptimizationPipeline( MPM.addPass(ModuleMemProfilerPass()); }); } + // Register the Cilksan pass. + if (LangOpts.Sanitize.has(SanitizerKind::Cilk)) + PB.registerTapirLateEPCallback( + [&PB](ModulePassManager &MPM, OptimizationLevel Level) { + MPM.addPass(CSISetupPass()); + MPM.addPass(CilkSanitizerPass()); + MPM.addPass(PB.buildPostCilkInstrumentationPipeline(Level)); + }); + // Register CSI instrumentation for Cilkscale + if (LangOpts.getCilktool() != LangOptions::CilktoolKind::Cilktool_None) { + switch (LangOpts.getCilktool()) { + default: + break; + case LangOptions::CilktoolKind::Cilktool_Cilkscale: + PB.registerTapirLoopEndEPCallback( + [&PB](ModulePassManager &MPM, OptimizationLevel Level) { + MPM.addPass(CSISetupPass(getCSIOptionsForCilkscale(false))); + MPM.addPass(ComprehensiveStaticInstrumentationPass( + getCSIOptionsForCilkscale(false))); + MPM.addPass(PB.buildPostCilkInstrumentationPipeline(Level)); + }); + break; + case LangOptions::CilktoolKind::Cilktool_Cilkscale_InstructionCount: + PB.registerTapirLoopEndEPCallback( + [&PB](ModulePassManager &MPM, OptimizationLevel Level) { + MPM.addPass(CSISetupPass(getCSIOptionsForCilkscale(true))); + MPM.addPass(ComprehensiveStaticInstrumentationPass( + getCSIOptionsForCilkscale(true))); + MPM.addPass(PB.buildPostCilkInstrumentationPipeline(Level)); + }); + break; + case LangOptions::CilktoolKind::Cilktool_Cilkscale_Benchmark: + PB.registerTapirLoopEndEPCallback( + [&PB](ModulePassManager &MPM, OptimizationLevel Level) { + MPM.addPass(CSISetupPass(getCSIOptionsForCilkscaleBenchmark())); + MPM.addPass(ComprehensiveStaticInstrumentationPass( + getCSIOptionsForCilkscaleBenchmark())); + MPM.addPass(PB.buildPostCilkInstrumentationPipeline(Level)); + }); + break; + } + } + // Register the CSI pass. 
+ if (LangOpts.getComprehensiveStaticInstrumentation()) { + switch (LangOpts.getComprehensiveStaticInstrumentation()) { + case LangOptions::CSI_EarlyAsPossible: + case LangOptions::CSI_ModuleOptimizerEarly: + PB.registerPipelineStartEPCallback( + [&PB](ModulePassManager &MPM, OptimizationLevel Level) { + MPM.addPass(CSISetupPass()); + MPM.addPass(ComprehensiveStaticInstrumentationPass()); + MPM.addPass(PB.buildPostCilkInstrumentationPipeline(Level)); + }); + break; + case LangOptions::CSI_TapirLate: + PB.registerTapirLateEPCallback( + [&PB](ModulePassManager &MPM, OptimizationLevel Level) { + MPM.addPass(CSISetupPass()); + MPM.addPass(ComprehensiveStaticInstrumentationPass()); + MPM.addPass(PB.buildPostCilkInstrumentationPipeline(Level)); + }); + break; + case LangOptions::CSI_TapirLoopEnd: + PB.registerTapirLoopEndEPCallback( + [&PB](ModulePassManager &MPM, OptimizationLevel Level) { + MPM.addPass(CSISetupPass()); + MPM.addPass(ComprehensiveStaticInstrumentationPass()); + MPM.addPass(PB.buildPostCilkInstrumentationPipeline(Level)); + }); + break; + case LangOptions::CSI_OptimizerLast: + PB.registerOptimizerLastEPCallback( + [&PB](ModulePassManager &MPM, OptimizationLevel Level) { + MPM.addPass(CSISetupPass()); + MPM.addPass(ComprehensiveStaticInstrumentationPass()); + MPM.addPass(PB.buildPostCilkInstrumentationPipeline(Level)); + }); + break; + case LangOptions::CSI_None: + break; + } + } if (IsThinLTO || (IsLTO && CodeGenOpts.UnifiedLTO)) { MPM = PB.buildThinLTOPreLinkDefaultPipeline(Level); } else if (IsLTO) { MPM = PB.buildLTOPreLinkDefaultPipeline(Level); } else { - MPM = PB.buildPerModuleDefaultPipeline(Level); + MPM = PB.buildPerModuleDefaultPipeline(Level, /* LTOPreLink */ false, + TLII->hasTapirTarget()); } } @@ -1214,6 +1339,8 @@ static void runThinLTOBackend( Conf.RemarksFormat = CGOpts.OptRecordFormat; Conf.SplitDwarfFile = CGOpts.SplitDwarfFile; Conf.SplitDwarfOutput = CGOpts.SplitDwarfOutput; + Conf.TapirTarget = CGOpts.getTapirTarget(); + 
Conf.OpenCilkABIBitcodeFile = CGOpts.OpenCilkABIBitcodeFile; switch (Action) { case Backend_EmitNothing: Conf.PreCodeGenModuleHook = [](size_t Task, const Module &Mod) { diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp index 222b0a192c85e2..2e5726811a6f8d 100644 --- a/clang/lib/CodeGen/CGAtomic.cpp +++ b/clang/lib/CodeGen/CGAtomic.cpp @@ -828,6 +828,10 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) { return RValue::get(nullptr); } + // RAII to finish detach scope after processing AtomicExpr E, if E uses a + // spawned value. + DetachScopeRAII DetScope(*this); + auto TInfo = getContext().getTypeInfoInChars(AtomicTy); uint64_t Size = TInfo.Width.getQuantity(); unsigned MaxInlineWidthInBits = getTarget().getMaxAtomicInlineWidth(); diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp index cfbe3272196e35..778c847c178d4d 100644 --- a/clang/lib/CodeGen/CGBlocks.cpp +++ b/clang/lib/CodeGen/CGBlocks.cpp @@ -2045,6 +2045,8 @@ computeDestroyInfoForBlockCapture(const BlockDecl::Capture &CI, QualType T, } switch (T.isDestructedType()) { + case QualType::DK_hyperobject: + llvm_unreachable("hyperobject cleanup not implemented"); case QualType::DK_cxx_destructor: return std::make_pair(BlockCaptureEntityKind::CXXRecord, BlockFieldFlags()); case QualType::DK_objc_strong_lifetime: diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 30f5f4e7061c05..7fafc4f2406d48 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -202,12 +202,25 @@ static llvm::Value *CheckAtomicAlignment(CodeGenFunction &CGF, return Ptr.getPointer(); } +/// Utility function to start a detach if necessary. 
+static void MaybeDetach(CodeGenFunction *CGF, + CodeGenFunction::IsSpawnedScope &SpawnedScp) { + if (SpawnedScp.OldScopeIsSpawned()) { + SpawnedScp.RestoreOldScope(); + assert(CGF->CurDetachScope && + "A call was spawned, but no detach scope was pushed."); + if (!CGF->CurDetachScope->IsDetachStarted()) + CGF->CurDetachScope->StartDetach(); + } +} + /// Utility to insert an atomic instruction based on Intrinsic::ID /// and the expression node. static Value *MakeBinaryAtomicValue( CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) { + CodeGenFunction::IsSpawnedScope SpawnedScp(&CGF); QualType T = E->getType(); assert(E->getArg(0)->getType()->isPointerType()); assert(CGF.getContext().hasSameUnqualifiedType(T, @@ -229,14 +242,17 @@ static Value *MakeBinaryAtomicValue( llvm::Type *ValueType = Args[1]->getType(); Args[1] = EmitToInt(CGF, Args[1], T, IntType); + MaybeDetach(&CGF, SpawnedScp); llvm::Value *Result = CGF.Builder.CreateAtomicRMW( Kind, Args[0], Args[1], Ordering); return EmitFromInt(CGF, Result, T, ValueType); } static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) { + CodeGenFunction::IsSpawnedScope SpawnedScp(&CGF); Value *Val = CGF.EmitScalarExpr(E->getArg(0)); Value *Address = CGF.EmitScalarExpr(E->getArg(1)); + MaybeDetach(&CGF, SpawnedScp); // Convert the type of the pointer to a pointer to the stored type. 
Val = CGF.EmitToMemory(Val, E->getArg(0)->getType()); @@ -250,7 +266,9 @@ static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) { } static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) { + CodeGenFunction::IsSpawnedScope SpawnedScp(&CGF); Value *Address = CGF.EmitScalarExpr(E->getArg(0)); + MaybeDetach(&CGF, SpawnedScp); LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType()); LV.setNontemporal(true); @@ -271,6 +289,7 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, const CallExpr *E, Instruction::BinaryOps Op, bool Invert = false) { + CodeGenFunction::IsSpawnedScope SpawnedScp(&CGF); QualType T = E->getType(); assert(E->getArg(0)->getType()->isPointerType()); assert(CGF.getContext().hasSameUnqualifiedType(T, @@ -288,6 +307,7 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, Args[1] = EmitToInt(CGF, Args[1], T, IntType); Args[0] = DestPtr; + MaybeDetach(&CGF, SpawnedScp); llvm::Value *Result = CGF.Builder.CreateAtomicRMW( Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent); Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]); @@ -315,6 +335,7 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, /// invoke the function EmitAtomicCmpXchgForMSIntrin. static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool) { + CodeGenFunction::IsSpawnedScope SpawnedScp(&CGF); QualType T = ReturnBool ? 
E->getArg(1)->getType() : E->getType(); llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E); @@ -327,6 +348,7 @@ static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, llvm::Type *ValueType = Args[1]->getType(); Args[1] = EmitToInt(CGF, Args[1], T, IntType); Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType); + MaybeDetach(&CGF, SpawnedScp); Value *Pair = CGF.Builder.CreateAtomicCmpXchg( Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent, @@ -501,14 +523,17 @@ static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) { static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID) { + CodeGenFunction::IsSpawnedScope SpawnedScp(&CGF); llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); if (CGF.Builder.getIsFPConstrained()) { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); + MaybeDetach(&CGF, SpawnedScp); return CGF.Builder.CreateConstrainedFPCall(F, { Src0 }); } else { Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + MaybeDetach(&CGF, SpawnedScp); return CGF.Builder.CreateCall(F, Src0); } } @@ -518,15 +543,18 @@ static Value *emitUnaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, static Value *emitBinaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID) { + CodeGenFunction::IsSpawnedScope SpawnedScp(&CGF); llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); if (CGF.Builder.getIsFPConstrained()) { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); + MaybeDetach(&CGF, SpawnedScp); return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1 }); } else { Function *F = CGF.CGM.getIntrinsic(IntrinsicID, 
Src0->getType()); + MaybeDetach(&CGF, SpawnedScp); return CGF.Builder.CreateCall(F, { Src0, Src1 }); } } @@ -555,6 +583,7 @@ static Value *emitBinaryExpMaybeConstrainedFPBuiltin( static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID) { + CodeGenFunction::IsSpawnedScope SpawnedScp(&CGF); llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); @@ -562,9 +591,11 @@ static Value *emitTernaryMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, if (CGF.Builder.getIsFPConstrained()) { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, Src0->getType()); + MaybeDetach(&CGF, SpawnedScp); return CGF.Builder.CreateConstrainedFPCall(F, { Src0, Src1, Src2 }); } else { Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + MaybeDetach(&CGF, SpawnedScp); return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); } } @@ -593,9 +624,11 @@ static Value *emitCallMaybeConstrainedFPBuiltin(CodeGenFunction &CGF, static Value *emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, llvm::StringRef Name = "") { + CodeGenFunction::IsSpawnedScope SpawnedScp(&CGF); llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + MaybeDetach(&CGF, SpawnedScp); return CGF.Builder.CreateCall(F, Src0, Name); } @@ -603,10 +636,12 @@ static Value *emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, static Value *emitBinaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID) { + CodeGenFunction::IsSpawnedScope SpawnedScp(&CGF); llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + MaybeDetach(&CGF, SpawnedScp); return 
CGF.Builder.CreateCall(F, { Src0, Src1 }); } @@ -614,11 +649,13 @@ static Value *emitBinaryBuiltin(CodeGenFunction &CGF, static Value *emitTernaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID) { + CodeGenFunction::IsSpawnedScope SpawnedScp(&CGF); llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2)); Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + MaybeDetach(&CGF, SpawnedScp); return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 }); } @@ -626,10 +663,12 @@ static Value *emitTernaryBuiltin(CodeGenFunction &CGF, static Value *emitFPIntBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID) { + CodeGenFunction::IsSpawnedScope SpawnedScp(&CGF); llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1)); Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType()); + MaybeDetach(&CGF, SpawnedScp); return CGF.Builder.CreateCall(F, {Src0, Src1}); } @@ -638,6 +677,7 @@ static Value * emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID, unsigned ConstrainedIntrinsicID) { + CodeGenFunction::IsSpawnedScope SpawnedScp(&CGF); llvm::Type *ResultType = CGF.ConvertType(E->getType()); llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0)); @@ -645,10 +685,12 @@ emitMaybeConstrainedFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, E); Function *F = CGF.CGM.getIntrinsic(ConstrainedIntrinsicID, {ResultType, Src0->getType()}); + MaybeDetach(&CGF, SpawnedScp); return CGF.Builder.CreateConstrainedFPCall(F, {Src0}); } else { Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {ResultType, Src0->getType()}); + MaybeDetach(&CGF, SpawnedScp); return CGF.Builder.CreateCall(F, Src0); } } @@ -673,7 +715,9 @@ static Value *emitFrexpBuiltin(CodeGenFunction &CGF, const CallExpr *E, 
/// EmitFAbs - Emit a call to @llvm.fabs(). static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) { + CodeGenFunction::IsSpawnedScope SpawnedScp(&CGF); Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType()); + MaybeDetach(&CGF, SpawnedScp); llvm::CallInst *Call = CGF.Builder.CreateCall(F, V); Call->setDoesNotAccessMemory(); return Call; @@ -2154,8 +2198,10 @@ static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) { } RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) { + IsSpawnedScope SpawnedScp(this); llvm::Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1)); + MaybeDetach(this, SpawnedScp); // The builtin's shift arg may have a different type than the source arg and // result, but the LLVM intrinsic uses the same type for all values. @@ -2425,8 +2471,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_fmodl: case Builtin::BI__builtin_fmodf128: { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); + IsSpawnedScope SpawnedScp(this); Value *Arg1 = EmitScalarExpr(E->getArg(0)); Value *Arg2 = EmitScalarExpr(E->getArg(1)); + MaybeDetach(this, SpawnedScp); return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod")); } @@ -2634,6 +2682,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, BuiltinID != Builtin::BI__builtin_va_end); return RValue::get(nullptr); case Builtin::BI__builtin_va_copy: { + IsSpawnedScope SpawnedScp(this); Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer(); Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer(); @@ -2641,6 +2690,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, DstPtr = Builder.CreateBitCast(DstPtr, Type); SrcPtr = Builder.CreateBitCast(SrcPtr, Type); + MaybeDetach(this, SpawnedScp); Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy), {DstPtr, SrcPtr}); return RValue::get(nullptr); } @@ -2649,7 
+2699,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_llabs: { // X < 0 ? -X : X // The negation has 'nsw' because abs of INT_MIN is undefined. + IsSpawnedScope SpawnedScp(this); Value *ArgValue = EmitScalarExpr(E->getArg(0)); + MaybeDetach(this, SpawnedScp); Value *NegOp = Builder.CreateNSWNeg(ArgValue, "neg"); Constant *Zero = llvm::Constant::getNullValue(ArgValue->getType()); Value *CmpResult = Builder.CreateICmpSLT(ArgValue, Zero, "abscond"); @@ -2667,7 +2719,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BIconj: case Builtin::BIconjf: case Builtin::BIconjl: { + IsSpawnedScope SpawnedScp(this); ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0)); + MaybeDetach(this, SpawnedScp); Value *Real = ComplexVal.first; Value *Imag = ComplexVal.second; Imag = Builder.CreateFNeg(Imag, "neg"); @@ -2718,6 +2772,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_clrsbl: case Builtin::BI__builtin_clrsbll: { // clrsb(x) -> clz(x < 0 ? 
~x : x) - 1 or + IsSpawnedScope SpawnedScp(this); Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); @@ -2725,6 +2780,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Type *ResultType = ConvertType(E->getType()); Value *Zero = llvm::Constant::getNullValue(ArgType); + MaybeDetach(this, SpawnedScp); Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg"); Value *Inverse = Builder.CreateNot(ArgValue, "not"); Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue); @@ -2738,6 +2794,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_ctz: case Builtin::BI__builtin_ctzl: case Builtin::BI__builtin_ctzll: { + IsSpawnedScope SpawnedScp(this); Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero); llvm::Type *ArgType = ArgValue->getType(); @@ -2745,6 +2802,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Type *ResultType = ConvertType(E->getType()); Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); + MaybeDetach(this, SpawnedScp); Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef}); if (Result->getType() != ResultType) Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, @@ -2755,6 +2813,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_clz: case Builtin::BI__builtin_clzl: case Builtin::BI__builtin_clzll: { + IsSpawnedScope SpawnedScp(this); Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero); llvm::Type *ArgType = ArgValue->getType(); @@ -2762,6 +2821,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Type *ResultType = ConvertType(E->getType()); Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef()); + MaybeDetach(this, SpawnedScp); Value *Result = Builder.CreateCall(F, 
{ArgValue, ZeroUndef}); if (Result->getType() != ResultType) Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, @@ -2772,12 +2832,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_ffsl: case Builtin::BI__builtin_ffsll: { // ffs(x) -> x ? cttz(x) + 1 : 0 + IsSpawnedScope SpawnedScp(this); Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); + MaybeDetach(this, SpawnedScp); Value *Tmp = Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}), llvm::ConstantInt::get(ArgType, 1)); @@ -2793,12 +2855,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_parityl: case Builtin::BI__builtin_parityll: { // parity(x) -> ctpop(x) & 1 + IsSpawnedScope SpawnedScp(this); Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); + MaybeDetach(this, SpawnedScp); Value *Tmp = Builder.CreateCall(F, ArgValue); Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1)); if (Result->getType() != ResultType) @@ -2809,12 +2873,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__lzcnt16: case Builtin::BI__lzcnt: case Builtin::BI__lzcnt64: { + IsSpawnedScope SpawnedScp(this); Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); + MaybeDetach(this, SpawnedScp); Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()}); if (Result->getType() != ResultType) Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, @@ 
-2827,12 +2893,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_popcount: case Builtin::BI__builtin_popcountl: case Builtin::BI__builtin_popcountll: { + IsSpawnedScope SpawnedScp(this); Value *ArgValue = EmitScalarExpr(E->getArg(0)); llvm::Type *ArgType = ArgValue->getType(); Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); llvm::Type *ResultType = ConvertType(E->getType()); + MaybeDetach(this, SpawnedScp); Value *Result = Builder.CreateCall(F, ArgValue); if (Result->getType() != ResultType) Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true, @@ -3038,6 +3106,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, /*EmittedE=*/nullptr, IsDynamic)); } case Builtin::BI__builtin_prefetch: { + IsSpawnedScope SpawnedScp(this); Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0)); // FIXME: Technically these constants should of type 'int', yes? RW = (E->getNumArgs() > 1) ? 
EmitScalarExpr(E->getArg(1)) : @@ -3046,26 +3115,39 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::ConstantInt::get(Int32Ty, 3); Value *Data = llvm::ConstantInt::get(Int32Ty, 1); Function *F = CGM.getIntrinsic(Intrinsic::prefetch, Address->getType()); + MaybeDetach(this, SpawnedScp); Builder.CreateCall(F, {Address, RW, Locality, Data}); return RValue::get(nullptr); } case Builtin::BI__builtin_readcyclecounter: { + IsSpawnedScope SpawnedScp(this); Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter); + MaybeDetach(this, SpawnedScp); return RValue::get(Builder.CreateCall(F)); } case Builtin::BI__builtin___clear_cache: { + IsSpawnedScope SpawnedScp(this); Value *Begin = EmitScalarExpr(E->getArg(0)); Value *End = EmitScalarExpr(E->getArg(1)); Function *F = CGM.getIntrinsic(Intrinsic::clear_cache); + MaybeDetach(this, SpawnedScp); return RValue::get(Builder.CreateCall(F, {Begin, End})); } - case Builtin::BI__builtin_trap: + case Builtin::BI__builtin_trap: { + IsSpawnedScope SpawnedScp(this); + MaybeDetach(this, SpawnedScp); EmitTrapCall(Intrinsic::trap); return RValue::get(nullptr); - case Builtin::BI__debugbreak: + } + case Builtin::BI__debugbreak: { + IsSpawnedScope SpawnedScp(this); + MaybeDetach(this, SpawnedScp); EmitTrapCall(Intrinsic::debugtrap); return RValue::get(nullptr); + } case Builtin::BI__builtin_unreachable: { + IsSpawnedScope SpawnedScp(this); + MaybeDetach(this, SpawnedScp); EmitUnreachable(E->getExprLoc()); // We do need to preserve an insertion point. @@ -3105,11 +3187,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_islessequal: case Builtin::BI__builtin_islessgreater: case Builtin::BI__builtin_isunordered: { + IsSpawnedScope SpawnedScp(this); // Ordered comparisons: we know the arguments to these are matching scalar // floating point values. 
CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); Value *LHS = EmitScalarExpr(E->getArg(0)); Value *RHS = EmitScalarExpr(E->getArg(1)); + MaybeDetach(this, SpawnedScp); switch (BuiltinID) { default: llvm_unreachable("Unknown ordered comparison"); @@ -3137,8 +3221,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BI__builtin_isnan: { + IsSpawnedScope SpawnedScp(this); CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); Value *V = EmitScalarExpr(E->getArg(0)); + MaybeDetach(this, SpawnedScp); if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V)) return RValue::get(Result); return RValue::get( @@ -3147,6 +3233,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BI__builtin_isinf: { + IsSpawnedScope SpawnedScp(this); CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); Value *V = EmitScalarExpr(E->getArg(0)); if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V)) @@ -3163,6 +3250,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BIfinitel: case Builtin::BI__finitel: case Builtin::BI__builtin_isfinite: { + IsSpawnedScope SpawnedScp(this); CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); Value *V = EmitScalarExpr(E->getArg(0)); if (Value *Result = tryUseTestFPKind(*this, BuiltinID, V)) @@ -3173,6 +3261,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BI__builtin_isnormal: { + IsSpawnedScope SpawnedScp(this); CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); Value *V = EmitScalarExpr(E->getArg(0)); return RValue::get( @@ -3417,10 +3506,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BI__builtin_isinf_sign: { + IsSpawnedScope SpawnedScp(this); // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? 
-1 : 1) : 0 CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here. Value *Arg = EmitScalarExpr(E->getArg(0)); + MaybeDetach(this, SpawnedScp); Value *AbsArg = EmitFAbs(*this, Arg); Value *IsInf = Builder.CreateFCmpOEQ( AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf"); @@ -3457,8 +3548,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_fpclassify: { CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E); // FIXME: for strictfp/IEEE-754 we need to not trap on SNaN here. + IsSpawnedScope SpawnedScp(this); Value *V = EmitScalarExpr(E->getArg(5)); llvm::Type *Ty = ConvertType(E->getArg(5)->getType()); + MaybeDetach(this, SpawnedScp); // Create Result BasicBlock *Begin = Builder.GetInsertBlock(); @@ -3549,10 +3642,12 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BIbzero: case Builtin::BI__builtin_bzero: { + IsSpawnedScope SpawnedScp(this); Address Dest = EmitPointerWithAlignment(E->getArg(0)); Value *SizeVal = EmitScalarExpr(E->getArg(1)); EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD, 0); + MaybeDetach(this, SpawnedScp); Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false); return RValue::get(nullptr); } @@ -3560,6 +3655,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_memcpy: case Builtin::BImempcpy: case Builtin::BI__builtin_mempcpy: { + IsSpawnedScope SpawnedScp(this); Address Dest = EmitPointerWithAlignment(E->getArg(0)); Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *SizeVal = EmitScalarExpr(E->getArg(2)); @@ -3567,6 +3663,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, E->getArg(0)->getExprLoc(), FD, 0); EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), 
E->getArg(1)->getExprLoc(), FD, 1); + MaybeDetach(this, SpawnedScp); Builder.CreateMemCpy(Dest, Src, SizeVal, false); if (BuiltinID == Builtin::BImempcpy || BuiltinID == Builtin::BI__builtin_mempcpy) @@ -3594,6 +3691,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, break; case Builtin::BI__builtin___memcpy_chk: { + IsSpawnedScope SpawnedScp(this); // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2. Expr::EvalResult SizeResult, DstSizeResult; if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) || @@ -3606,20 +3704,24 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Address Dest = EmitPointerWithAlignment(E->getArg(0)); Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); + MaybeDetach(this, SpawnedScp); Builder.CreateMemCpy(Dest, Src, SizeVal, false); return RValue::get(Dest.getPointer()); } case Builtin::BI__builtin_objc_memmove_collectable: { + IsSpawnedScope SpawnedScp(this); Address DestAddr = EmitPointerWithAlignment(E->getArg(0)); Address SrcAddr = EmitPointerWithAlignment(E->getArg(1)); Value *SizeVal = EmitScalarExpr(E->getArg(2)); + MaybeDetach(this, SpawnedScp); CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this, DestAddr, SrcAddr, SizeVal); return RValue::get(DestAddr.getPointer()); } case Builtin::BI__builtin___memmove_chk: { + IsSpawnedScope SpawnedScp(this); // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2. 
Expr::EvalResult SizeResult, DstSizeResult; if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) || @@ -3632,12 +3734,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Address Dest = EmitPointerWithAlignment(E->getArg(0)); Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); + MaybeDetach(this, SpawnedScp); Builder.CreateMemMove(Dest, Src, SizeVal, false); return RValue::get(Dest.getPointer()); } case Builtin::BImemmove: case Builtin::BI__builtin_memmove: { + IsSpawnedScope SpawnedScp(this); Address Dest = EmitPointerWithAlignment(E->getArg(0)); Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *SizeVal = EmitScalarExpr(E->getArg(2)); @@ -3645,17 +3749,20 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, E->getArg(0)->getExprLoc(), FD, 0); EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(), E->getArg(1)->getExprLoc(), FD, 1); + MaybeDetach(this, SpawnedScp); Builder.CreateMemMove(Dest, Src, SizeVal, false); return RValue::get(Dest.getPointer()); } case Builtin::BImemset: case Builtin::BI__builtin_memset: { + IsSpawnedScope SpawnedScp(this); Address Dest = EmitPointerWithAlignment(E->getArg(0)); Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty()); Value *SizeVal = EmitScalarExpr(E->getArg(2)); EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(), E->getArg(0)->getExprLoc(), FD, 0); + MaybeDetach(this, SpawnedScp); Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); return RValue::get(Dest.getPointer()); } @@ -3671,6 +3778,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(nullptr); } case Builtin::BI__builtin___memset_chk: { + IsSpawnedScope SpawnedScp(this); // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2. 
Expr::EvalResult SizeResult, DstSizeResult; if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) || @@ -3684,6 +3792,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)), Builder.getInt8Ty()); Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size); + MaybeDetach(this, SpawnedScp); Builder.CreateMemSet(Dest, ByteVal, SizeVal, false); return RValue::get(Dest.getPointer()); } @@ -3806,34 +3915,46 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::ConstantInt::get(Int32Ty, Offset))); } case Builtin::BI__builtin_return_address: { + IsSpawnedScope SpawnedScp(this); Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), getContext().UnsignedIntTy); Function *F = CGM.getIntrinsic(Intrinsic::returnaddress); + MaybeDetach(this, SpawnedScp); return RValue::get(Builder.CreateCall(F, Depth)); } case Builtin::BI_ReturnAddress: { + IsSpawnedScope SpawnedScp(this); Function *F = CGM.getIntrinsic(Intrinsic::returnaddress); + MaybeDetach(this, SpawnedScp); return RValue::get(Builder.CreateCall(F, Builder.getInt32(0))); } case Builtin::BI__builtin_frame_address: { + IsSpawnedScope SpawnedScp(this); Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0), getContext().UnsignedIntTy); Function *F = CGM.getIntrinsic(Intrinsic::frameaddress, AllocaInt8PtrTy); + MaybeDetach(this, SpawnedScp); return RValue::get(Builder.CreateCall(F, Depth)); } case Builtin::BI__builtin_extract_return_addr: { + IsSpawnedScope SpawnedScp(this); Value *Address = EmitScalarExpr(E->getArg(0)); + MaybeDetach(this, SpawnedScp); Value *Result = getTargetHooks().decodeReturnAddress(*this, Address); return RValue::get(Result); } case Builtin::BI__builtin_frob_return_addr: { + IsSpawnedScope SpawnedScp(this); Value *Address = EmitScalarExpr(E->getArg(0)); + MaybeDetach(this, SpawnedScp); Value *Result = 
getTargetHooks().encodeReturnAddress(*this, Address); return RValue::get(Result); } case Builtin::BI__builtin_dwarf_sp_column: { + IsSpawnedScope SpawnedScp(this); llvm::IntegerType *Ty = cast(ConvertType(E->getType())); + MaybeDetach(this, SpawnedScp); int Column = getTargetHooks().getDwarfEHStackPointer(CGM); if (Column == -1) { CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column"); @@ -3842,7 +3963,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(llvm::ConstantInt::get(Ty, Column, true)); } case Builtin::BI__builtin_init_dwarf_reg_size_table: { + IsSpawnedScope SpawnedScp(this); Value *Address = EmitScalarExpr(E->getArg(0)); + MaybeDetach(this, SpawnedScp); if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address)) CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table"); return RValue::get(llvm::UndefValue::get(ConvertType(E->getType()))); @@ -3871,6 +3994,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(nullptr); } case Builtin::BI__builtin_extend_pointer: { + IsSpawnedScope SpawnedScp(this); // Extends a pointer to the size of an _Unwind_Word, which is // uint64_t on all platforms. Generally this gets poked into a // register and eventually used as an address, so if the @@ -3883,6 +4007,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // Cast the pointer to intptr_t. Value *Ptr = EmitScalarExpr(E->getArg(0)); + MaybeDetach(this, SpawnedScp); Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast"); // If that's 64 bits, we're done. 
@@ -4080,11 +4205,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__sync_lock_release_4: case Builtin::BI__sync_lock_release_8: case Builtin::BI__sync_lock_release_16: { + IsSpawnedScope SpawnedScp(this); Value *Ptr = CheckAtomicAlignment(*this, E); QualType ElTy = E->getArg(0)->getType()->getPointeeType(); CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), StoreSize.getQuantity() * 8); + MaybeDetach(this, SpawnedScp); llvm::StoreInst *Store = Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr, StoreSize); @@ -4110,6 +4237,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(EmitNontemporalStore(*this, E)); case Builtin::BI__c11_atomic_is_lock_free: case Builtin::BI__atomic_is_lock_free: { + IsSpawnedScope SpawnedScp(this); // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since // _Atomic(T) is always properly-aligned. @@ -4127,11 +4255,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo); llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName); + SpawnedScp.RestoreOldScope(); return EmitCall(FuncInfo, CGCallee::forDirect(Func), ReturnValueSlot(), Args); } case Builtin::BI__atomic_test_and_set: { + IsSpawnedScope SpawnedScp(this); // Look at the argument type to determine whether this is a volatile // operation. The parameter type is always volatile. 
QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); @@ -4141,6 +4271,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Value *Ptr = EmitScalarExpr(E->getArg(0)); Value *NewVal = Builder.getInt8(1); Value *Order = EmitScalarExpr(E->getArg(1)); + MaybeDetach(this, SpawnedScp); if (isa(Order)) { int ord = cast(Order)->getZExtValue(); AtomicRMWInst *Result = nullptr; @@ -4215,6 +4346,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } case Builtin::BI__atomic_clear: { + IsSpawnedScope SpawnedScp(this); QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType(); bool Volatile = PtrTy->castAs()->getPointeeType().isVolatileQualified(); @@ -4223,6 +4355,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Ptr = Ptr.withElementType(Int8Ty); Value *NewVal = Builder.getInt8(0); Value *Order = EmitScalarExpr(E->getArg(1)); + MaybeDetach(this, SpawnedScp); if (isa(Order)) { int ord = cast(Order)->getZExtValue(); StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile); @@ -4342,6 +4475,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_signbit: case Builtin::BI__builtin_signbitf: case Builtin::BI__builtin_signbitl: { + IsSpawnedScope SpawnedScp(this); + MaybeDetach(this, SpawnedScp); return RValue::get( Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))), ConvertType(E->getType()))); @@ -4394,6 +4529,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_subc: case Builtin::BI__builtin_subcl: case Builtin::BI__builtin_subcll: { + IsSpawnedScope SpawnedScp(this); // We translate all of these builtins from expressions of the form: // int x = ..., y = ..., carryin = ..., carryout, result; @@ -4437,6 +4573,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, IntrinsicId = 
llvm::Intrinsic::usub_with_overflow; break; } + MaybeDetach(this, SpawnedScp); // Construct our resulting LLVM IR expression. llvm::Value *Carry1; @@ -4454,6 +4591,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_add_overflow: case Builtin::BI__builtin_sub_overflow: case Builtin::BI__builtin_mul_overflow: { + IsSpawnedScope SpawnedScp(this); const clang::Expr *LeftArg = E->getArg(0); const clang::Expr *RightArg = E->getArg(1); const clang::Expr *ResultArg = E->getArg(2); @@ -4513,6 +4651,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Value *Left = EmitScalarExpr(LeftArg); llvm::Value *Right = EmitScalarExpr(RightArg); Address ResultPtr = EmitPointerWithAlignment(ResultArg); + MaybeDetach(this, SpawnedScp); // Extend each operand to the encompassing type. Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed); @@ -4564,6 +4703,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_smul_overflow: case Builtin::BI__builtin_smull_overflow: case Builtin::BI__builtin_smulll_overflow: { + IsSpawnedScope SpawnedScp(this); // We translate all of these builtins directly to the relevant llvm IR node. @@ -4607,7 +4747,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, IntrinsicId = llvm::Intrinsic::smul_with_overflow; break; } - + MaybeDetach(this, SpawnedScp); llvm::Value *Carry; llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); @@ -4641,8 +4781,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, // __noop always evaluates to an integer literal zero. 
return RValue::get(ConstantInt::get(IntTy, 0)); case Builtin::BI__builtin_call_with_static_chain: { + IsSpawnedScope SpawnedScp(this); const CallExpr *Call = cast(E->getArg(0)); const Expr *Chain = E->getArg(1); + SpawnedScp.RestoreOldScope(); return EmitCall(Call->getCallee()->getType(), EmitCallee(Call->getCallee()), Call, ReturnValue, EmitScalarExpr(Chain)); @@ -5239,19 +5381,25 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, case Builtin::BI__builtin_store_half: case Builtin::BI__builtin_store_halff: { + IsSpawnedScope SpawnedScp(this); Value *Val = EmitScalarExpr(E->getArg(0)); Address Address = EmitPointerWithAlignment(E->getArg(1)); + MaybeDetach(this, SpawnedScp); Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy()); Builder.CreateStore(HalfVal, Address); return RValue::get(nullptr); } case Builtin::BI__builtin_load_half: { + IsSpawnedScope SpawnedScp(this); Address Address = EmitPointerWithAlignment(E->getArg(0)); + MaybeDetach(this, SpawnedScp); Value *HalfVal = Builder.CreateLoad(Address); return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy())); } case Builtin::BI__builtin_load_halff: { + IsSpawnedScope SpawnedScp(this); Address Address = EmitPointerWithAlignment(E->getArg(0)); + MaybeDetach(this, SpawnedScp); Value *HalfVal = Builder.CreateLoad(Address); return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy())); } @@ -5388,20 +5536,36 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Str.getPointer(), Zeros); return RValue::get(Ptr); } + case Builtin::BI__hyper_lookup: { + llvm::Value *Size = EmitScalarExpr(E->getArg(1)); + Function *F = CGM.getIntrinsic(Intrinsic::hyper_lookup, Size->getType()); + llvm::Value *Ptr = EmitScalarExpr(E->getArg(0)); + llvm::Value *Identity = EmitScalarExpr(E->getArg(2)); + llvm::Value *Reduce = EmitScalarExpr(E->getArg(3)); + return RValue::get(Builder.CreateCall( + F, {Ptr, Size, Builder.CreateBitCast(Identity, 
VoidPtrTy), + Builder.CreateBitCast(Reduce, VoidPtrTy)})); + } } + IsSpawnedScope SpawnedScp(this); // If this is an alias for a lib function (e.g. __builtin_sin), emit // the call using the normal call path, but using the unmangled // version of the function name. - if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) + if (getContext().BuiltinInfo.isLibFunction(BuiltinID)) { + SpawnedScp.RestoreOldScope(); return emitLibraryCall(*this, FD, E, CGM.getBuiltinLibFunction(FD, BuiltinID)); + } // If this is a predefined lib function (e.g. malloc), emit the call // using exactly the normal call path. - if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) - return emitLibraryCall(*this, FD, E, - cast(EmitScalarExpr(E->getCallee()))); + if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID)) { + llvm::Constant *Callee = + cast(EmitScalarExpr(E->getCallee())); + SpawnedScp.RestoreOldScope(); + return emitLibraryCall(*this, FD, E, Callee); + } // Check that a call to a target specific builtin has the correct target // features. @@ -5481,6 +5645,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Args.push_back(ArgValue); } + MaybeDetach(this, SpawnedScp); Value *V = Builder.CreateCall(F, Args); QualType BuiltinRetType = E->getType(); @@ -5525,6 +5690,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, ReturnValue = ReturnValueSlot(DestPtr, false); } + SpawnedScp.RestoreOldScope(); // Now see if we can emit a target-specific builtin. 
if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) { switch (EvalKind) { diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 6b8af9bf18c1ff..5ed5e4b3cbf1e2 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -2322,6 +2322,10 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, FuncAttrs.addAttribute(llvm::Attribute::NoDuplicate); if (TargetDecl->hasAttr()) FuncAttrs.addAttribute(llvm::Attribute::Convergent); + if (TargetDecl->hasAttr()) + FuncAttrs.addAttribute(llvm::Attribute::Stealable); + if (TargetDecl->hasAttr()) + FuncAttrs.addAttribute(llvm::Attribute::Injective); if (const FunctionDecl *Fn = dyn_cast(TargetDecl)) { AddAttributesFromFunctionProtoType( @@ -2367,8 +2371,24 @@ void CodeGenModule::ConstructAttributeList(StringRef Name, FuncAttrs.addMemoryAttr(llvm::MemoryEffects::argMemOnly()); FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); } + if (TargetDecl->hasAttr()) { + FuncAttrs.addAttribute(llvm::Attribute::StrandPure); + FuncAttrs.addMemoryAttr(llvm::MemoryEffects::readOnly()); + FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); + } + if (TargetDecl->hasAttr()) { + FuncAttrs.addAttribute(llvm::Attribute::ReducerRegister); + } + if (TargetDecl->hasAttr()) { + FuncAttrs.addAttribute(llvm::Attribute::ReducerUnregister); + } + if (TargetDecl->hasAttr()) { + FuncAttrs.addAttribute(llvm::Attribute::HyperView); + } if (TargetDecl->hasAttr()) RetAttrs.addAttribute(llvm::Attribute::NoAlias); + else if (TargetDecl->hasAttr()) + RetAttrs.addAttribute(llvm::Attribute::StrandNoAlias); if (TargetDecl->hasAttr() && !CodeGenOpts.NullPointerIsValid) RetAttrs.addAttribute(llvm::Attribute::NonNull); @@ -4910,6 +4930,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, SourceLocation Loc) { // FIXME: We no longer need the types from CallArgs; lift up and simplify. 
+ IsSpawnedScope SpawnedScp(this); + assert(Callee.isOrdinary() || Callee.isVirtual()); // Handle struct-return functions by passing a pointer to the @@ -5423,6 +5445,15 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // 3. Perform the actual call. + // If this call is detached, start the detach, if it hasn't yet been started. + if (SpawnedScp.OldScopeIsSpawned()) { + SpawnedScp.RestoreOldScope(); + assert(CurDetachScope && + "A call was spawned, but no detach scope was pushed."); + if (!CurDetachScope->IsDetachStarted()) + CurDetachScope->StartDetach(); + } + // Deactivate any cleanups that we're supposed to do immediately before // the call. if (!CallArgs.getCleanupsToDeactivate().empty()) @@ -5540,6 +5571,12 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, AllocAlignAttrEmitter AllocAlignAttrEmitter(*this, TargetDecl, CallArgs); Attrs = AllocAlignAttrEmitter.TryEmitAsCallSiteAttribute(Attrs); + // If this call might lead to exit() make sure the runtime can + // be shutdown cleanly. + if (CurSyncRegion && !ScopeIsSynced && !InvokeDest && + Attrs.hasFnAttr(llvm::Attribute::NoReturn)) + EmitImplicitSyncCleanup(nullptr); + // Emit the actual call/invoke instruction. llvm::CallBase *CI; if (!InvokeDest) { diff --git a/clang/lib/CodeGen/CGCilk.cpp b/clang/lib/CodeGen/CGCilk.cpp new file mode 100644 index 00000000000000..68e9ecb6cdd9e3 --- /dev/null +++ b/clang/lib/CodeGen/CGCilk.cpp @@ -0,0 +1,866 @@ +//===--- CGCilk.cpp - Emit LLVM Code for Cilk expressions -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This contains code dealing with code generation of Cilk statements and +// expressions. 
+// +//===----------------------------------------------------------------------===// + +#include "CodeGenFunction.h" +#include "CGCleanup.h" +#include "clang/AST/ExprCilk.h" +#include "clang/AST/StmtCilk.h" + +using namespace clang; +using namespace CodeGen; + +CodeGenFunction::IsSpawnedScope::IsSpawnedScope(CodeGenFunction *CGF) + : CGF(CGF), OldIsSpawned(CGF->IsSpawned), + OldSpawnedCleanup(CGF->SpawnedCleanup) { + CGF->IsSpawned = false; + CGF->SpawnedCleanup = OldIsSpawned; +} + +CodeGenFunction::IsSpawnedScope::~IsSpawnedScope() { + RestoreOldScope(); +} + +bool CodeGenFunction::IsSpawnedScope::OldScopeIsSpawned() const { + return OldIsSpawned; +} + +void CodeGenFunction::IsSpawnedScope::RestoreOldScope() { + CGF->IsSpawned = OldIsSpawned; + CGF->SpawnedCleanup = OldSpawnedCleanup; +} + +void CodeGenFunction::EmitImplicitSyncCleanup(llvm::Instruction *SyncRegion) { + llvm::Instruction *SR = SyncRegion; + // If a sync region wasn't specified with this cleanup initially, try to grab + // the current sync region. + if (!SR && CurSyncRegion && CurSyncRegion->getSyncRegionStart()) + SR = CurSyncRegion->getSyncRegionStart(); + if (!SR) + return; + + llvm::BasicBlock *ContinueBlock = createBasicBlock("sync.continue"); + Builder.CreateSync(ContinueBlock, SR); + EmitBlockAfterUses(ContinueBlock); + if (getLangOpts().Exceptions && !CurFn->doesNotThrow()) + EmitCallOrInvoke(CGM.getIntrinsic(llvm::Intrinsic::sync_unwind), { SR }); +} + +void CodeGenFunction::DetachScope::CreateTaskFrameEHState() { + // Save the old EH state. + OldEHResumeBlock = CGF.EHResumeBlock; + CGF.EHResumeBlock = nullptr; + OldExceptionSlot = CGF.ExceptionSlot; + CGF.ExceptionSlot = nullptr; + OldEHSelectorSlot = CGF.EHSelectorSlot; + CGF.EHSelectorSlot = nullptr; + OldNormalCleanupDest = CGF.NormalCleanupDest; + CGF.NormalCleanupDest = Address::invalid(); +} + +void CodeGenFunction::DetachScope::CreateDetachedEHState() { + // Save the old EH state. 
+ TFEHResumeBlock = CGF.EHResumeBlock; + CGF.EHResumeBlock = nullptr; + TFExceptionSlot = CGF.ExceptionSlot; + CGF.ExceptionSlot = nullptr; + TFEHSelectorSlot = CGF.EHSelectorSlot; + CGF.EHSelectorSlot = nullptr; + TFNormalCleanupDest = CGF.NormalCleanupDest; + CGF.NormalCleanupDest = Address::invalid(); +} + +llvm::BasicBlock *CodeGenFunction::DetachScope::RestoreTaskFrameEHState() { + llvm::BasicBlock *NestedEHResumeBlock = CGF.EHResumeBlock; + CGF.EHResumeBlock = TFEHResumeBlock; + CGF.ExceptionSlot = TFExceptionSlot; + CGF.EHSelectorSlot = TFEHSelectorSlot; + CGF.NormalCleanupDest = TFNormalCleanupDest; + return NestedEHResumeBlock; +} + +llvm::BasicBlock *CodeGenFunction::DetachScope::RestoreParentEHState() { + llvm::BasicBlock *NestedEHResumeBlock = CGF.EHResumeBlock; + CGF.EHResumeBlock = OldEHResumeBlock; + CGF.ExceptionSlot = OldExceptionSlot; + CGF.EHSelectorSlot = OldEHSelectorSlot; + CGF.NormalCleanupDest = OldNormalCleanupDest; + return NestedEHResumeBlock; +} + +void CodeGenFunction::DetachScope::EnsureTaskFrame() { + if (!TaskFrame) { + llvm::Function *TaskFrameCreate = + CGF.CGM.getIntrinsic(llvm::Intrinsic::taskframe_create); + TaskFrame = CGF.Builder.CreateCall(TaskFrameCreate); + + // Create a new alloca insertion point within the task frame. + OldAllocaInsertPt = CGF.AllocaInsertPt; + llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); + CGF.AllocaInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "", + CGF.Builder.GetInsertBlock()); + // SavedDetachedAllocaInsertPt = CGF.AllocaInsertPt; + + CreateTaskFrameEHState(); + + CGF.pushFullExprCleanup( + static_cast(EHCleanup | LifetimeMarker | TaskExit), + TaskFrame); + } +} + +void CodeGenFunction::DetachScope::InitDetachScope() { + // Create the detached and continue blocks. 
+ DetachedBlock = CGF.createBasicBlock("det.achd"); + ContinueBlock = CGF.createBasicBlock("det.cont"); +} + +void CodeGenFunction::DetachScope::PushSpawnedTaskTerminate() { + CGF.pushFullExprCleanupImpl( + // This cleanup should not be a TaskExit, because we've pushed a TaskExit + // cleanup onto EHStack already, corresponding with the taskframe. + static_cast(EHCleanup | LifetimeMarker), + CGF.CurSyncRegion->getSyncRegionStart()); +} + +void CodeGenFunction::DetachScope::StartDetach() { + InitDetachScope(); + + // Set the detached block as the new alloca insertion point. + TFAllocaInsertPt = CGF.AllocaInsertPt; + llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); + CGF.AllocaInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "", + DetachedBlock); + + if (StmtCleanupsScope) + StmtCleanupsScope->DoDetach(); + else + PushSpawnedTaskTerminate(); + + // Create the detach + Detach = CGF.Builder.CreateDetach(DetachedBlock, ContinueBlock, + CGF.CurSyncRegion->getSyncRegionStart()); + + // Save the old EH state. + CreateDetachedEHState(); + + // Emit the detached block. + CGF.EmitBlock(DetachedBlock); + + // Link this detach block to the task frame, if it exists. + if (TaskFrame) { + llvm::Function *TaskFrameUse = + CGF.CGM.getIntrinsic(llvm::Intrinsic::taskframe_use); + CGF.Builder.CreateCall(TaskFrameUse, { TaskFrame }); + } + + // For Cilk, ensure that the detached task is implicitly synced before it + // returns. + CGF.PushSyncRegion()->addImplicitSync(); + + // Initialize lifetime intrinsics for the reference temporary. 
+ if (RefTmp.isValid()) { + switch (RefTmpSD) { + case SD_Automatic: + case SD_FullExpression: + if (auto *Size = CGF.EmitLifetimeStart( + CGF.CGM.getDataLayout().getTypeAllocSize(RefTmp.getElementType()), + RefTmp.getPointer())) { + if (RefTmpSD == SD_Automatic) + CGF.pushCleanupAfterFullExpr(NormalEHLifetimeMarker, + RefTmp, Size); + else + CGF.pushFullExprCleanup(NormalEHLifetimeMarker, + RefTmp, Size); + } + break; + default: + break; + } + } + + DetachStarted = true; +} + +void CodeGenFunction::DetachScope::CleanupDetach() { + if (!DetachStarted || DetachCleanedUp) + return; + + // Pop the sync region for the detached task. + CGF.PopSyncRegion(); + DetachCleanedUp = true; +} + +void CodeGenFunction::DetachScope::EmitTaskEnd() { + if (!CGF.HaveInsertPoint()) + return; + + // The CFG path into the spawned statement should terminate with a `reattach'. + CGF.Builder.CreateReattach(ContinueBlock, + CGF.CurSyncRegion->getSyncRegionStart()); +} + +static void EmitTrivialLandingPad(CodeGenFunction &CGF, + llvm::BasicBlock *TempInvokeDest) { + // Save the current IR generation state. + CGBuilderTy::InsertPoint savedIP = CGF.Builder.saveAndClearIP(); + + // Insert a simple cleanup landingpad at the start of TempInvokeDest. + TempInvokeDest->setName("lpad"); + CGF.EmitBlock(TempInvokeDest); + CGF.Builder.SetInsertPoint(&TempInvokeDest->front()); + + llvm::LandingPadInst *LPadInst = + CGF.Builder.CreateLandingPad(llvm::StructType::get(CGF.Int8PtrTy, + CGF.Int32Ty), 0); + + llvm::Value *LPadExn = CGF.Builder.CreateExtractValue(LPadInst, 0); + CGF.Builder.CreateStore(LPadExn, CGF.getExceptionSlot()); + llvm::Value *LPadSel = CGF.Builder.CreateExtractValue(LPadInst, 1); + CGF.Builder.CreateStore(LPadSel, CGF.getEHSelectorSlot()); + + LPadInst->setCleanup(true); + + // Restore the old IR generation state. 
+ CGF.Builder.restoreIP(savedIP); +} + +void CodeGenFunction::DetachScope::FinishDetach() { + if (!DetachStarted) + return; + + CleanupDetach(); + // Pop the detached_rethrow. + CGF.PopCleanupBlock(); + + EmitTaskEnd(); + + // Restore the alloca insertion point to taskframe_create. + { + llvm::Instruction *Ptr = CGF.AllocaInsertPt; + CGF.AllocaInsertPt = TFAllocaInsertPt; + SavedDetachedAllocaInsertPt = nullptr; + Ptr->eraseFromParent(); + } + + // Restore the task frame's EH state. + llvm::BasicBlock *TaskResumeBlock = RestoreTaskFrameEHState(); + assert(!TaskResumeBlock && "Emission of task produced a resume block"); + + llvm::BasicBlock *InvokeDest = nullptr; + if (TempInvokeDest) { + InvokeDest = CGF.getInvokeDest(); + if (InvokeDest) + TempInvokeDest->replaceAllUsesWith(InvokeDest); + else { + InvokeDest = TempInvokeDest; + EmitTrivialLandingPad(CGF, TempInvokeDest); + TempInvokeDest = nullptr; + } + } + + // Emit the continue block. + CGF.EmitBlock(ContinueBlock); + + // If the detached-rethrow handler is used, add an unwind destination to the + // detach. + if (InvokeDest) { + CGBuilderTy::InsertPoint SavedIP = CGF.Builder.saveIP(); + CGF.Builder.SetInsertPoint(Detach); + // Create the new detach instruction. + llvm::DetachInst *NewDetach = CGF.Builder.CreateDetach( + Detach->getDetached(), Detach->getContinue(), InvokeDest, + Detach->getSyncRegion()); + // Remove the old detach. + Detach->eraseFromParent(); + Detach = NewDetach; + CGF.Builder.restoreIP(SavedIP); + } + + // Pop the taskframe. + CGF.PopCleanupBlock(); + + // Restore the alloca insertion point. + { + llvm::Instruction *Ptr = CGF.AllocaInsertPt; + CGF.AllocaInsertPt = OldAllocaInsertPt; + TFAllocaInsertPt = nullptr; + Ptr->eraseFromParent(); + } + + // Restore the original EH state. 
+ llvm::BasicBlock *NestedEHResumeBlock = RestoreParentEHState(); + + if (TempInvokeDest) { + if (llvm::BasicBlock *InvokeDest = CGF.getInvokeDest()) { + TempInvokeDest->replaceAllUsesWith(InvokeDest); + } else + EmitTrivialLandingPad(CGF, TempInvokeDest); + } + + // If invocations in the parallel task led to the creation of EHResumeBlock, + // we need to create for outside the task. In particular, the new + // EHResumeBlock must use an ExceptionSlot and EHSelectorSlot allocated + // outside of the task. + if (NestedEHResumeBlock) { + if (!NestedEHResumeBlock->use_empty()) { + // Translate the nested EHResumeBlock into an appropriate EHResumeBlock in + // the outer scope. + NestedEHResumeBlock->replaceAllUsesWith( + CGF.getEHResumeBlock( + isa(NestedEHResumeBlock->getTerminator()))); + } + delete NestedEHResumeBlock; + } +} + +Address CodeGenFunction::DetachScope::CreateDetachedMemTemp( + QualType Ty, StorageDuration SD, const Twine &Name) { + // There shouldn't be multiple reference temporaries needed. + assert(!RefTmp.isValid() && + "Already created a reference temporary in this detach scope."); + + // Create the reference temporary + RefTmp = CGF.CreateMemTemp(Ty, Name); + RefTmpSD = SD; + + return RefTmp; +} + +CodeGenFunction::TaskFrameScope::TaskFrameScope(CodeGenFunction &CGF) + : CGF(CGF) { + if (LangOptions::Cilk_none == CGF.getLangOpts().getCilk()) + return; + if (!CGF.CurSyncRegion) + return; + + llvm::Function *TaskFrameCreate = + CGF.CGM.getIntrinsic(llvm::Intrinsic::taskframe_create); + TaskFrame = CGF.Builder.CreateCall(TaskFrameCreate); + + // Create a new alloca insertion point within the task frame. + OldAllocaInsertPt = CGF.AllocaInsertPt; + llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty); + CGF.AllocaInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "", + CGF.Builder.GetInsertBlock()); + + // Save the old EH state. 
+ OldEHResumeBlock = CGF.EHResumeBlock; + CGF.EHResumeBlock = nullptr; + OldExceptionSlot = CGF.ExceptionSlot; + CGF.ExceptionSlot = nullptr; + OldEHSelectorSlot = CGF.EHSelectorSlot; + CGF.EHSelectorSlot = nullptr; + OldNormalCleanupDest = CGF.NormalCleanupDest; + CGF.NormalCleanupDest = Address::invalid(); + + CGF.pushFullExprCleanup( + static_cast(NormalAndEHCleanup | LifetimeMarker | TaskExit), + this); +} + +CodeGenFunction::TaskFrameScope::~TaskFrameScope() { + if (LangOptions::Cilk_none == CGF.getLangOpts().getCilk()) + return; + if (!CGF.CurSyncRegion) + return; + + // Pop the taskframe. + CGF.PopCleanupBlock(); + + // Restore the alloca insertion point. + { + llvm::Instruction *Ptr = CGF.AllocaInsertPt; + CGF.AllocaInsertPt = OldAllocaInsertPt; + Ptr->eraseFromParent(); + } + + // Restore the original EH state. + llvm::BasicBlock *NestedEHResumeBlock = CGF.EHResumeBlock; + CGF.EHResumeBlock = OldEHResumeBlock; + CGF.ExceptionSlot = OldExceptionSlot; + CGF.EHSelectorSlot = OldEHSelectorSlot; + CGF.NormalCleanupDest = OldNormalCleanupDest; + + if (TempInvokeDest) { + if (llvm::BasicBlock *InvokeDest = CGF.getInvokeDest()) { + TempInvokeDest->replaceAllUsesWith(InvokeDest); + } else + EmitTrivialLandingPad(CGF, TempInvokeDest); + + if (TempInvokeDest->use_empty()) + delete TempInvokeDest; + } + + // If invocations in the parallel task led to the creation of EHResumeBlock, + // we need to create for outside the task. In particular, the new + // EHResumeBlock must use an ExceptionSlot and EHSelectorSlot allocated + // outside of the task. + if (NestedEHResumeBlock) { + if (!NestedEHResumeBlock->use_empty()) { + // Translate the nested EHResumeBlock into an appropriate EHResumeBlock in + // the outer scope. + NestedEHResumeBlock->replaceAllUsesWith( + CGF.getEHResumeBlock( + isa(NestedEHResumeBlock->getTerminator()))); + } + delete NestedEHResumeBlock; + } +} + +llvm::Instruction *CodeGenFunction::EmitSyncRegionStart() { + // Start the sync region. 
To ensure the syncregion.start call dominates all + // uses of the generated token, we insert this call at the alloca insertion + // point. + auto NL = ApplyDebugLocation::CreateArtificial(*this); + llvm::Instruction *SRStart = llvm::CallInst::Create( + CGM.getIntrinsic(llvm::Intrinsic::syncregion_start), + "syncreg", AllocaInsertPt); + SRStart->setDebugLoc(Builder.getCurrentDebugLocation()); + return SRStart; +} + +/// EmitCilkSyncStmt - Emit a _Cilk_sync node. +void CodeGenFunction::EmitCilkSyncStmt(const CilkSyncStmt &S) { + // Check if we are generating unreachable code. + if (!HaveInsertPoint()) + // We don't need to generate actual code. + return; + + llvm::BasicBlock *ContinueBlock = createBasicBlock("sync.continue"); + + // Generate a stoppoint if we are emitting debug info. + EmitStopPoint(&S); + + EnsureSyncRegion(); + + llvm::Instruction *SRStart = CurSyncRegion->getSyncRegionStart(); + + Builder.CreateSync(ContinueBlock, SRStart); + EmitBlock(ContinueBlock); + if (getLangOpts().Exceptions && !CurFn->doesNotThrow()) + EmitCallOrInvoke(CGM.getIntrinsic(llvm::Intrinsic::sync_unwind), + { SRStart }); +} + +void CodeGenFunction::EmitCilkScopeStmt(const CilkScopeStmt &S) { + LexicalScope CilkScope(*this, S.getSourceRange()); + + // If this _Cilk_scope is outermost in the function, emit + // tapir_runtime_{start,end} intrinsics around the scope. + bool ThisScopeIsOutermost = false; + if (!WithinCilkScope) { + WithinCilkScope = true; + ThisScopeIsOutermost = true; + } + + { + // Add a taskframe around this scope in case there are other spawns outside + // of this scope, which would need to be synced separately. + TaskFrameScope TFScope(*this); + if (ThisScopeIsOutermost && !CurSyncRegion) { + llvm::Instruction *TapirRTStart = Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::tapir_runtime_start)); + // Mark the end of the _Cilk_scope with tapir_runtime_end. + EHStack.pushCleanup(NormalAndEHCleanup, + TapirRTStart); + } + // Create a nested synced scope. 
+ SyncedScopeRAII SyncedScp(*this); + PushSyncRegion()->addImplicitSync(); + bool BodyIsCompoundStmt = isa(S.getBody()); + if (BodyIsCompoundStmt) + ScopeIsSynced = true; + + // Emit the spawned statement. + EmitStmt(S.getBody()); + + PopSyncRegion(); + } + + // If this _Cilk_scope is outermost in the function, mark that CodeGen is no + // longer emitting within a _Cilk_scope. + if (ThisScopeIsOutermost) + WithinCilkScope = false; +} + +static const Stmt *IgnoreImplicitAndCleanups(const Stmt *S) { + const Stmt *Current = S; + if (auto *E = dyn_cast_or_null(S)) + Current = E->IgnoreImplicit(); + const Stmt *Lasts = nullptr; + while (Current != Lasts) { + Lasts = Current; + if (const auto *EWC = dyn_cast(Current)) + Current = EWC->getSubExpr()->IgnoreImplicit(); + } + return Current; +} + +void CodeGenFunction::EmitCilkSpawnStmt(const CilkSpawnStmt &S) { + // Handle spawning of calls in a special manner, to evaluate + // arguments before spawn. + if (isa(IgnoreImplicitAndCleanups(S.getSpawnedStmt()))) { + // Set up to perform a detach. + assert(!IsSpawned && + "_Cilk_spawn statement found in spawning environment."); + IsSpawned = true; + PushDetachScope(); + + // Emit the call. + EmitStmt(S.getSpawnedStmt()); + + // Finish the detach. + if (IsSpawned) { + if (!CurDetachScope->IsDetachStarted()) + FailedSpawnWarning(S.getBeginLoc()); + IsSpawned = false; + PopDetachScope(); + } + + return; + } + + // Otherwise, we assume that the programmer dealt with races correctly. + + // Set up to perform a detach. + PushDetachScope(); + CurDetachScope->StartDetach(); + + SyncedScopeRAII SyncedScp(*this); + if (isa(S.getSpawnedStmt())) + ScopeIsSynced = true; + + // Emit the spawned statement. + EmitStmt(S.getSpawnedStmt()); + + // Finish the detach. 
+ PopDetachScope(); +} + +LValue CodeGenFunction::EmitCilkSpawnExprLValue(const CilkSpawnExpr *E) { + assert(isa(IgnoreImplicitAndCleanups(E->getSpawnedExpr())) && + "SpawnExprLValue does not spawn a call."); + assert(!IsSpawned && + "_Cilk_spawn statement found in spawning environment."); + IsSpawned = true; + PushDetachScope(); + + LValue LV = EmitLValue(E->getSpawnedExpr()); + + // Finish the detach. + if (IsSpawned) { + if (!CurDetachScope->IsDetachStarted()) + FailedSpawnWarning(E->getExprLoc()); + IsSpawned = false; + PopDetachScope(); + } + return LV; +} + +void CodeGenFunction::EmitCilkForStmt(const CilkForStmt &S, + ArrayRef ForAttrs) { + JumpDest LoopExit = getJumpDestInCurrentScope("pfor.end"); + + PushSyncRegion(); + llvm::Instruction *SyncRegion = EmitSyncRegionStart(); + CurSyncRegion->setSyncRegionStart(SyncRegion); + + llvm::BasicBlock *TempInvokeDest = createBasicBlock("temp.invoke.dest"); + + LexicalScope ForScope(*this, S.getSourceRange()); + + // Evaluate the first part before the loop. + if (S.getInit()) + EmitStmt(S.getInit()); + + llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); + // If there are any cleanups between here and the loop-exit scope, + // create a block to stage a loop exit along. + if (ForScope.requiresCleanups()) + ExitBlock = createBasicBlock("pfor.initcond.cleanup"); + + if (S.getLimitStmt()) { + EmitStmt(S.getLimitStmt()); + + // As long as the condition is true, iterate the loop. + llvm::BasicBlock *PForPH = createBasicBlock("pfor.ph"); + + // C99 6.8.5p2/p4: The first substatement is executed if the expression + // compares unequal to 0. The condition must be a scalar type. 
+ llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getInitCond()); + Builder.CreateCondBr( + BoolCondVal, PForPH, ExitBlock, + createProfileWeightsForLoop(S.getInitCond(), + getProfileCount(S.getBody()))); + + if (ExitBlock != LoopExit.getBlock()) { + EmitBlock(ExitBlock); + EmitBranchThroughCleanup(LoopExit); + } + + EmitBlock(PForPH); + } + if (S.getBeginStmt()) + EmitStmt(S.getBeginStmt()); + if (S.getEndStmt()) + EmitStmt(S.getEndStmt()); + + assert(S.getCond() && "_Cilk_for loop has no condition"); + + // Start the loop with a block that tests the condition. If there's an + // increment, the continue scope will be overwritten later. + JumpDest Continue = getJumpDestInCurrentScope("pfor.cond"); + llvm::BasicBlock *CondBlock = Continue.getBlock(); + EmitBlock(CondBlock); + + LoopStack.setSpawnStrategy(LoopAttributes::DAC); + const SourceRange &R = S.getSourceRange(); + LoopStack.push(CondBlock, CGM.getContext(), CGM.getCodeGenOpts(), ForAttrs, + SourceLocToDebugLoc(R.getBegin()), + SourceLocToDebugLoc(R.getEnd())); + + const Expr *Inc = S.getInc(); + assert(Inc && "_Cilk_for loop has no increment"); + Continue = getJumpDestInCurrentScope("pfor.inc"); + + // Ensure that the _Cilk_for loop iterations are synced on exit from the loop. + EHStack.pushCleanup(NormalCleanup, SyncRegion); + + // Create a cleanup scope for the condition variable cleanups. + LexicalScope ConditionScope(*this, S.getSourceRange()); + + // Variables to store the old alloca insert point. + llvm::AssertingVH OldAllocaInsertPt; + // Variables to store the old EH state. + llvm::BasicBlock *OldEHResumeBlock; + llvm::Value *OldExceptionSlot; + llvm::AllocaInst *OldEHSelectorSlot; + Address OldNormalCleanupDest = Address::invalid(); + + const DeclStmt *LoopVar = S.getLoopVarStmt(); + const VarDecl *LoopVarDecl = + LoopVar ? 
cast(LoopVar->getSingleDecl()) : nullptr; + RValue LoopVarInitRV; + llvm::BasicBlock *DetachBlock; + llvm::BasicBlock *ForBodyEntry; + llvm::BasicBlock *ForBody; + llvm::DetachInst *Detach; + { + // FIXME: Figure out if there is a way to support condition variables in + // Cilk. + // + // // If the for statement has a condition scope, emit the local variable + // // declaration. + // if (S.getConditionVariable()) { + // EmitAutoVarDecl(*S.getConditionVariable()); + // } + + // If there are any cleanups between here and the loop-exit scope, + // create a block to stage a loop exit along. + if (ForScope.requiresCleanups()) + ExitBlock = createBasicBlock("pfor.cond.cleanup"); + + // As long as the condition is true, iterate the loop. + DetachBlock = createBasicBlock("pfor.detach"); + // Emit extra entry block for detached body, to ensure that this detached + // entry block has just one predecessor. + ForBodyEntry = createBasicBlock("pfor.body.entry"); + ForBody = createBasicBlock("pfor.body"); + + EmitBranch(DetachBlock); + + EmitBlockAfterUses(DetachBlock); + + // Get the value of the loop variable initialization before we emit the + // detach. + if (LoopVar) + LoopVarInitRV = EmitAnyExprToTemp(LoopVarDecl->getInit()); + + Detach = Builder.CreateDetach(ForBodyEntry, Continue.getBlock(), + SyncRegion); + // Save the old alloca insert point. + OldAllocaInsertPt = AllocaInsertPt; + // Save the old EH state. + OldEHResumeBlock = EHResumeBlock; + OldExceptionSlot = ExceptionSlot; + OldEHSelectorSlot = EHSelectorSlot; + OldNormalCleanupDest = NormalCleanupDest; + + // Create a new alloca insert point. + llvm::Value *Undef = llvm::UndefValue::get(Int32Ty); + AllocaInsertPt = new llvm::BitCastInst(Undef, Int32Ty, "", ForBodyEntry); + + // Push a cleanup to make sure any exceptional exit from the loop is + // terminated by a detached.rethrow. 
+ EHStack.pushCleanup( + static_cast(EHCleanup | LifetimeMarker | TaskExit), + SyncRegion, TempInvokeDest); + + // Set up nested EH state. + EHResumeBlock = nullptr; + ExceptionSlot = nullptr; + EHSelectorSlot = nullptr; + NormalCleanupDest = Address::invalid(); + + EmitBlock(ForBodyEntry); + } + + RunCleanupsScope DetachCleanupsScope(*this); + + // Set up a nested sync region for the loop body, and ensure it has an + // implicit sync. + PushSyncRegion()->addImplicitSync(); + + // Store the blocks to use for break and continue. + JumpDest Preattach = getJumpDestInCurrentScope("pfor.preattach"); + BreakContinueStack.push_back(BreakContinue(Preattach, Preattach)); + + // Inside the detached block, create the loop variable, setting its value to + // the saved initialization value. + if (LoopVar) { + AutoVarEmission LVEmission = EmitAutoVarAlloca(*LoopVarDecl); + QualType type = LoopVarDecl->getType(); + Address Loc = LVEmission.getObjectAddress(*this); + LValue LV = MakeAddrLValue(Loc, type); + LV.setNonGC(true); + EmitStoreThroughLValue(LoopVarInitRV, LV, true); + EmitAutoVarCleanups(LVEmission); + } + + Builder.CreateBr(ForBody); + + EmitBlock(ForBody); + + incrementProfileCounter(&S); + + { + // Create a separate cleanup scope for the body, in case it is not + // a compound statement. + RunCleanupsScope BodyScope(*this); + + SyncedScopeRAII SyncedScp(*this); + if (isa(S.getBody())) + ScopeIsSynced = true; + EmitStmt(S.getBody()); + + if (HaveInsertPoint()) + Builder.CreateBr(Preattach.getBlock()); + } + + // Finish detached body and emit the reattach. + { + EmitBlock(Preattach.getBlock()); + // The design of the exception-handling mechanism means we need to cleanup + // the scope before popping the sync region. + DetachCleanupsScope.ForceCleanup(); + PopSyncRegion(); + // Pop the detached.rethrow cleanup. + PopCleanupBlock(); + Builder.CreateReattach(Continue.getBlock(), SyncRegion); + } + + // Restore CGF state after detached region. 
+ llvm::BasicBlock *NestedEHResumeBlock; + { + // Restore the alloca insertion point. + llvm::Instruction *Ptr = AllocaInsertPt; + AllocaInsertPt = OldAllocaInsertPt; + Ptr->eraseFromParent(); + + // Restore the EH state. + NestedEHResumeBlock = EHResumeBlock; + EHResumeBlock = OldEHResumeBlock; + ExceptionSlot = OldExceptionSlot; + EHSelectorSlot = OldEHSelectorSlot; + NormalCleanupDest = OldNormalCleanupDest; + } + + // An invocation of the detached.rethrow intrinsic marks the end of an + // exceptional return from the parallel-loop body. That invoke needs a valid + // landingpad as its unwind destination. We create that unwind destination + // here. + llvm::BasicBlock *InvokeDest = nullptr; + if (!TempInvokeDest->use_empty()) { + InvokeDest = getInvokeDest(); + if (InvokeDest) + TempInvokeDest->replaceAllUsesWith(InvokeDest); + else { + InvokeDest = TempInvokeDest; + EmitTrivialLandingPad(*this, TempInvokeDest); + } + } + + // If invocations in the parallel task led to the creation of EHResumeBlock, + // we need to create one for outside the task. In particular, the new + // EHResumeBlock must use an ExceptionSlot and EHSelectorSlot allocated + // outside of the task. + if (NestedEHResumeBlock) { + if (!NestedEHResumeBlock->use_empty()) { + // Translate the nested EHResumeBlock into an appropriate EHResumeBlock in + // the outer scope. + NestedEHResumeBlock->replaceAllUsesWith( + getEHResumeBlock( + isa(NestedEHResumeBlock->getTerminator()))); + } + delete NestedEHResumeBlock; + } + + // Emit the increment next. + EmitBlockAfterUses(Continue.getBlock()); + EmitStmt(Inc); + + { + // If the detached-rethrow handler is used, add an unwind destination to the + // detach. + if (InvokeDest) { + CGBuilderTy::InsertPoint SavedIP = Builder.saveIP(); + Builder.SetInsertPoint(Detach); + // Create the new detach instruction. + llvm::DetachInst *NewDetach = Builder.CreateDetach( + ForBodyEntry, Continue.getBlock(), InvokeDest, + SyncRegion); + // Remove the old detach. 
+ Detach->eraseFromParent(); + Detach = NewDetach; + Builder.restoreIP(SavedIP); + } + } + + BreakContinueStack.pop_back(); + + ConditionScope.ForceCleanup(); + + EmitStopPoint(&S); + + // C99 6.8.5p2/p4: The first substatement is executed if the expression + // compares unequal to 0. The condition must be a scalar type. + llvm::Value *BoolCondVal = EvaluateExprAsBool(S.getCond()); + Builder.CreateCondBr( + BoolCondVal, CondBlock, ExitBlock, + createProfileWeightsForLoop(S.getCond(), getProfileCount(S.getBody()))); + + if (ExitBlock != LoopExit.getBlock()) { + EmitBlock(ExitBlock); + EmitBranchThroughCleanup(LoopExit); + } + + ForScope.ForceCleanup(); + + LoopStack.pop(); + // Emit the fall-through block. + EmitBlock(LoopExit.getBlock(), true); + PopSyncRegion(); + + if (TempInvokeDest->use_empty()) + delete TempInvokeDest; +} diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp index 6ef7d12372d098..21fb33c90e37a3 100644 --- a/clang/lib/CodeGen/CGClass.cpp +++ b/clang/lib/CodeGen/CGClass.cpp @@ -2106,6 +2106,7 @@ void CodeGenFunction::EmitCXXAggrConstructorCall(const CXXConstructorDecl *ctor, void CodeGenFunction::destroyCXXObject(CodeGenFunction &CGF, Address addr, QualType type) { + type = type.stripHyperobject(); const RecordType *rtype = type->castAs(); const CXXRecordDecl *record = cast(rtype->getDecl()); const CXXDestructorDecl *dtor = record->getDestructor(); diff --git a/clang/lib/CodeGen/CGCleanup.cpp b/clang/lib/CodeGen/CGCleanup.cpp index 0bbab283603d98..ee30853a35d1a9 100644 --- a/clang/lib/CodeGen/CGCleanup.cpp +++ b/clang/lib/CodeGen/CGCleanup.cpp @@ -205,6 +205,8 @@ void *EHScopeStack::pushCleanup(CleanupKind Kind, size_t Size) { InnermostEHScope = stable_begin(); if (IsLifetimeMarker) Scope->setLifetimeMarker(); + if (Kind & TaskExit) + Scope->setTaskExit(); // With Windows -EHa, Invoke llvm.seh.scope.begin() for EHCleanup if (CGF->getLangOpts().EHAsynch && IsEHCleanup && !IsLifetimeMarker && @@ -431,9 +433,10 @@ void 
CodeGenFunction::ResolveBranchFixups(llvm::BasicBlock *Block) { /// Pops cleanup blocks until the given savepoint is reached. void CodeGenFunction::PopCleanupBlocks( EHScopeStack::stable_iterator Old, - std::initializer_list ValuesToReload) { + std::initializer_list ValuesToReload, bool AfterSync) { assert(Old.isValid()); + bool EmitSync = AfterSync; bool HadBranches = false; while (EHStack.stable_begin() != Old) { EHCleanupScope &Scope = cast(*EHStack.begin()); @@ -445,7 +448,8 @@ void CodeGenFunction::PopCleanupBlocks( bool FallThroughIsBranchThrough = Old.strictlyEncloses(Scope.getEnclosingNormalCleanup()); - PopCleanupBlock(FallThroughIsBranchThrough); + PopCleanupBlock(FallThroughIsBranchThrough, EmitSync); + EmitSync = false; } // If we didn't have any branches, the insertion point before cleanups must @@ -487,8 +491,8 @@ void CodeGenFunction::PopCleanupBlocks( /// cleanups from the given savepoint in the lifetime-extended cleanups stack. void CodeGenFunction::PopCleanupBlocks( EHScopeStack::stable_iterator Old, size_t OldLifetimeExtendedSize, - std::initializer_list ValuesToReload) { - PopCleanupBlocks(Old, ValuesToReload); + std::initializer_list ValuesToReload, bool AfterSync) { + PopCleanupBlocks(Old, ValuesToReload, AfterSync); // Move our deferred cleanups onto the EH stack. for (size_t I = OldLifetimeExtendedSize, @@ -656,7 +660,8 @@ static void destroyOptimisticNormalEntry(CodeGenFunction &CGF, /// Pops a cleanup block. If the block includes a normal cleanup, the /// current insertion point is threaded through the cleanup, as are /// any branch fixups on the cleanup. 
-void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { +void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough, + bool AfterSync) { assert(!EHStack.empty() && "cleanup stack is empty!"); assert(isa(*EHStack.begin()) && "top not a cleanup!"); EHCleanupScope &Scope = cast(*EHStack.begin()); @@ -802,6 +807,11 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { EmitSehTryScopeEnd(); } + if (AfterSync) { + EmitImplicitSyncCleanup(); + return PopCleanupBlock(FallthroughIsBranchThrough, false); + } + destroyOptimisticNormalEntry(*this, Scope); EHStack.popCleanup(); @@ -934,6 +944,8 @@ void CodeGenFunction::PopCleanupBlock(bool FallthroughIsBranchThrough) { } // IV. Pop the cleanup and emit it. + if (AfterSync) + EmitImplicitSyncCleanup(); EHStack.popCleanup(); assert(EHStack.hasNormalCleanups() == HasEnclosingCleanups); @@ -1085,13 +1097,17 @@ bool CodeGenFunction::isObviouslyBranchWithoutCleanups(JumpDest Dest) const { /// be known, in which case this will require a fixup. /// /// As a side-effect, this method clears the insertion point. -void CodeGenFunction::EmitBranchThroughCleanup(JumpDest Dest) { +void CodeGenFunction::EmitBranchThroughCleanup(JumpDest Dest, bool AfterSync) { assert(Dest.getScopeDepth().encloses(EHStack.stable_begin()) && "stale jump destination"); if (!HaveInsertPoint()) return; + // If needed, insert an implicit _Cilk_sync before the cleanups. + if (AfterSync) + EmitImplicitSyncCleanup(); + // Create the branch. llvm::BranchInst *BI = Builder.CreateBr(Dest.getBlock()); diff --git a/clang/lib/CodeGen/CGCleanup.h b/clang/lib/CodeGen/CGCleanup.h index 079a3e25d6dc78..40890ad806422e 100644 --- a/clang/lib/CodeGen/CGCleanup.h +++ b/clang/lib/CodeGen/CGCleanup.h @@ -81,6 +81,9 @@ class EHScope { /// Whether the EH cleanup should test the activation flag. unsigned TestFlagInEHCleanup : 1; + /// Whether this cleanup marks the exit of a task. 
+ unsigned IsTaskExit : 1; + /// The amount of extra storage needed by the Cleanup. /// Always a multiple of the scope-stack alignment. unsigned CleanupSize : 12; @@ -297,6 +300,7 @@ class alignas(8) EHCleanupScope : public EHScope { CleanupBits.IsLifetimeMarker = false; CleanupBits.TestFlagInNormalCleanup = false; CleanupBits.TestFlagInEHCleanup = false; + CleanupBits.IsTaskExit = false; CleanupBits.CleanupSize = cleanupSize; assert(CleanupBits.CleanupSize == cleanupSize && "cleanup size overflow"); @@ -343,6 +347,9 @@ class alignas(8) EHCleanupScope : public EHScope { return CleanupBits.TestFlagInEHCleanup; } + bool isTaskExit() const { return CleanupBits.IsTaskExit; } + void setTaskExit() { CleanupBits.IsTaskExit = true; } + unsigned getFixupDepth() const { return FixupDepth; } EHScopeStack::stable_iterator getEnclosingNormalCleanup() const { return EnclosingNormal; diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index d8eb2aecb87ad3..0e9fd7838cd526 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -948,6 +948,10 @@ llvm::DIType *CGDebugInfo::CreateType(const BitIntType *Ty) { Encoding); } +llvm::DIType *CGDebugInfo::CreateType(const HyperobjectType *Ty) { + llvm_unreachable("handled in CreateTypeNode"); +} + llvm::DIType *CGDebugInfo::CreateType(const ComplexType *Ty) { // Bit size and offset of the type. 
llvm::dwarf::TypeKind Encoding = llvm::dwarf::DW_ATE_complex_float; @@ -3592,6 +3596,9 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) { return CreateType(cast(Ty)); case Type::Complex: return CreateType(cast(Ty)); + case Type::Hyperobject: + return CreateTypeNode + (cast(Ty)->getElementType().getCanonicalType(), Unit); case Type::Pointer: return CreateType(cast(Ty), Unit); case Type::BlockPointer: diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index 58ee6dd64c4fc3..5d3d8bf4967662 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -176,6 +176,7 @@ class CGDebugInfo { /// ivars and property accessors. llvm::DIType *CreateType(const BuiltinType *Ty); llvm::DIType *CreateType(const ComplexType *Ty); + llvm::DIType *CreateType(const HyperobjectType *Ty); llvm::DIType *CreateType(const BitIntType *Ty); llvm::DIType *CreateQualifiedType(QualType Ty, llvm::DIFile *Fg); llvm::DIType *CreateQualifiedType(const FunctionProtoType *Ty, diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index d99dcdba8e431b..0e086d1866c149 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -795,6 +795,10 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D, LValue lvalue, bool capturedByInit) { Qualifiers::ObjCLifetime lifetime = lvalue.getObjCLifetime(); if (!lifetime) { + if (isa(init)) { + EmitScalarExprIntoLValue(init, lvalue, /*isInit*/ true); + return; + } llvm::Value *value = EmitScalarExpr(init); if (capturedByInit) drillIntoBlockVariable(*this, lvalue, cast(D)); @@ -1858,6 +1862,58 @@ void CodeGenFunction::emitZeroOrPatternForAutoVarInit(QualType type, } } +bool CodeGenFunction::getReducer(const VarDecl *D, ReducerCallbacks &CB) { + if (const HyperobjectType *H = D->getType()->getAs()) { + if (H->hasCallbacks()) { + CB.Identity = H->getIdentity(); + CB.Reduce = H->getReduce(); + return true; + } + } + return false; 
+} + +void CodeGenFunction::destroyHyperobject(CodeGenFunction &CGF, Address Addr, + QualType Type) { + llvm::Function *F = CGF.CGM.getIntrinsic(llvm::Intrinsic::reducer_unregister); + llvm::Value *Arg = + CGF.Builder.CreateBitCast(Addr.getPointer(), CGF.CGM.VoidPtrTy); + CGF.Builder.CreateCall(F, {Arg}); + QualType Inner = Type.stripHyperobject(); + if (const RecordType *rtype = Inner->getAs()) { + if (const CXXRecordDecl *record = dyn_cast(rtype->getDecl())) + if (record->hasNonTrivialDestructor()) + destroyCXXObject(CGF, Addr, Inner); + } +} + +void CodeGenFunction::EmitReducerInit(const VarDecl *D, + const ReducerCallbacks &C, + llvm::Value *Addr) { + RValue Identity = EmitAnyExpr(C.Identity); + RValue Reduce = EmitAnyExpr(C.Reduce); + + llvm::Type *SizeType = ConvertType(getContext().getSizeType()); + llvm::Value *Size = nullptr; + QualType Type = D->getType(); + if (const VariableArrayType *VLA = + getContext().getAsVariableArrayType(Type)) { + auto V = getVLASize(VLA); + llvm::Value *Size1 = CGM.getSize(getContext().getTypeSizeInChars(V.Type)); + Size = Builder.CreateNUWMul(V.NumElts, Size1); + } else { + Size = CGM.getSize(getContext().getTypeSizeInChars(Type)); + } + // TODO: mark this call as registering a local + // TODO: add better handling of attribute arguments that evaluate to null + SmallVector Types = {SizeType}; + llvm::Function *F = + CGM.getIntrinsic(llvm::Intrinsic::reducer_register, Types); + llvm::Value *IdentityV = Identity.getScalarVal(); + llvm::Value *ReduceV = Reduce.getScalarVal(); + Builder.CreateCall(F, {Addr, Size, IdentityV, ReduceV}); +} + void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { assert(emission.Variable && "emission was not valid!"); @@ -1868,6 +1924,14 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { auto DL = ApplyDebugLocation::CreateDefaultArtificial(*this, D.getLocation()); QualType type = D.getType(); + ReducerCallbacks RCB = {0, 0}; + bool Reducer = false; + if 
(const HyperobjectType *H = type->getAs()) { + type = H->getElementType(); + assert(!emission.IsEscapingByRef); // block reducers not supported + Reducer = getReducer(&D, RCB); + } + // If this local has an initializer, emit it now. const Expr *Init = D.getInit(); @@ -1925,8 +1989,14 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { return emitZeroOrPatternForAutoVarInit(type, D, Loc); }; - if (isTrivialInitializer(Init)) - return initializeWhatIsTechnicallyUninitialized(Loc); + if (isTrivialInitializer(Init)) { + initializeWhatIsTechnicallyUninitialized(Loc); + if (Reducer) + EmitReducerInit(&D, RCB, + Builder.CreateBitCast(emission.Addr.getPointer(), + CGM.VoidPtrTy)); + return; + } llvm::Constant *constant = nullptr; if (emission.IsConstantAggregate || @@ -1955,19 +2025,34 @@ void CodeGenFunction::EmitAutoVarInit(const AutoVarEmission &emission) { initializeWhatIsTechnicallyUninitialized(Loc); LValue lv = MakeAddrLValue(Loc, type); lv.setNonGC(true); - return EmitExprAsInit(Init, &D, lv, capturedByInit); + EmitExprAsInit(Init, &D, lv, capturedByInit); + if (Reducer) + EmitReducerInit(&D, RCB, + Builder.CreateBitCast(emission.Addr.getPointer(), + CGM.VoidPtrTy)); + return; } if (!emission.IsConstantAggregate) { // For simple scalar/complex initialization, store the value directly. LValue lv = MakeAddrLValue(Loc, type); lv.setNonGC(true); - return EmitStoreThroughLValue(RValue::get(constant), lv, true); + EmitStoreThroughLValue(RValue::get(constant), lv, true); + if (Reducer) + EmitReducerInit(&D, RCB, + Builder.CreateBitCast(emission.Addr.getPointer(), + CGM.VoidPtrTy)); + return; } emitStoresForConstant(CGM, D, Loc.withElementType(CGM.Int8Ty), type.isVolatileQualified(), Builder, constant, /*IsAutoInit=*/false); + + if (Reducer) + EmitReducerInit(&D, RCB, + Builder.CreateBitCast(emission.Addr.getPointer(), + CGM.VoidPtrTy)); } /// Emit an expression as an initializer for an object (variable, field, etc.) 
@@ -1991,11 +2076,16 @@ void CodeGenFunction::EmitExprAsInit(const Expr *init, const ValueDecl *D, EmitStoreThroughLValue(rvalue, lvalue, true); return; } + switch (getEvaluationKind(type)) { case TEK_Scalar: EmitScalarInit(init, D, lvalue, capturedByInit); return; case TEK_Complex: { + if (isa(init)) { + EmitComplexExprIntoLValue(init, lvalue, /*init*/ true); + return; + } ComplexPairTy complex = EmitComplexExpr(init); if (capturedByInit) drillIntoBlockVariable(*this, lvalue, cast(D)); @@ -2038,15 +2128,23 @@ void CodeGenFunction::emitAutoVarTypeCleanup( CleanupKind cleanupKind = NormalAndEHCleanup; CodeGenFunction::Destroyer *destroyer = nullptr; + bool IsReducer = false; + switch (dtorKind) { case QualType::DK_none: llvm_unreachable("no cleanup for trivially-destructible variable"); + case QualType::DK_hyperobject: + IsReducer = true; + break; + case QualType::DK_cxx_destructor: + if (const HyperobjectType *H = type->getAs()) + IsReducer = H->hasCallbacks(); // If there's an NRVO flag on the emission, we need a different // cleanup. 
if (emission.NRVOFlag) { - assert(!type->isArrayType()); + assert(!type->isArrayType() && !IsReducer); CXXDestructorDecl *dtor = type->getAsCXXRecordDecl()->getDestructor(); EHStack.pushCleanup(cleanupKind, addr, type, dtor, emission.NRVOFlag); @@ -2140,7 +2238,10 @@ void CodeGenFunction::EmitAutoVarCleanups(const AutoVarEmission &emission) { CodeGenFunction::Destroyer * CodeGenFunction::getDestroyer(QualType::DestructionKind kind) { switch (kind) { - case QualType::DK_none: llvm_unreachable("no destroyer for trivial dtor"); + case QualType::DK_none: + return nullptr; + case QualType::DK_hyperobject: + return destroyHyperobject; case QualType::DK_cxx_destructor: return destroyCXXObject; case QualType::DK_objc_strong_lifetime: @@ -2177,6 +2278,9 @@ void CodeGenFunction::pushDestroy(QualType::DestructionKind dtorKind, void CodeGenFunction::pushDestroy(CleanupKind cleanupKind, Address addr, QualType type, Destroyer *destroyer, bool useEHCleanupForArray) { + if (SpawnedCleanup) + return pushLifetimeExtendedDestroy(cleanupKind, addr, type, destroyer, + useEHCleanupForArray); pushFullExprCleanup(cleanupKind, addr, type, destroyer, useEHCleanupForArray); } diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp index a9c88110d6f0b2..07c3e7519f2845 100644 --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -35,10 +35,13 @@ static void EmitDeclInit(CodeGenFunction &CGF, const VarDecl &D, assert(!D.getType()->isReferenceType() && "Should not call EmitDeclInit on a reference!"); + const Expr *Init = D.getInit(); + if (!Init) // XXX OpenCilk + return; + QualType type = D.getType(); LValue lv = CGF.MakeAddrLValue(DeclPtr, type); - const Expr *Init = D.getInit(); switch (CGF.getEvaluationKind(type)) { case TEK_Scalar: { CodeGenModule &CGM = CGF.CGM; @@ -87,6 +90,11 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, case QualType::DK_cxx_destructor: break; + case QualType::DK_hyperobject: + if (const 
HyperobjectType *H = D.getType()->getAs()) + DtorKind = H->getElementType().isDestructedType(); + break; + case QualType::DK_objc_strong_lifetime: case QualType::DK_objc_weak_lifetime: case QualType::DK_nontrivial_c_struct: @@ -105,6 +113,7 @@ static void EmitDeclDestroy(CodeGenFunction &CGF, const VarDecl &D, // Under some ABIs, destructors return this instead of void, and cannot be // passed directly to __cxa_atexit if the target does not allow this // mismatch. + // Note that Record is null for a hyperobject. const CXXRecordDecl *Record = Type->getAsCXXRecordDecl(); bool CanRegisterDestructor = Record && (!CGM.getCXXABI().HasThisReturn( @@ -468,6 +477,10 @@ llvm::Function *CodeGenModule::CreateGlobalInitOrCleanUpFunction( !isInNoSanitizeList(SanitizerKind::Thread, Fn, Loc)) Fn->addFnAttr(llvm::Attribute::SanitizeThread); + if (getLangOpts().Sanitize.has(SanitizerKind::Cilk) && + !isInNoSanitizeList(SanitizerKind::Cilk, Fn, Loc)) + Fn->addFnAttr(llvm::Attribute::SanitizeCilk); + if (getLangOpts().Sanitize.has(SanitizerKind::Memory) && !isInNoSanitizeList(SanitizerKind::Memory, Fn, Loc)) Fn->addFnAttr(llvm::Attribute::SanitizeMemory); @@ -1013,6 +1026,13 @@ void CodeGenFunction::GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn, if (getLangOpts().HLSL) CGM.getHLSLRuntime().annotateHLSLResource(D, Addr); + ReducerCallbacks RCB = {0, 0}; + if (getReducer(D, RCB)) { + llvm::Value *Addr = + Builder.CreateBitCast(CGM.GetAddrOfGlobalVar(D, nullptr), CGM.VoidPtrTy); + EmitReducerInit(D, RCB, Addr); + } + FinishFunction(); } @@ -1123,20 +1143,43 @@ llvm::Function *CodeGenFunction::generateDestroyHelper( ImplicitParamDecl::Other); args.push_back(&Dst); + DynamicInitKind Kind; + bool IsReducer; + const char *Name; + if (const HyperobjectType *H = type->getAs()) { + IsReducer = VD->isReducer(); + Kind = DynamicInitKind::AtExit; + type = H->getElementType(); + Name = "__cxx_global_hyperobject_dtor"; + } else { + assert(destroyer && "neither reducer nor destructor"); + 
IsReducer = false; + Kind = DynamicInitKind::GlobalArrayDestructor; + Name = "__cxx_global_array_dtor"; + } + const CGFunctionInfo &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(getContext().VoidTy, args); llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI); llvm::Function *fn = CGM.CreateGlobalInitOrCleanUpFunction( - FTy, "__cxx_global_array_dtor", FI, VD->getLocation()); + FTy, Name, FI, VD->getLocation()); CurEHLocation = VD->getBeginLoc(); - StartFunction(GlobalDecl(VD, DynamicInitKind::GlobalArrayDestructor), - getContext().VoidTy, fn, FI, args); + StartFunction(GlobalDecl(VD, Kind), getContext().VoidTy, fn, FI, args); // Emit an artificial location for this function. auto AL = ApplyDebugLocation::CreateArtificial(*this); - emitDestroy(addr, type, destroyer, useEHCleanupForArray); + if (IsReducer) { + llvm::Function *Unregister = + CGM.getIntrinsic(llvm::Intrinsic::reducer_unregister); + llvm::Value *AddrVoid = + Builder.CreateBitCast(addr.getPointer(), CGM.VoidPtrTy); + Builder.CreateCall(Unregister, {AddrVoid}); + } + + if (destroyer) + emitDestroy(addr, type, destroyer, useEHCleanupForArray); FinishFunction(); diff --git a/clang/lib/CodeGen/CGException.cpp b/clang/lib/CodeGen/CGException.cpp index 9cb7d4c7731deb..fc5cc156971e05 100644 --- a/clang/lib/CodeGen/CGException.cpp +++ b/clang/lib/CodeGen/CGException.cpp @@ -609,8 +609,24 @@ void CodeGenFunction::EmitEndEHSpec(const Decl *D) { } void CodeGenFunction::EmitCXXTryStmt(const CXXTryStmt &S) { + TaskFrameScope TFScope(*this); EnterCXXTryStmt(S); - EmitStmt(S.getTryBlock()); + { + // If compiling Cilk code, create a nested sync region, with an implicit + // sync, for the try-catch. + bool CompilingCilk = (getLangOpts().getCilk() != LangOptions::Cilk_none); + SyncedScopeRAII SyncedScp(*this); + if (CompilingCilk) { + PushSyncRegion(); + if (isa(S.getTryBlock())) + ScopeIsSynced = true; + } + EmitStmt(S.getTryBlock()); + + // Pop the nested sync region after the try block. 
+ if (CompilingCilk) + PopSyncRegion(); + } ExitCXXTryStmt(S); } @@ -855,6 +871,8 @@ llvm::BasicBlock *CodeGenFunction::EmitLandingPad() { case EHScope::Cleanup: // If we have a cleanup, remember that. hasCleanup = (hasCleanup || cast(*I).isEHCleanup()); + if (cast(*I).isTaskExit()) + goto done; continue; case EHScope::Filter: { diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index fc16b3133f73b4..a6dac6a664e330 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -411,6 +411,11 @@ static Address createReferenceTemporary(CodeGenFunction &CGF, // FIXME: Should we put the new global into a COMDAT? return Address(C, GV->getValueType(), alignment); } + if (CGF.IsSpawned) { + CGF.PushDetachScope(); + return CGF.CurDetachScope->CreateDetachedMemTemp( + Ty, M->getStorageDuration(), "det.ref.tmp"); + } return CGF.CreateMemTemp(Ty, "ref.tmp", Alloca); } case SD_Thread: @@ -515,6 +520,7 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { EmitAnyExprToMem(E, Object, Qualifiers(), /*IsInit*/true); } } else { + if (!IsSpawned) { switch (M->getStorageDuration()) { case SD_Automatic: if (auto *Size = EmitLifetimeStart( @@ -569,6 +575,7 @@ EmitMaterializeTemporaryExpr(const MaterializeTemporaryExpr *M) { default: break; } + } EmitAnyExprToMem(E, Object, Qualifiers(), /*IsInit*/true); } pushTemporaryCleanup(*this, M, E, Object); @@ -1357,11 +1364,18 @@ LValue CodeGenFunction::EmitLValueHelper(const Expr *E, return EmitCXXUuidofLValue(cast(E)); case Expr::LambdaExprClass: return EmitAggExprToLValue(E); + case Expr::CilkSpawnExprClass: + return EmitCilkSpawnExprLValue(cast(E)); case Expr::ExprWithCleanupsClass: { const auto *cleanups = cast(E); RunCleanupsScope Scope(*this); + bool CleanupsSaved = false; + if (IsSpawned) + CleanupsSaved = CurDetachScope->MaybeSaveCleanupsScope(&Scope); LValue LV = EmitLValue(cleanups->getSubExpr(), IsKnownNonNull); + if (CleanupsSaved) + CurDetachScope->CleanupDetach(); if 
(LV.isSimple()) { // Defend against branches out of gnu statement expressions surrounded by // cleanups. @@ -5163,6 +5177,28 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) { break; } + if (isa(E->getRHS()->IgnoreImplicit())) { + // Emit the LHS before the RHS. + LValue LV = EmitCheckedLValue(E->getLHS(), TCK_Store); + + // Set up to perform a detach. + assert(!IsSpawned && + "_Cilk_spawn statement found in spawning environment."); + IsSpawned = true; + + // Emit the expression. + RValue RV = EmitAnyExpr(E->getRHS()); + EmitStoreThroughLValue(RV, LV); + + // Finish the detach. + if (!(CurDetachScope && CurDetachScope->IsDetachStarted())) + FailedSpawnWarning(E->getRHS()->getExprLoc()); + PopDetachScope(); + IsSpawned = false; + + return LV; + } + RValue RV = EmitAnyExpr(E->getRHS()); LValue LV = EmitCheckedLValue(E->getLHS(), TCK_Store); if (RV.isScalar()) @@ -5315,6 +5351,16 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee assert(CalleeType->isFunctionPointerType() && "Call must have function pointer type!"); + if (IsSpawned) { + PushDetachScope(); + CurDetachScope->EnsureTaskFrame(); + } + + IsSpawnedScope SpawnedScp(this); + // RAII to finish detach scope after processing CallExpr E, if E uses a + // spawned value. 
+ DetachScopeRAII DetScope(*this); + const Decl *TargetDecl = OrigCallee.getAbstractInfo().getCalleeDecl().getDecl(); @@ -5512,6 +5558,8 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee Address(Cast, Handle->getType(), CGM.getPointerAlign())); Callee.setFunctionPointer(Stub); } + + SpawnedScp.RestoreOldScope(); llvm::CallBase *CallOrInvoke = nullptr; RValue Call = EmitCall(FnInfo, Callee, ReturnValue, Args, &CallOrInvoke, E == MustTailCall, E->getExprLoc()); diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index 810b28f25fa18b..dd5cbd22a59e8b 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -165,6 +165,7 @@ class AggExprEmitter : public StmtVisitor { void VisitBinAssign(const BinaryOperator *E); void VisitBinComma(const BinaryOperator *E); void VisitBinCmp(const BinaryOperator *E); + void VisitCilkSpawnExpr(CilkSpawnExpr *E); void VisitCXXRewrittenBinaryOperator(CXXRewrittenBinaryOperator *E) { Visit(E->getSemanticForm()); } @@ -942,6 +943,19 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { } } +void AggExprEmitter::VisitCilkSpawnExpr(CilkSpawnExpr *E) { + CGF.IsSpawned = true; + CGF.PushDetachScope(); + + Visit(E->getSpawnedExpr()); + + // Pop the detach scope + if (!(CGF.IsSpawned && CGF.CurDetachScope->IsDetachStarted())) + CGF.FailedSpawnWarning(E->getExprLoc()); + CGF.IsSpawned = false; + CGF.PopDetachScope(); +} + void AggExprEmitter::VisitCallExpr(const CallExpr *E) { if (E->getCallReturnType(CGF.getContext())->isReferenceType()) { EmitAggLoadOfLValue(E); @@ -1229,6 +1243,12 @@ void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) { EnsureDest(E->getRHS()->getType()); Visit(E->getRHS()); CGF.EmitAtomicStore(Dest.asRValue(), LHS, /*isInit*/ false); + if (CGF.IsSpawned) { + if (!(CGF.CurDetachScope && CGF.CurDetachScope->IsDetachStarted())) + CGF.FailedSpawnWarning(E->getRHS()->getExprLoc()); + CGF.IsSpawned = false; + CGF.PopDetachScope(); + } 
return; } @@ -1250,6 +1270,13 @@ void AggExprEmitter::VisitBinAssign(const BinaryOperator *E) { E->getType().isDestructedType() == QualType::DK_nontrivial_c_struct) CGF.pushDestroy(QualType::DK_nontrivial_c_struct, Dest.getAddress(), E->getType()); + + if (CGF.IsSpawned) { + if (!(CGF.CurDetachScope && CGF.CurDetachScope->IsDetachStarted())) + CGF.FailedSpawnWarning(E->getRHS()->getExprLoc()); + CGF.IsSpawned = false; + CGF.PopDetachScope(); + } } void AggExprEmitter:: @@ -1400,7 +1427,16 @@ AggExprEmitter::VisitLambdaExpr(LambdaExpr *E) { void AggExprEmitter::VisitExprWithCleanups(ExprWithCleanups *E) { CodeGenFunction::RunCleanupsScope cleanups(CGF); + // If this expression is spawned, associate these cleanups with the detach + // scope. + bool CleanupsSaved = false; + if (CGF.IsSpawned) + CleanupsSaved = CGF.CurDetachScope->MaybeSaveCleanupsScope(&cleanups); Visit(E->getSubExpr()); + // If this expression was spawned, then we must clean up the detach before + // forcing the scope's cleanup. + if (CleanupsSaved) + CGF.CurDetachScope->CleanupDetach(); } void AggExprEmitter::VisitCXXScalarValueInitExpr(CXXScalarValueInitExpr *E) { diff --git a/clang/lib/CodeGen/CGExprCXX.cpp b/clang/lib/CodeGen/CGExprCXX.cpp index 4d3f3e9603d942..b93e799a6468e6 100644 --- a/clang/lib/CodeGen/CGExprCXX.cpp +++ b/clang/lib/CodeGen/CGExprCXX.cpp @@ -82,12 +82,19 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorCall( ReturnValueSlot ReturnValue, llvm::Value *This, llvm::Value *ImplicitParam, QualType ImplicitParamTy, const CallExpr *CE, CallArgList *RtlArgs) { + IsSpawnedScope SpawnedScp(this); const FunctionProtoType *FPT = MD->getType()->castAs(); CallArgList Args; + if (auto *OCE = dyn_cast_or_null(CE)) + if (OCE->isAssignmentOp()) + // Restore the original spawned scope when handling an assignment + // operator, so that the RHS of the assignment is detached. 
+ SpawnedScp.RestoreOldScope(); MemberCallInfo CallInfo = commonEmitCXXMemberOrOperatorCall( *this, MD, This, ImplicitParam, ImplicitParamTy, CE, Args, RtlArgs); auto &FnInfo = CGM.getTypes().arrangeCXXMethodCall( Args, FPT, CallInfo.ReqArgs, CallInfo.PrefixSize); + SpawnedScp.RestoreOldScope(); return EmitCall(FnInfo, Callee, ReturnValue, Args, nullptr, CE && CE == MustTailCall, CE ? CE->getExprLoc() : SourceLocation()); @@ -99,6 +106,7 @@ RValue CodeGenFunction::EmitCXXDestructorCall( const CXXMethodDecl *DtorDecl = cast(Dtor.getDecl()); assert(!ThisTy.isNull()); + ThisTy = ThisTy.stripHyperobject(); assert(ThisTy->getAsCXXRecordDecl() == DtorDecl->getParent() && "Pointer/Object mixup"); @@ -174,7 +182,7 @@ static CXXRecordDecl *getCXXRecord(const Expr *E) { QualType T = E->getType(); if (const PointerType *PTy = T->getAs()) T = PTy->getPointeeType(); - const RecordType *Ty = T->castAs(); + const RecordType *Ty = T.stripHyperobject()->castAs(); return cast(Ty->getDecl()); } @@ -213,6 +221,8 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( const Expr *Base) { assert(isa(CE) || isa(CE)); + IsSpawnedScope SpawnedScp(this); + // Compute the object pointer. bool CanUseVirtualCall = MD->isVirtual() && !HasQualifier; @@ -258,7 +268,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( CallArgList *RtlArgs = nullptr; LValue TrivialAssignmentRHS; if (auto *OCE = dyn_cast(CE)) { - if (OCE->isAssignmentOp()) { + if (OCE->isAssignmentOp() && !SpawnedScp.OldScopeIsSpawned()) { if (TrivialAssignment) { TrivialAssignmentRHS = EmitLValue(CE->getArg(1)); } else { @@ -285,6 +295,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( // constructing a new complete object of type Ctor. 
assert(!RtlArgs); assert(ReturnValue.isNull() && "Constructor shouldn't have return value"); + SpawnedScp.RestoreOldScope(); CallArgList Args; commonEmitCXXMemberOrOperatorCall( *this, {Ctor, Ctor_Complete}, This.getPointer(*this), @@ -305,12 +316,19 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( if (TrivialAssignment) { // We don't like to generate the trivial copy/move assignment operator // when it isn't necessary; just produce the proper effect here. + if (isa(CE) && SpawnedScp.OldScopeIsSpawned()) { + // Restore the original spawned scope so that the RHS of the assignment + // is detached. + SpawnedScp.RestoreOldScope(); + TrivialAssignmentRHS = EmitLValue(CE->getArg(1)); + } // It's important that we use the result of EmitLValue here rather than // emitting call arguments, in order to preserve TBAA information from // the RHS. LValue RHS = isa(CE) ? TrivialAssignmentRHS : EmitLValue(*CE->arg_begin()); + SpawnedScp.RestoreOldScope(); EmitAggregateAssign(This, RHS, CE->getType()); return RValue::get(This.getPointer(*this)); } @@ -366,6 +384,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( "Destructor shouldn't have explicit parameters"); assert(ReturnValue.isNull() && "Destructor shouldn't have return value"); if (UseVirtualCall) { + SpawnedScp.RestoreOldScope(); CGM.getCXXABI().EmitVirtualDestructorCall(*this, Dtor, Dtor_Complete, This.getAddress(*this), cast(CE)); @@ -383,6 +402,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( QualType ThisTy = IsArrow ? 
Base->getType()->getPointeeType() : Base->getType(); + SpawnedScp.RestoreOldScope(); EmitCXXDestructorCall(GD, Callee, This.getPointer(*this), ThisTy, /*ImplicitParam=*/nullptr, /*ImplicitParamTy=*/QualType(), CE); @@ -425,6 +445,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( This.setAddress(NewThisAddr); } + SpawnedScp.RestoreOldScope(); return EmitCXXMemberOrOperatorCall( CalleeDecl, Callee, ReturnValue, This.getPointer(*this), /*ImplicitParam=*/nullptr, QualType(), CE, RtlArgs); @@ -433,6 +454,7 @@ RValue CodeGenFunction::EmitCXXMemberOrOperatorMemberCallExpr( RValue CodeGenFunction::EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E, ReturnValueSlot ReturnValue) { + IsSpawnedScope SpawnedScp(this); const BinaryOperator *BO = cast(E->getCallee()->IgnoreParens()); const Expr *BaseExpr = BO->getLHS(); @@ -474,6 +496,7 @@ CodeGenFunction::EmitCXXMemberPointerCallExpr(const CXXMemberCallExpr *E, // And the rest of the call args EmitCallArgs(Args, FPT, E->arguments()); + SpawnedScp.RestoreOldScope(); return EmitCall(CGM.getTypes().arrangeCXXMethodCall(Args, FPT, required, /*PrefixSize=*/0), Callee, ReturnValue, Args, nullptr, E == MustTailCall, @@ -486,9 +509,10 @@ CodeGenFunction::EmitCXXOperatorMemberCallExpr(const CXXOperatorCallExpr *E, ReturnValueSlot ReturnValue) { assert(MD->isInstance() && "Trying to emit a member call expr on a static method!"); - return EmitCXXMemberOrOperatorMemberCallExpr( + RValue Result = EmitCXXMemberOrOperatorMemberCallExpr( E, MD, ReturnValue, /*HasQualifier=*/false, /*Qualifier=*/nullptr, /*IsArrow=*/false, E->getArg(0)); + return Result; } RValue CodeGenFunction::EmitCUDAKernelCallExpr(const CUDAKernelCallExpr *E, @@ -2075,7 +2099,7 @@ void CodeGenFunction::EmitCXXDeleteExpr(const CXXDeleteExpr *E) { EmitBlock(DeleteNotNull); Ptr.setKnownNonNull(); - QualType DeleteTy = E->getDestroyedType(); + QualType DeleteTy = E->getDestroyedType().stripHyperobject(); // A destroying operator delete overrides the 
entire operation of the // delete expression. diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index 2dd1a991ec9719..d5e86bdac90edf 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -47,10 +47,17 @@ class ComplexExprEmitter CGBuilderTy &Builder; bool IgnoreReal; bool IgnoreImag; + bool DoSpawnedInit = false; + LValue LValueToSpawnInit; public: ComplexExprEmitter(CodeGenFunction &cgf, bool ir=false, bool ii=false) : CGF(cgf), Builder(CGF.Builder), IgnoreReal(ir), IgnoreImag(ii) { } + ComplexExprEmitter(CodeGenFunction &cgf, LValue LValueToSpawnInit, + bool ir=false, bool ii=false) + : CGF(cgf), Builder(CGF.Builder), IgnoreReal(ir), IgnoreImag(ii), + DoSpawnedInit(true), LValueToSpawnInit(LValueToSpawnInit) { + } //===--------------------------------------------------------------------===// @@ -126,6 +133,22 @@ class ComplexExprEmitter ComplexPairTy VisitUnaryCoawait(const UnaryOperator *E) { return Visit(E->getSubExpr()); } + ComplexPairTy VisitCilkSpawnExpr(CilkSpawnExpr *CSE) { + CGF.IsSpawned = true; + CGF.PushDetachScope(); + ComplexPairTy C = Visit(CSE->getSpawnedExpr()); + if (DoSpawnedInit) { + if (!(CGF.CurDetachScope && CGF.CurDetachScope->IsDetachStarted())) + CGF.FailedSpawnWarning(CSE->getExprLoc()); + LValue LV = LValueToSpawnInit; + EmitStoreOfComplex(C, LV, /*init*/ true); + + // Pop the detach scope + CGF.IsSpawned = false; + CGF.PopDetachScope(); + } + return C; + } ComplexPairTy emitConstant(const CodeGenFunction::ConstantEmission &Constant, Expr *E) { @@ -228,7 +251,16 @@ class ComplexExprEmitter } ComplexPairTy VisitExprWithCleanups(ExprWithCleanups *E) { CodeGenFunction::RunCleanupsScope Scope(CGF); + // If this expression is spawned, associate these cleanups with the detach + // scope. 
+ bool CleanupsSaved = false; + if (CGF.IsSpawned) + CleanupsSaved = CGF.CurDetachScope->MaybeSaveCleanupsScope(&Scope); ComplexPairTy Vals = Visit(E->getSubExpr()); + // If this expression was spawned, then we must clean up the detach before + // forcing the scope's cleanup. + if (CleanupsSaved) + CGF.CurDetachScope->CleanupDetach(); // Defend against dominance problems caused by jumps out of expression // evaluation through the shared cleanup block. Scope.ForceCleanup({&Vals.first, &Vals.second}); @@ -1163,6 +1195,31 @@ LValue ComplexExprEmitter::EmitBinAssignLValue(const BinaryOperator *E, TestAndClearIgnoreReal(); TestAndClearIgnoreImag(); + if (isa(E->getRHS()->IgnoreImplicit())) { + assert(!CGF.IsSpawned && + "_Cilk_spawn statement found in spawning environment."); + + // Compute the address to store into. + LValue LHS = CGF.EmitLValue(E->getLHS()); + + // Prepare to detach. + CGF.IsSpawned = true; + + // Emit the spawned RHS. + Val = Visit(E->getRHS()); + + // Store the result value into the LHS lvalue. + EmitStoreOfComplex(Val, LHS, /*isInit*/ false); + + // Finish the detach. + if (!(CGF.CurDetachScope && CGF.CurDetachScope->IsDetachStarted())) + CGF.FailedSpawnWarning(E->getRHS()->getExprLoc()); + CGF.IsSpawned = false; + CGF.PopDetachScope(); + + return LHS; + } + // Emit the RHS. __block variables need the RHS evaluated first. 
Val = Visit(E->getRHS()); @@ -1302,6 +1359,10 @@ void CodeGenFunction::EmitComplexExprIntoLValue(const Expr *E, LValue dest, bool isInit) { assert(E && getComplexType(E->getType()) && "Invalid complex expression to emit"); + if (isa(E) && isInit) { + ComplexExprEmitter(*this, dest).Visit(const_cast(E)); + return; + } ComplexExprEmitter Emitter(*this); ComplexPairTy Val = Emitter.Visit(const_cast(E)); Emitter.EmitStoreOfComplex(Val, dest, isInit); diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index 91369b7d8804da..325c4500e95264 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -1653,7 +1653,7 @@ llvm::Constant *ConstantEmitter::tryEmitPrivateForVarInit(const VarDecl &D) { } InConstantContext = D.hasConstantInitialization(); - QualType destType = D.getType(); + QualType destType = D.getType().stripHyperobject(); const Expr *E = D.getInit(); assert(E && "No initializer to emit"); diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index fe1a59b21f3801..b5dc2eb5eafe71 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -217,6 +217,8 @@ class ScalarExprEmitter CodeGenFunction &CGF; CGBuilderTy &Builder; bool IgnoreResultAssign; + bool DoSpawnedInit = false; + LValue LValueToSpawnInit; llvm::LLVMContext &VMContext; public: @@ -225,6 +227,13 @@ class ScalarExprEmitter VMContext(cgf.getLLVMContext()) { } + ScalarExprEmitter(CodeGenFunction &cgf, LValue LValueToSpawnInit, + bool ira=false) + : CGF(cgf), Builder(CGF.Builder), IgnoreResultAssign(ira), + DoSpawnedInit(true), LValueToSpawnInit(LValueToSpawnInit), + VMContext(cgf.getLLVMContext()) { + } + //===--------------------------------------------------------------------===// // Utilities //===--------------------------------------------------------------------===// @@ -446,6 +455,23 @@ class ScalarExprEmitter Value *VisitUnaryCoawait(const UnaryOperator *E) { 
return Visit(E->getSubExpr()); } + Value *VisitCilkSpawnExpr(CilkSpawnExpr *CSE) { + CGF.IsSpawned = true; + CGF.PushDetachScope(); + Value *V = Visit(CSE->getSpawnedExpr()); + if (!(CGF.CurDetachScope && CGF.CurDetachScope->IsDetachStarted())) + CGF.FailedSpawnWarning(CSE->getExprLoc()); + if (DoSpawnedInit) { + LValue LV = LValueToSpawnInit; + CGF.EmitNullabilityCheck(LV, V, CSE->getExprLoc()); + CGF.EmitStoreThroughLValue(RValue::get(V), LV, true); + + // Pop the detach scope + CGF.IsSpawned = false; + CGF.PopDetachScope(); + } + return V; + } // Leaves. Value *VisitIntegerLiteral(const IntegerLiteral *E) { @@ -2492,7 +2518,16 @@ Value *ScalarExprEmitter::VisitStmtExpr(const StmtExpr *E) { Value *ScalarExprEmitter::VisitExprWithCleanups(ExprWithCleanups *E) { CodeGenFunction::RunCleanupsScope Scope(CGF); + // If this expression is spawned, associate these cleanups with the detach + // scope. + bool CleanupsSaved = false; + if (CGF.IsSpawned) + CleanupsSaved = CGF.CurDetachScope->MaybeSaveCleanupsScope(&Scope); Value *V = Visit(E->getSubExpr()); + // If this expression was spawned, then we must clean up the detach before + // forcing the scope's cleanup. + if (CleanupsSaved) + CGF.CurDetachScope->CleanupDetach(); // Defend against dominance problems caused by jumps out of expression // evaluation through the shared cleanup block. Scope.ForceCleanup({&V}); @@ -4492,6 +4527,35 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { break; case Qualifiers::OCL_None: + if (isa(E->getRHS()->IgnoreImplicit())) { + assert(!CGF.IsSpawned && + "_Cilk_spawn statement found in spawning environment."); + + // Compute the address to store into. + LHS = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store); + + // Prepare to detach. + CGF.IsSpawned = true; + + // Emit the spawned RHS. + RHS = Visit(E->getRHS()); + + // Store the value into the LHS. 
Bit-fields are handled specially because + // the result is altered by the store, i.e., [C99 6.5.16p1] 'An assignment + // expression has the value of the left operand after the assignment...'. + if (LHS.isBitField()) + CGF.EmitStoreThroughBitfieldLValue(RValue::get(RHS), LHS, &RHS); + else + CGF.EmitStoreThroughLValue(RValue::get(RHS), LHS); + + // Finish the detach. + if (!(CGF.CurDetachScope && CGF.CurDetachScope->IsDetachStarted())) + CGF.FailedSpawnWarning(E->getRHS()->getExprLoc()); + CGF.IsSpawned = false; + CGF.PopDetachScope(); + + break; + } // __block variables need to have the rhs evaluated first, plus // this should improve codegen just a little. RHS = Visit(E->getRHS()); @@ -5086,6 +5150,20 @@ Value *CodeGenFunction::EmitScalarExpr(const Expr *E, bool IgnoreResultAssign) { .Visit(const_cast(E)); } +void CodeGenFunction::EmitScalarExprIntoLValue(const Expr *E, LValue dest, + bool isInit) { + assert(E && hasScalarEvaluationKind(E->getType()) && + "Invalid scalar expression to emit"); + + if (isa(E) && isInit) { + ScalarExprEmitter(*this, dest).Visit(const_cast(E)); + return; + } + Value *V = ScalarExprEmitter(*this).Visit(const_cast(E)); + EmitNullabilityCheck(dest, V, E->getExprLoc()); + EmitStoreThroughLValue(RValue::get(V), dest, isInit); +} + /// Emit a conversion from the specified type to the specified destination type, /// both of which are LLVM scalar types. 
Value *CodeGenFunction::EmitScalarConversion(Value *Src, QualType SrcTy, diff --git a/clang/lib/CodeGen/CGLoopInfo.cpp b/clang/lib/CodeGen/CGLoopInfo.cpp index e5d9db273c2d33..805de5d9f4ba8e 100644 --- a/clang/lib/CodeGen/CGLoopInfo.cpp +++ b/clang/lib/CodeGen/CGLoopInfo.cpp @@ -452,8 +452,10 @@ LoopAttributes::LoopAttributes(bool IsParallel) VectorizePredicateEnable(LoopAttributes::Unspecified), VectorizeWidth(0), VectorizeScalable(LoopAttributes::Unspecified), InterleaveCount(0), UnrollCount(0), UnrollAndJamCount(0), + TapirGrainsize(0), DistributeEnable(LoopAttributes::Unspecified), PipelineDisabled(false), - PipelineInitiationInterval(0), MustProgress(false) {} + PipelineInitiationInterval(0), MustProgress(false), + SpawnStrategy(LoopAttributes::Sequential) {} void LoopAttributes::clear() { IsParallel = false; @@ -462,6 +464,7 @@ void LoopAttributes::clear() { InterleaveCount = 0; UnrollCount = 0; UnrollAndJamCount = 0; + TapirGrainsize = 0; VectorizeEnable = LoopAttributes::Unspecified; UnrollEnable = LoopAttributes::Unspecified; UnrollAndJamEnable = LoopAttributes::Unspecified; @@ -470,6 +473,7 @@ void LoopAttributes::clear() { PipelineDisabled = false; PipelineInitiationInterval = 0; MustProgress = false; + SpawnStrategy = LoopAttributes::Sequential; } LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs, @@ -487,6 +491,7 @@ LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs, if (!Attrs.IsParallel && Attrs.VectorizeWidth == 0 && Attrs.VectorizeScalable == LoopAttributes::Unspecified && Attrs.InterleaveCount == 0 && Attrs.UnrollCount == 0 && + Attrs.TapirGrainsize == 0 && Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled && Attrs.PipelineInitiationInterval == 0 && Attrs.VectorizePredicateEnable == LoopAttributes::Unspecified && @@ -494,12 +499,39 @@ LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs, Attrs.UnrollEnable == LoopAttributes::Unspecified && Attrs.UnrollAndJamEnable == LoopAttributes::Unspecified 
&& Attrs.DistributeEnable == LoopAttributes::Unspecified && !StartLoc && - !EndLoc && !Attrs.MustProgress) + !EndLoc && !Attrs.MustProgress && + Attrs.SpawnStrategy == LoopAttributes::Sequential) return; TempLoopID = MDNode::getTemporary(Header->getContext(), std::nullopt); } +void LoopInfo::getTapirLoopProperties( + const LoopAttributes &Attrs, SmallVectorImpl &LoopProperties) { + LLVMContext &Ctx = Header->getContext(); + + if (Attrs.SpawnStrategy == LoopAttributes::Sequential) + return; + + // Setting tapir.loop.spawn.strategy + if (Attrs.SpawnStrategy != LoopAttributes::Sequential) { + Metadata *Vals[] = { + MDString::get(Ctx, "tapir.loop.spawn.strategy"), + ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt32Ty(Ctx), + Attrs.SpawnStrategy))}; + LoopProperties.push_back(MDNode::get(Ctx, Vals)); + } + + // Setting tapir.loop.grainsize + if (Attrs.TapirGrainsize > 0) { + Metadata *Vals[] = { + MDString::get(Ctx, "tapir.loop.grainsize"), + ConstantAsMetadata::get(ConstantInt::get(llvm::Type::getInt32Ty(Ctx), + Attrs.TapirGrainsize))}; + LoopProperties.push_back(MDNode::get(Ctx, Vals)); + } +} + void LoopInfo::finish() { // We did not annotate the loop body instructions because there are no // attributes for this loop. 
@@ -579,8 +611,11 @@ void LoopInfo::finish() { CurLoopAttr = BeforeJam; } + SmallVector TapirLoopProperties; + getTapirLoopProperties(CurLoopAttr, TapirLoopProperties); + bool HasUserTransforms = false; - LoopID = createMetadata(CurLoopAttr, {}, HasUserTransforms); + LoopID = createMetadata(CurLoopAttr, TapirLoopProperties, HasUserTransforms); TempLoopID->replaceAllUsesWith(LoopID); } @@ -668,6 +703,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::VectorizeWidth: case LoopHintAttr::InterleaveCount: case LoopHintAttr::PipelineInitiationInterval: + case LoopHintAttr::TapirGrainsize: llvm_unreachable("Options cannot be disabled."); break; } @@ -696,6 +732,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::InterleaveCount: case LoopHintAttr::PipelineDisabled: case LoopHintAttr::PipelineInitiationInterval: + case LoopHintAttr::TapirGrainsize: llvm_unreachable("Options cannot enabled."); break; } @@ -718,6 +755,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::Distribute: case LoopHintAttr::PipelineDisabled: case LoopHintAttr::PipelineInitiationInterval: + case LoopHintAttr::TapirGrainsize: llvm_unreachable("Options cannot be used to assume mem safety."); break; } @@ -740,6 +778,7 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::PipelineDisabled: case LoopHintAttr::PipelineInitiationInterval: case LoopHintAttr::VectorizePredicate: + case LoopHintAttr::TapirGrainsize: llvm_unreachable("Options cannot be used with 'full' hint."); break; } @@ -773,6 +812,9 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, case LoopHintAttr::PipelineInitiationInterval: setPipelineInitiationInterval(ValueInt); break; + case LoopHintAttr::TapirGrainsize: + setTapirGrainsize(ValueInt); + break; case LoopHintAttr::Unroll: case LoopHintAttr::UnrollAndJam: case LoopHintAttr::VectorizePredicate: diff --git 
a/clang/lib/CodeGen/CGLoopInfo.h b/clang/lib/CodeGen/CGLoopInfo.h index 856e892f712e31..9c030e63e93366 100644 --- a/clang/lib/CodeGen/CGLoopInfo.h +++ b/clang/lib/CodeGen/CGLoopInfo.h @@ -70,6 +70,9 @@ struct LoopAttributes { /// llvm.unroll. unsigned UnrollAndJamCount; + /// tapir.loop.grainsize. + unsigned TapirGrainsize; + /// Value for llvm.loop.distribute.enable metadata. LVEnableState DistributeEnable; @@ -81,6 +84,12 @@ struct LoopAttributes { /// Value for whether the loop is required to make progress. bool MustProgress; + + /// Tapir-loop spawning strategy. + enum LSStrategy { Sequential, DAC }; + + /// Value for tapir.loop.spawn.strategy metadata. + LSStrategy SpawnStrategy; }; /// Information used when generating a structured loop. @@ -172,6 +181,9 @@ class LoopInfo { createFullUnrollMetadata(const LoopAttributes &Attrs, llvm::ArrayRef LoopProperties, bool &HasUserTransforms); + void getTapirLoopProperties( + const LoopAttributes &Attrs, + llvm::SmallVectorImpl &LoopProperties); /// @} /// Create a LoopID for this loop, including transformation-unspecific @@ -285,6 +297,14 @@ class LoopInfoStack { /// Set no progress for the next loop pushed. void setMustProgress(bool P) { StagedAttrs.MustProgress = P; } + /// Set the Tapir-loop spawning strategy for the next loop pushed. + void setSpawnStrategy(const LoopAttributes::LSStrategy &Strat) { + StagedAttrs.SpawnStrategy = Strat; + } + + /// Set the Tapir-loop grainsize for the next loop pushed. + void setTapirGrainsize(unsigned C) { StagedAttrs.TapirGrainsize = C; } + private: /// Returns true if there is LoopInfo on the stack. 
bool hasInfo() const { return !Active.empty(); } diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 2184b8600d764c..5fde8c7fbfbc2e 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -109,6 +109,7 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef Attrs) { case Stmt::DefaultStmtClass: case Stmt::CaseStmtClass: case Stmt::SEHLeaveStmtClass: + case Stmt::CilkSyncStmtClass: llvm_unreachable("should have emitted these statements as simple"); #define STMT(Type, Base) @@ -169,6 +170,15 @@ void CodeGenFunction::EmitStmt(const Stmt *S, ArrayRef Attrs) { EmitCapturedStmt(*CS, CS->getCapturedRegionKind()); } break; + case Stmt::CilkSpawnStmtClass: + EmitCilkSpawnStmt(cast(*S)); + break; + case Stmt::CilkForStmtClass: + EmitCilkForStmt(cast(*S), Attrs); + break; + case Stmt::CilkScopeStmtClass: + EmitCilkScopeStmt(cast(*S)); + break; case Stmt::ObjCAtTryStmtClass: EmitObjCAtTryStmt(cast(*S)); break; @@ -473,6 +483,9 @@ bool CodeGenFunction::EmitSimpleStmt(const Stmt *S, case Stmt::SEHLeaveStmtClass: EmitSEHLeaveStmt(cast(*S)); break; + case Stmt::CilkSyncStmtClass: + EmitCilkSyncStmt(cast(*S)); + break; } return true; } @@ -488,6 +501,7 @@ Address CodeGenFunction::EmitCompoundStmt(const CompoundStmt &S, bool GetLast, // Keep track of the current cleanup stack depth, including debug scopes. LexicalScope Scope(*this, S.getSourceRange()); + SyncRegionRAII StmtSR(*this); return EmitCompoundStmtWithoutScope(S, GetLast, AggSlot); } @@ -1310,6 +1324,13 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { // Emit the result value, even if unused, to evaluate the side effects. const Expr *RV = S.getRetValue(); + // If RV is a CilkSpawnExpr, handle the CilkSpawnExpr part here. + if (const CilkSpawnExpr *CS = dyn_cast_or_null(RV)) { + IsSpawned = true; + PushDetachScope(); + RV = CS->getSpawnedExpr(); + } + // Record the result expression of the return statement. 
The recorded // expression is used to determine whether a block capture's lifetime should // end at the end of the full expression as opposed to the end of the scope @@ -1321,6 +1342,9 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { SaveRetExprRAII SaveRetExpr(RV, *this); RunCleanupsScope cleanupScope(*this); + bool CleanupsSaved = false; + if (IsSpawned) + CleanupsSaved = CurDetachScope->MaybeSaveCleanupsScope(&cleanupScope); if (const auto *EWC = dyn_cast_or_null(RV)) RV = EWC->getSubExpr(); // FIXME: Clean this up by using an LValue for ReturnTemp, @@ -1379,8 +1403,19 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { if (!RV || RV->isEvaluatable(getContext())) ++NumSimpleReturnExprs; + if (CleanupsSaved) + CurDetachScope->CleanupDetach(); cleanupScope.ForceCleanup(); - EmitBranchThroughCleanup(ReturnBlock); + if (IsSpawned) { + if (!(CurDetachScope && CurDetachScope->IsDetachStarted())) + FailedSpawnWarning(RV->getExprLoc()); + // Pop the detach scope + IsSpawned = false; + PopDetachScope(); + } + + bool CompilingCilk = (getLangOpts().getCilk() != LangOptions::Cilk_none); + EmitBranchThroughCleanup(ReturnBlock, CompilingCilk); } void CodeGenFunction::EmitDeclStmt(const DeclStmt &S) { diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt index 1debeb6d9cce9e..8b58f2767b2a7c 100644 --- a/clang/lib/CodeGen/CMakeLists.txt +++ b/clang/lib/CodeGen/CMakeLists.txt @@ -37,6 +37,7 @@ add_clang_library(clangCodeGen CGAtomic.cpp CGBlocks.cpp CGBuiltin.cpp + CGCilk.cpp CGCUDANV.cpp CGCUDARuntime.cpp CGCXX.cpp diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 7ef893cb1a2d79..d56ace27f6c167 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -268,6 +268,10 @@ TypeEvaluationKind CodeGenFunction::getEvaluationKind(QualType type) { case Type::Atomic: type = cast(type)->getValueType(); continue; + + case Type::Hyperobject: + type = 
cast(type)->getElementType(); + continue; } llvm_unreachable("unknown type kind!"); } @@ -331,6 +335,8 @@ static void EmitIfUsed(CodeGenFunction &CGF, llvm::BasicBlock *BB) { void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { assert(BreakContinueStack.empty() && "mismatched push/pop in break/continue stack!"); + assert(!CurDetachScope && + "mismatched push/pop in detach-scope stack!"); bool OnlySimpleReturnStmts = NumSimpleReturnExprs > 0 && NumSimpleReturnExprs == NumReturnExprs @@ -362,6 +368,8 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { bool HasOnlyLifetimeMarkers = HasCleanups && EHStack.containsOnlyLifetimeMarkers(PrologueCleanupDepth); bool EmitRetDbgLoc = !HasCleanups || HasOnlyLifetimeMarkers; + bool SyncEmitted = false; + bool CompilingCilk = (getLangOpts().getCilk() != LangOptions::Cilk_none); std::optional OAL; if (HasCleanups) { @@ -376,12 +384,29 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { OAL = ApplyDebugLocation::CreateDefaultArtificial(*this, EndLoc); } - PopCleanupBlocks(PrologueCleanupDepth); + // If we're compiling Cilk, PopCleanupBlocks should emit a _Cilk_sync before + // any cleanups. + PopCleanupBlocks(PrologueCleanupDepth, {}, CompilingCilk); + SyncEmitted = true; + } else if (CompilingCilk && Builder.GetInsertBlock() && + ReturnBlock.getBlock()->use_empty()) { + // If we're compiling Cilk, emit an implicit sync for the function. In this + // case, EmitReturnBlock will recycle Builder.GetInsertBlock() for the + // function's return block, so we insert the implicit _Cilk_sync before + // calling EmitReturnBlock. + EmitImplicitSyncCleanup(); + SyncEmitted = true; } // Emit function epilog (to return). llvm::DebugLoc Loc = EmitReturnBlock(); + if (CompilingCilk && !SyncEmitted) { + // If we're compiling Cilk, emit an implicit sync for the function. 
+ EmitImplicitSyncCleanup(); + SyncEmitted = true; + } + if (ShouldInstrumentFunction()) { if (CGM.getCodeGenOpts().InstrumentFunctions) CurFn->addFnAttr("instrument-function-exit", "__cyg_profile_func_exit"); @@ -526,6 +551,11 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { ReturnValue = Address::invalid(); } } + + if (CurSyncRegion) { + PopSyncRegion(); + assert(!CurSyncRegion && "Nested sync regions at end of function."); + } } /// ShouldInstrumentFunction - Return true if the current function should be @@ -790,6 +820,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, Fn->addFnAttr(llvm::Attribute::SanitizeMemTag); if (SanOpts.has(SanitizerKind::Thread)) Fn->addFnAttr(llvm::Attribute::SanitizeThread); + if (SanOpts.has(SanitizerKind::Cilk)) + Fn->addFnAttr(llvm::Attribute::SanitizeCilk); if (SanOpts.hasOneOf(SanitizerKind::Memory | SanitizerKind::KernelMemory)) Fn->addFnAttr(llvm::Attribute::SanitizeMemory); } @@ -916,6 +948,17 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, llvm::Triple::CODE16) Fn->addFnAttr("patchable-function", "prologue-short-redirect"); + // Add Cilk attributes + if (D && getLangOpts().getCilk() != LangOptions::Cilk_none) { + if (D->getAttr()) + Fn->setStrandPure(); + if (D->getAttr()) + Fn->addFnAttr(llvm::Attribute::Stealable); + } + + if (D && D->getAttr()) + Fn->addFnAttr(llvm::Attribute::Injective); + // Add no-jump-tables value. if (CGM.getCodeGenOpts().NoUseJumpTables) Fn->addFnAttr("no-jump-tables", "true"); @@ -2304,6 +2347,10 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { type = cast(ty)->getElementType(); break; + case Type::Hyperobject: + type = cast(ty)->getElementType(); + break; + case Type::VariableArray: { // Losing element qualification here is fine. 
const VariableArrayType *vat = cast(ty); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 8722fd4550e4a7..73c0aaef94d74a 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -23,6 +23,7 @@ #include "VarBypassDetector.h" #include "clang/AST/CharUnits.h" #include "clang/AST/CurrentSourceLocExprScope.h" +#include "clang/AST/ExprCilk.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/ExprOpenMP.h" @@ -774,6 +775,18 @@ class CodeGenFunction : public CodeGenTypeCache { /// we're currently inside a conditionally-evaluated expression. template void pushFullExprCleanup(CleanupKind kind, As... A) { + if (SpawnedCleanup) { + if (kind & EHCleanup) + pushFullExprCleanupImpl( + static_cast(kind & ~NormalCleanup), A...); + pushCleanupAfterFullExpr(kind, A...); + return; + } + pushFullExprCleanupImpl(kind, A...); + } + + template + void pushFullExprCleanupImpl(CleanupKind kind, As... A) { // If we're not in a conditional branch, or if none of the // arguments requires saving, then use the unconditional cleanup. if (!isInConditionalBranch()) @@ -848,9 +861,13 @@ class CodeGenFunction : public CodeGenTypeCache { void PushDestructorCleanup(const CXXDestructorDecl *Dtor, QualType T, Address Addr); + /// EmitImplicitSyncCleanup - Emit an implicit sync. + void EmitImplicitSyncCleanup(llvm::Instruction *SyncRegion = nullptr); + /// PopCleanupBlock - Will pop the cleanup entry on the stack and /// process all branch fixups. - void PopCleanupBlock(bool FallThroughIsBranchThrough = false); + void PopCleanupBlock(bool FallThroughIsBranchThrough = false, + bool AfterSync = false); /// DeactivateCleanupBlock - Deactivates the given cleanup block. /// The block cannot be reactivated. 
Pops it if it's the top of the @@ -881,6 +898,10 @@ class CodeGenFunction : public CodeGenTypeCache { bool OldDidCallStackSave; protected: bool PerformCleanup; + bool CleanupAfterSync; + /// Protected method to control whether a sync is inserted before any + /// cleanups. + void setCleanupAfterSync(bool V = true) { CleanupAfterSync = V; } private: RunCleanupsScope(const RunCleanupsScope &) = delete; @@ -892,7 +913,7 @@ class CodeGenFunction : public CodeGenTypeCache { public: /// Enter a new cleanup scope. explicit RunCleanupsScope(CodeGenFunction &CGF) - : PerformCleanup(true), CGF(CGF) + : PerformCleanup(true), CleanupAfterSync(false), CGF(CGF) { CleanupStackDepth = CGF.EHStack.stable_begin(); LifetimeExtendedCleanupStackSize = @@ -924,10 +945,64 @@ class CodeGenFunction : public CodeGenTypeCache { assert(PerformCleanup && "Already forced cleanup"); CGF.DidCallStackSave = OldDidCallStackSave; CGF.PopCleanupBlocks(CleanupStackDepth, LifetimeExtendedCleanupStackSize, - ValuesToReload); + ValuesToReload, CleanupAfterSync); PerformCleanup = false; CGF.CurrentCleanupScopeDepth = OldCleanupScopeDepth; } + + /// Pops cleanup blocks until the given savepoint is reached, then add the + /// cleanups from the given savepoint in the lifetime-extended cleanups + /// stack. + void PopCleanupBlocksAndDetach( + std::initializer_list ValuesToReload) { + size_t OldLifetimeExtendedSize = LifetimeExtendedCleanupStackSize; + CGF.PopCleanupBlocks(CleanupStackDepth, ValuesToReload); + + // Do the detach, and get the new cleanup stack depth. + CGF.CurDetachScope->PushSpawnedTaskTerminate(); + CleanupStackDepth = CGF.EHStack.stable_begin(); + + // Move our deferred cleanups onto the EH stack. This scope will deal + // with these deferred cleanups when it is destroyed. + for (size_t I = OldLifetimeExtendedSize, + E = CGF.LifetimeExtendedCleanupStack.size(); I != E; /**/) { + // Alignment should be guaranteed by the vptrs in the individual cleanups. 
+ assert((I % alignof(LifetimeExtendedCleanupHeader) == 0) && + "misaligned cleanup stack entry"); + + LifetimeExtendedCleanupHeader &Header = + reinterpret_cast( + CGF.LifetimeExtendedCleanupStack[I]); + I += sizeof(Header); + + CGF.EHStack.pushCopyOfCleanup(Header.getKind(), + &CGF.LifetimeExtendedCleanupStack[I], + Header.getSize()); + I += Header.getSize(); + + if (Header.isConditional()) { + Address ActiveFlag = + reinterpret_cast
(CGF.LifetimeExtendedCleanupStack[I]); + CGF.initFullExprCleanupWithFlag(ActiveFlag); + I += sizeof(ActiveFlag); + } + } + CGF.LifetimeExtendedCleanupStack.resize(OldLifetimeExtendedSize); + } + + void DoDetach(std::initializer_list ValuesToReload = {}) { + IsSpawnedScope SpawnedScp(&CGF); + CGF.DidCallStackSave = OldDidCallStackSave; + + PopCleanupBlocksAndDetach(ValuesToReload); + + LifetimeExtendedCleanupStackSize = + CGF.LifetimeExtendedCleanupStack.size(); + OldDidCallStackSave = CGF.DidCallStackSave; + CGF.DidCallStackSave = false; + OldCleanupScopeDepth = CGF.CurrentCleanupScopeDepth; + CGF.CurrentCleanupScopeDepth = CleanupStackDepth; + } }; // Cleanup stack depth of the RunCleanupsScope that was pushed most recently. @@ -1134,11 +1209,497 @@ class CodeGenFunction : public CodeGenTypeCache { ~OMPLocalDeclMapRAII() { SavedMap.swap(CGF.LocalDeclMap); } }; + /// In Cilk, flag indicating whether the current call/invoke is spawned. + bool IsSpawned = false; + bool SpawnedCleanup = false; + + /// RAII object to set/unset CodeGenFunction::IsSpawned. + class IsSpawnedScope { + CodeGenFunction *CGF; + bool OldIsSpawned; + bool OldSpawnedCleanup; + public: + IsSpawnedScope(CodeGenFunction *CGF); + ~IsSpawnedScope(); + bool OldScopeIsSpawned() const; + void RestoreOldScope(); + }; + + /// Cleanup to ensure a sync is inserted. If no SyncRegion is specified, then + /// this cleanup actually serves as a placeholder in EHStack, which ensures + /// that an implicit sync is inserted before any normal cleanups. + struct ImplicitSyncCleanup final : public EHScopeStack::Cleanup { + llvm::Instruction *SyncRegion; + public: + ImplicitSyncCleanup(llvm::Instruction *SyncRegion = nullptr) + : SyncRegion(SyncRegion) {} + + void Emit(CodeGenFunction &CGF, Flags F) override { + if (SyncRegion) + CGF.EmitImplicitSyncCleanup(SyncRegion); + } + }; + + // Flag indicating whether CodeGen is currently emitting within a some + // _Cilk_scope. 
+ bool WithinCilkScope = false; + + /// Cleanup to ensure a tapir.runtime.end intrinsic is inserted. + struct TapirRuntimeEndCleanup final : public EHScopeStack::Cleanup { + llvm::Instruction *TapirRuntimeStart; + + public: + TapirRuntimeEndCleanup(llvm::Instruction *TapirRuntimeStart) + : TapirRuntimeStart(TapirRuntimeStart) {} + + void Emit(CodeGenFunction &CGF, Flags F) override { + CGF.Builder.CreateCall( + CGF.CGM.getIntrinsic(llvm::Intrinsic::tapir_runtime_end), + {TapirRuntimeStart}); + } + }; + + // Subclass of RunCleanupsScope that ensures an implicit sync is emitted + // before cleanups. + class ImplicitSyncScope : public RunCleanupsScope { + ImplicitSyncScope(const ImplicitSyncScope &) = delete; + void operator=(const ImplicitSyncScope &) = delete; + public: + explicit ImplicitSyncScope(CodeGenFunction &CGF) : RunCleanupsScope(CGF) { + setCleanupAfterSync(); + CGF.EHStack.pushCleanup(NormalCleanup); + } + + ~ImplicitSyncScope() { + if (PerformCleanup) + ForceCleanup(); + } + + void ForceCleanup() { + RunCleanupsScope::ForceCleanup(); + } + }; + + /// A sync region is a collection of spawned tasks and syncs such that syncs + /// in the collection may wait on the spawned tasks in the same collection + /// (control-flow permitting). In Cilk, certain constructs, such as functions + /// _Cilk_spawn bodies, or _Cilk_for loop bodies, use a separate sync region + /// to handle spawning and syncing of tasks within that construct. 
+ class SyncRegion { + CodeGenFunction &CGF; + SyncRegion *ParentRegion; + llvm::Instruction *SyncRegionStart = nullptr; + ImplicitSyncScope *InnerSyncScope = nullptr; + + SyncRegion(const SyncRegion &) = delete; + void operator=(const SyncRegion &) = delete; + public: + explicit SyncRegion(CodeGenFunction &CGF) + : CGF(CGF), ParentRegion(CGF.CurSyncRegion) {} + + ~SyncRegion() { + if (InnerSyncScope) + delete InnerSyncScope; + CGF.CurSyncRegion = ParentRegion; + } + + llvm::Instruction *getSyncRegionStart() const { + return SyncRegionStart; + } + void setSyncRegionStart(llvm::Instruction *SRStart) { + SyncRegionStart = SRStart; + } + + void addImplicitSync() { + if (!InnerSyncScope) + InnerSyncScope = new ImplicitSyncScope(CGF); + } + }; + + /// The current sync region. + SyncRegion *CurSyncRegion = nullptr; + + SyncRegion *PushSyncRegion() { + CurSyncRegion = new SyncRegion(*this); + return CurSyncRegion; + } + + llvm::Instruction *EmitSyncRegionStart(); + + void PopSyncRegion() { + delete CurSyncRegion; // ~SyncRegion updates CurSyncRegion + } + + void EnsureSyncRegion() { + if (!CurSyncRegion) + PushSyncRegion(); + if (!CurSyncRegion->getSyncRegionStart()) + CurSyncRegion->setSyncRegionStart(EmitSyncRegionStart()); + } + + // Flag to indicate whether the current scope is synced. Currently this flag + // is used to optionally push a SyncRegion inside of a lexical scope, so that + // any cleanups run within that lexical scope occur after an implicit sync. + bool ScopeIsSynced = false; + + // RAII for maintaining CodeGenFunction::ScopeIsSynced. + class SyncedScopeRAII { + CodeGenFunction &CGF; + bool OldScopeIsSynced; + public: + SyncedScopeRAII(CodeGenFunction &CGF) + : CGF(CGF), OldScopeIsSynced(CGF.ScopeIsSynced) {} + ~SyncedScopeRAII() { CGF.ScopeIsSynced = OldScopeIsSynced; } + }; + + // RAII for pushing and popping a sync region. 
+ class SyncRegionRAII { + CodeGenFunction &CGF; + bool OldScopeIsSynced; + public: + SyncRegionRAII(CodeGenFunction &CGF, bool addImplicitSync = true) + : CGF(CGF), OldScopeIsSynced(CGF.ScopeIsSynced) { + if (CGF.ScopeIsSynced) { + CGF.PushSyncRegion(); + // If requested, add an implicit sync onto this sync region. + if (addImplicitSync) + CGF.CurSyncRegion->addImplicitSync(); + + CGF.ScopeIsSynced = false; + } + } + ~SyncRegionRAII() { + if (OldScopeIsSynced) { + CGF.PopSyncRegion(); + CGF.ScopeIsSynced = OldScopeIsSynced; + } + } + }; + + /// Cleanup to ensure a taskframe is ended with a taskframe.resume on an + /// exception-handling path. + struct CallTaskEnd final : public EHScopeStack::Cleanup { + llvm::Value *TaskFrame; + public: + CallTaskEnd(llvm::Value *TaskFrame) : TaskFrame(TaskFrame) {} + void Emit(CodeGenFunction &CGF, Flags F) override { + // Recreate the landingpad's return value for the rethrow invoke. Tapir + // lowering will replace this rethrow with a resume. + llvm::Value *Exn = CGF.Builder.CreateLoad( + Address(CGF.ExceptionSlot, CGF.Int8PtrTy, CGF.getPointerAlign()), + "exn"); + llvm::Value *Sel = CGF.Builder.CreateLoad( + Address(CGF.EHSelectorSlot, CGF.Int32Ty, CharUnits::fromQuantity(4)), + "sel"); + llvm::Type *LPadType = + llvm::StructType::get(Exn->getType(), Sel->getType()); + llvm::Value *LPadVal = llvm::UndefValue::get(LPadType); + LPadVal = CGF.Builder.CreateInsertValue(LPadVal, Exn, 0, "lpad.val"); + LPadVal = CGF.Builder.CreateInsertValue(LPadVal, Sel, 1, "lpad.val"); + + llvm::Function *TaskFrameResume = + CGF.CGM.getIntrinsic(llvm::Intrinsic::taskframe_resume, + { LPadVal->getType() }); + CGF.Builder.CreateInvoke(TaskFrameResume, CGF.getUnreachableBlock(), + CGF.CurDetachScope->getTempInvokeDest(), + { TaskFrame, LPadVal }); + CGF.Builder.SetInsertPoint(CGF.CurDetachScope->getTempInvokeDest()); + } + }; + + /// Cleanup to ensure spawned task is ended with a detached.rethrow on an + /// exception-handling path. 
+ struct CallDetRethrow final : public EHScopeStack::Cleanup { + llvm::Value *SyncRegion; + llvm::BasicBlock *TempInvokeDest; + public: + CallDetRethrow(llvm::Value *SyncRegion, + llvm::BasicBlock *TempInvokeDest = nullptr) + : SyncRegion(SyncRegion), TempInvokeDest(TempInvokeDest) {} + void Emit(CodeGenFunction &CGF, Flags F) override { + if (!TempInvokeDest) + TempInvokeDest = CGF.CurDetachScope->getTempInvokeDest(); + + // Recreate the landingpad's return value for the rethrow invoke. Tapir + // lowering will replace this rethrow with a resume. + llvm::Value *Exn = CGF.Builder.CreateLoad( + Address(CGF.ExceptionSlot, CGF.Int8PtrTy, CGF.getPointerAlign()), + "exn"); + llvm::Value *Sel = CGF.Builder.CreateLoad( + Address(CGF.EHSelectorSlot, CGF.Int32Ty, CharUnits::fromQuantity(4)), + "sel"); + llvm::Type *LPadType = + llvm::StructType::get(Exn->getType(), Sel->getType()); + llvm::Value *LPadVal = llvm::UndefValue::get(LPadType); + LPadVal = CGF.Builder.CreateInsertValue(LPadVal, Exn, 0, "lpad.val"); + LPadVal = CGF.Builder.CreateInsertValue(LPadVal, Sel, 1, "lpad.val"); + + llvm::Function *DetachedRethrow = + CGF.CGM.getIntrinsic(llvm::Intrinsic::detached_rethrow, + { LPadVal->getType() }); + CGF.Builder.CreateInvoke(DetachedRethrow, CGF.getUnreachableBlock(), + TempInvokeDest, { SyncRegion, LPadVal }); + CGF.Builder.SetInsertPoint(TempInvokeDest); + } + }; + + /// Object to manage creation of spawned tasks using Tapir instructions. + /// + /// Conceptually, each spawned task corresponds to a detach scope, which gets + /// its own copy of specific CodeGenFunction state, such as its own alloca + /// insert point and exception-handling state. In practice, detach scopes + /// maintain two scopes for each spawned task: a scope corresponding with the + /// taskframe of the task, and a scope for the task itself. 
+ class DetachScope { + CodeGenFunction &CGF; + bool DetachStarted = false; + bool DetachCleanedUp = false; + llvm::DetachInst *Detach = nullptr; + llvm::BasicBlock *DetachedBlock = nullptr; + llvm::BasicBlock *ContinueBlock = nullptr; + + // Pointer to the parent detach scope. + DetachScope *ParentScope; + + // Possible cleanup scope from a child ExprWithCleanups of a CilkSpawnStmt. + // We keep track of this scope in order to properly adjust the scope when + // the emission of the task itself injects an additional cleanup onto + // EHStack. + RunCleanupsScope *StmtCleanupsScope = nullptr; + + // Old alloca insertion points from the CGF to restore when we're done + // emitting the spawned task and associated taskframe. + llvm::AssertingVH OldAllocaInsertPt = nullptr; + // Alloca insertion point for the taskframe, which we save and restore + // around the emission of the spawned task itself. + llvm::AssertingVH TFAllocaInsertPt = nullptr; + // A temporary invoke destination, maintained to handle the emission of + // detached.rethrow and taskframe.resume intrinsics on exception-handling + // paths out of a spawned task or its taskframe. + llvm::BasicBlock *TempInvokeDest = nullptr; + + // Old EH state from the CGF to restore when we're done emitting the spawned + // task and associated taskframe. + llvm::BasicBlock *OldEHResumeBlock = nullptr; + llvm::Value *OldExceptionSlot = nullptr; + llvm::AllocaInst *OldEHSelectorSlot = nullptr; + Address OldNormalCleanupDest = Address::invalid(); + + // EH state for the taskframe, which we save and restore around the emission + // of the spawned task itself. + llvm::BasicBlock *TFEHResumeBlock = nullptr; + llvm::Value *TFExceptionSlot = nullptr; + llvm::AllocaInst *TFEHSelectorSlot = nullptr; + Address TFNormalCleanupDest = Address::invalid(); + + // Saved state in an initialized detach scope. 
+ llvm::AssertingVH SavedDetachedAllocaInsertPt = nullptr; + + // Information about a reference temporary created early in the detached + // block. + Address RefTmp = Address::invalid(); + StorageDuration RefTmpSD; + + // Optional taskframe created separately from detach. + llvm::Value *TaskFrame = nullptr; + + void InitDetachScope(); + + DetachScope(const DetachScope &) = delete; + void operator=(const DetachScope &) = delete; + + public: + /// Enter a new detach scope + explicit DetachScope(CodeGenFunction &CGF) + : CGF(CGF), ParentScope(CGF.CurDetachScope) { + CGF.CurDetachScope = this; + EnsureTaskFrame(); + } + + /// Exit this detach scope. + ~DetachScope() { + if (TempInvokeDest && TempInvokeDest->use_empty()) + delete TempInvokeDest; + CGF.CurDetachScope = ParentScope; + } + + // Optionally save the specified cleanups scope, so it can be properly + // updated when a spawned task is emitted. + bool MaybeSaveCleanupsScope(RunCleanupsScope *Scope) { + if (!StmtCleanupsScope) { + StmtCleanupsScope = Scope; + return true; + } + return false; + } + + // Methods to handle the taskframe associated with the spawned task. + void EnsureTaskFrame(); + llvm::Value *GetTaskFrame() { return TaskFrame; } + + // Create nested exception-handling state for a taskframe or spawned task. + void CreateTaskFrameEHState(); + void CreateDetachedEHState(); + // Restore ancestor exception-handling state of a spawned task or taskframe. + // Returns a pointer to any EHResumeBlock that was generated during the + // emission of the spawned task or taskframe. + llvm::BasicBlock *RestoreTaskFrameEHState(); + llvm::BasicBlock *RestoreParentEHState(); + + // Get a temporary destination for an invoke, creating a new one if + // necessary. 
+ llvm::BasicBlock *getTempInvokeDest() { + if (!TempInvokeDest) + TempInvokeDest = CGF.createBasicBlock("temp.invoke.dest"); + return TempInvokeDest; + } + + // Start the spawned task, i.e., by emitting a detach instruction and + // setting up nested CGF state. + void StartDetach(); + // Returns true if the spawned task has started. + bool IsDetachStarted() const { return DetachStarted; } + // Push a terminator for the spawned task onto EHStack. + void PushSpawnedTaskTerminate(); + // Clean up state for the spawned task. + void CleanupDetach(); + // Emit the end of the spawned task, i.e., a reattach. + void EmitTaskEnd(); + // Finish the spawned task. + void FinishDetach(); + + // Create a temporary for the spawned task, specifically, before the spawned + // task has started. + Address CreateDetachedMemTemp(QualType Ty, StorageDuration SD, + const Twine &Name = "det.tmp"); + }; + + /// The current detach scope. + DetachScope *CurDetachScope = nullptr; + + /// Push a new detach scope onto the stack, but do not begin the detach. + void PushDetachScope() { + EnsureSyncRegion(); + if (!CurDetachScope || CurDetachScope->IsDetachStarted()) + CurDetachScope = new DetachScope(*this); + } + + /// Finish the current detach scope and pop it off the stack. + void PopDetachScope() { + CurDetachScope->FinishDetach(); + delete CurDetachScope; + } + + /// Produce a warning that we failed to emit a spawn. + void FailedSpawnWarning(SourceLocation SLoc) { + DiagnosticsEngine &Diags = CGM.getDiags(); + unsigned DiagID = Diags.getCustomDiagID(DiagnosticsEngine::Warning, + "Failed to emit spawn"); + Diags.Report(SLoc, DiagID); + } + + // RAII for automatically popping detach scopes at the end of code-generating + // an expression. 
+ class DetachScopeRAII { + CodeGenFunction &CGF; + CodeGenFunction::DetachScope *StartingDetachScope; + public: + DetachScopeRAII(CodeGenFunction &CGF) + : CGF(CGF), StartingDetachScope(CGF.CurDetachScope) {} + ~DetachScopeRAII() { + if (!CGF.CurDetachScope || CGF.CurDetachScope == StartingDetachScope) + // No detach scope was pushed, so there's nothing to do. + return; + CGF.PopDetachScope(); + assert(CGF.CurDetachScope == StartingDetachScope && + "Unexpected detach scope"); + CGF.IsSpawned = false; + } + }; + + // Simple RAII object for creating an unassociated taskframe. + class TaskFrameScope { + CodeGenFunction &CGF; + + // Old alloca insertion points from the CGF to restore when we're done + // emitting the spawned task and associated taskframe. + llvm::AssertingVH OldAllocaInsertPt = nullptr; + + // A temporary invoke destination, maintained to handle the emission of + // detached.rethrow and taskframe.resume intrinsics on exception-handling + // paths out of a spawned task or its taskframe. + llvm::BasicBlock *TempInvokeDest = nullptr; + + // Old EH state from the CGF to restore when we're done emitting the spawned + // task and associated taskframe. + llvm::BasicBlock *OldEHResumeBlock = nullptr; + llvm::Value *OldExceptionSlot = nullptr; + llvm::AllocaInst *OldEHSelectorSlot = nullptr; + Address OldNormalCleanupDest = Address::invalid(); + + // Taskframe created separately from detach. + llvm::Value *TaskFrame = nullptr; + public: + TaskFrameScope(CodeGenFunction &CGF); + ~TaskFrameScope(); + + llvm::Value *getTaskFrame() const { return TaskFrame; } + + // Get a temporary destination for an invoke, creating a new one if + // necessary. + llvm::BasicBlock *getTempInvokeDest() { + if (!TempInvokeDest) + TempInvokeDest = CGF.createBasicBlock("temp.invoke.dest"); + return TempInvokeDest; + } + }; + + /// Cleanup to ensure a taskframe is ended with a taskframe.resume on an + /// exception-handling path. 
+ struct EndUnassocTaskFrame final : public EHScopeStack::Cleanup { + TaskFrameScope *TFScope; + public: + EndUnassocTaskFrame(TaskFrameScope *TFScope) : TFScope(TFScope) {} + void Emit(CodeGenFunction &CGF, Flags F) override { + if (F.isForNormalCleanup()) { + // For normal cleanups, just insert a call to taskframe.end. + llvm::Function *TaskFrameEnd = + CGF.CGM.getIntrinsic(llvm::Intrinsic::taskframe_end); + assert(TFScope->getTaskFrame() && "No taskframe in TFScope"); + CGF.Builder.CreateCall(TaskFrameEnd, { TFScope->getTaskFrame() }); + return; + } + + // Recreate the landingpad's return value for the rethrow invoke. Tapir + // lowering will replace this rethrow with a resume. + llvm::Value *Exn = CGF.Builder.CreateLoad( + Address(CGF.ExceptionSlot, CGF.Int8PtrTy, CGF.getPointerAlign()), + "exn"); + llvm::Value *Sel = CGF.Builder.CreateLoad( + Address(CGF.EHSelectorSlot, CGF.Int32Ty, CharUnits::fromQuantity(4)), + "sel"); + llvm::Type *LPadType = + llvm::StructType::get(Exn->getType(), Sel->getType()); + llvm::Value *LPadVal = llvm::UndefValue::get(LPadType); + LPadVal = CGF.Builder.CreateInsertValue(LPadVal, Exn, 0, "lpad.val"); + LPadVal = CGF.Builder.CreateInsertValue(LPadVal, Sel, 1, "lpad.val"); + + llvm::Function *TaskFrameResume = + CGF.CGM.getIntrinsic(llvm::Intrinsic::taskframe_resume, + { LPadVal->getType() }); + CGF.Builder.CreateInvoke(TaskFrameResume, CGF.getUnreachableBlock(), + TFScope->getTempInvokeDest(), + { TFScope->getTaskFrame(), LPadVal }); + CGF.Builder.SetInsertPoint(TFScope->getTempInvokeDest()); + } + }; + /// Takes the old cleanup stack size and emits the cleanup blocks /// that have been added. 
void PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize, - std::initializer_list ValuesToReload = {}); + std::initializer_list ValuesToReload = {}, + bool AfterSync = false); /// Takes the old cleanup stack size and emits the cleanup blocks /// that have been added, then adds all lifetime-extended cleanups from @@ -1146,7 +1707,8 @@ class CodeGenFunction : public CodeGenTypeCache { void PopCleanupBlocks(EHScopeStack::stable_iterator OldCleanupStackSize, size_t OldLifetimeExtendedStackSize, - std::initializer_list ValuesToReload = {}); + std::initializer_list ValuesToReload = {}, + bool AfterSync = false); void ResolveBranchFixups(llvm::BasicBlock *Target); @@ -1169,7 +1731,7 @@ class CodeGenFunction : public CodeGenTypeCache { /// EmitBranchThroughCleanup - Emit a branch from the current insert /// block through the normal cleanup handling code (if any) and then /// on to \arg Dest. - void EmitBranchThroughCleanup(JumpDest Dest); + void EmitBranchThroughCleanup(JumpDest Dest, bool AfterSync = false); /// isObviouslyBranchWithoutCleanups - Return true if a branch to the /// specified destination obviously has no cleanups to run. 
'false' is always @@ -2089,6 +2651,7 @@ class CodeGenFunction : public CodeGenTypeCache { switch (kind) { case QualType::DK_none: return false; + case QualType::DK_hyperobject: case QualType::DK_cxx_destructor: case QualType::DK_objc_weak_lifetime: case QualType::DK_nontrivial_c_struct: @@ -2924,6 +3487,7 @@ class CodeGenFunction : public CodeGenTypeCache { bool ZeroInitialization = false); static Destroyer destroyCXXObject; + static Destroyer destroyHyperobject; void EmitCXXDestructorCall(const CXXDestructorDecl *D, CXXDtorType Type, bool ForVirtualBase, bool Delegating, Address This, @@ -3138,6 +3702,14 @@ class CodeGenFunction : public CodeGenTypeCache { void EmitAutoVarCleanups(const AutoVarEmission &emission); void emitAutoVarTypeCleanup(const AutoVarEmission &emission, QualType::DestructionKind dtorKind); + struct ReducerCallbacks { + Expr *Identity; + Expr *Reduce; + }; + + bool getReducer(const VarDecl *D, ReducerCallbacks &CB); + void EmitReducerInit(const VarDecl *D, const ReducerCallbacks &CB, + llvm::Value *Addr); /// Emits the alloca and debug information for the size expressions for each /// dimension of an array. 
It registers the association of its (1-dimensional) @@ -3270,6 +3842,13 @@ class CodeGenFunction : public CodeGenTypeCache { void EmitCaseStmtRange(const CaseStmt &S, ArrayRef Attrs); void EmitAsmStmt(const AsmStmt &S); + void EmitCilkScopeStmt(const CilkScopeStmt &S); + void EmitCilkSpawnStmt(const CilkSpawnStmt &S); + void EmitCilkSyncStmt(const CilkSyncStmt &S); + void EmitCilkForStmt(const CilkForStmt &S, + ArrayRef Attrs = std::nullopt); + LValue EmitCilkSpawnExprLValue(const CilkSpawnExpr *E); + void EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S); void EmitObjCAtTryStmt(const ObjCAtTryStmt &S); void EmitObjCAtThrowStmt(const ObjCAtThrowStmt &S); @@ -4427,7 +5006,8 @@ class CodeGenFunction : public CodeGenTypeCache { /// EmitScalarExpr - Emit the computation of the specified expression of LLVM /// scalar type, returning the result. - llvm::Value *EmitScalarExpr(const Expr *E , bool IgnoreResultAssign = false); + llvm::Value *EmitScalarExpr(const Expr *E, bool IgnoreResultAssign = false); + void EmitScalarExprIntoLValue(const Expr *E, LValue dest, bool isInit); /// Emit a conversion from the specified type to the specified destination /// type, both of which are LLVM scalar types. 
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index f09d1129b128a3..d86ac4712fb3f0 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -5101,7 +5101,11 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D, getContext().GetGVALinkageForVariable(D) == GVA_AvailableExternally; bool NeedsGlobalDtor = !IsDefinitionAvailableExternally && - D->needsDestruction(getContext()) == QualType::DK_cxx_destructor; + (D->needsDestruction(getContext()) == QualType::DK_cxx_destructor || + D->needsDestruction(getContext()) == QualType::DK_hyperobject); + NeedsGlobalCtor = + !IsDefinitionAvailableExternally && + D->needsDestruction(getContext()) == QualType::DK_hyperobject; const VarDecl *InitDecl; const Expr *InitExpr = D->getAnyInitializer(InitDecl); diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index b80317529b72b6..2ba197664559e4 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -126,6 +126,9 @@ class PGOHash { BinaryOperatorNE, // The preceding values are available since PGO_HASH_V2. + // Cilk statements. These values are also available with PGO_HASH_V1. + CilkForStmt, + // Keep this last. It's for the static assert that follows. LastHashType }; @@ -267,6 +270,7 @@ struct MapRegionCounters : public RecursiveASTVisitor { DEFINE_NESTABLE_TRAVERSAL(ObjCForCollectionStmt) DEFINE_NESTABLE_TRAVERSAL(CXXTryStmt) DEFINE_NESTABLE_TRAVERSAL(CXXCatchStmt) + DEFINE_NESTABLE_TRAVERSAL(CilkForStmt) /// Get version \p HashVersion of the PGO hash for \p S. 
PGOHash::HashType getHashType(PGOHashVersion HashVersion, const Stmt *S) { @@ -327,6 +331,8 @@ struct MapRegionCounters : public RecursiveASTVisitor { } break; } + case Stmt::CilkForStmtClass: + return PGOHash::CilkForStmt; } if (HashVersion >= PGO_HASH_V2) { @@ -744,6 +750,53 @@ struct ComputeRegionCounts : public ConstStmtVisitor { setCount(ParentCount + RHSCount - CurrentCount); RecordNextStmtCount = true; } + + void VisitCilkForStmt(const CilkForStmt *S) { + RecordStmtCount(S); + if (S->getInit()) + Visit(S->getInit()); + if (S->getLimitStmt()) + Visit(S->getLimitStmt()); + if (S->getBeginStmt()) + Visit(S->getBeginStmt()); + if (S->getEndStmt()) + Visit(S->getEndStmt()); + if (S->getLoopVarStmt()) + Visit(S->getLoopVarStmt()); + + uint64_t ParentCount = CurrentCount; + + BreakContinueStack.push_back(BreakContinue()); + // Visit the body region first. (This is basically the same as a while + // loop; see further comments in VisitWhileStmt.) + uint64_t BodyCount = setCount(PGO.getRegionCount(S)); + CountMap[S->getBody()] = BodyCount; + Visit(S->getBody()); + uint64_t BackedgeCount = CurrentCount; + BreakContinue BC = BreakContinueStack.pop_back_val(); + + // The increment is essentially part of the body but it needs to include + // the count for all the continue statements. + if (S->getInc()) { + uint64_t IncCount = setCount(BackedgeCount + BC.ContinueCount); + CountMap[S->getInc()] = IncCount; + Visit(S->getInc()); + } + + // ...then go back and propagate counts through the condition. 
+ uint64_t CondCount = + setCount(ParentCount + BackedgeCount + BC.ContinueCount); + if (S->getInitCond()) { + CountMap[S->getInitCond()] = ParentCount; + Visit(S->getInitCond()); + } + if (S->getCond()) { + CountMap[S->getCond()] = CondCount; + Visit(S->getCond()); + } + setCount(BC.BreakCount + CondCount - BodyCount); + RecordNextStmtCount = true; + } }; } // end anonymous namespace diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 30021794a0bb3d..4f03ffe5f18696 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -548,6 +548,8 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { ResultType = llvm::StructType::get(EltTy, EltTy); break; } + case Type::Hyperobject: + return ConvertType(cast(Ty)->getElementType()); case Type::LValueReference: case Type::RValueReference: { const ReferenceType *RTy = cast(Ty); diff --git a/clang/lib/CodeGen/EHScopeStack.h b/clang/lib/CodeGen/EHScopeStack.h index 3c8a51590d1b53..57b283a171ac94 100644 --- a/clang/lib/CodeGen/EHScopeStack.h +++ b/clang/lib/CodeGen/EHScopeStack.h @@ -87,6 +87,8 @@ enum CleanupKind : unsigned { LifetimeMarker = 0x8, NormalEHLifetimeMarker = LifetimeMarker | NormalAndEHCleanup, + + TaskExit = 0x10, }; /// A stack of scopes which respond to exceptions, including cleanups diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index ede9efb019ce80..1439bba8b10224 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -3605,6 +3605,12 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { VTableName = "_ZTVN10__cxxabiv123__fundamental_type_infoE"; break; + case Type::Hyperobject: + // XXX needs work + BuildVTablePointer + (cast(Ty)->getElementType().getTypePtr()); + return; + case Type::ConstantArray: case Type::IncompleteArray: case Type::VariableArray: @@ -3862,6 +3868,9 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo( case 
Type::DeducedTemplateSpecialization: llvm_unreachable("Undeduced type shouldn't get here"); + case Type::Hyperobject: + llvm_unreachable("Hyperobject shouldn't get here"); + case Type::Pipe: break; diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt index a6bd2d41e79750..7139d30681a7e3 100644 --- a/clang/lib/Driver/CMakeLists.txt +++ b/clang/lib/Driver/CMakeLists.txt @@ -27,6 +27,7 @@ add_clang_library(clangDriver OptionUtils.cpp Phases.cpp SanitizerArgs.cpp + Tapir.cpp Tool.cpp ToolChain.cpp ToolChains/Arch/AArch64.cpp diff --git a/clang/lib/Driver/Tapir.cpp b/clang/lib/Driver/Tapir.cpp new file mode 100644 index 00000000000000..bea4998e137cd8 --- /dev/null +++ b/clang/lib/Driver/Tapir.cpp @@ -0,0 +1,84 @@ +//===--- Tapir.cpp - C Language Family Language Options ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the functions from Tapir.h +// +//===----------------------------------------------------------------------===// + +#include "clang/Driver/Tapir.h" +#include "clang/Driver/Options.h" +#include "clang/Driver/ToolChain.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Option/Arg.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Transforms/Tapir/TapirTargetIDs.h" + +using namespace clang::driver; +using namespace clang; +using namespace llvm::opt; + +TapirTargetID clang::parseTapirTarget(const ArgList &Args) { + // Use Cilk if -ftapir is not specified but -fcilkplus is specified. 
+ if (!Args.hasArg(options::OPT_ftapir_EQ)) { + if (Args.hasArg(options::OPT_fcilkplus)) + return TapirTargetID::Cilk; + if (Args.hasArg(options::OPT_fopencilk)) + return TapirTargetID::OpenCilk; + return TapirTargetID::None; + } + + // Otherwise use the runtime specified by -ftapir. + TapirTargetID TapirTarget = TapirTargetID::None; + if (const Arg *A = Args.getLastArg(options::OPT_ftapir_EQ)) + TapirTarget = llvm::StringSwitch(A->getValue()) + .Case("none", TapirTargetID::None) + .Case("serial", TapirTargetID::Serial) + .Case("cheetah", TapirTargetID::Cheetah) + .Case("cilkplus", TapirTargetID::Cilk) + .Case("lambda", TapirTargetID::Lambda) + .Case("omptask", TapirTargetID::OMPTask) + .Case("opencilk", TapirTargetID::OpenCilk) + .Case("qthreads", TapirTargetID::Qthreads) + .Default(TapirTargetID::Last_TapirTargetID); + + return TapirTarget; +} + +std::optional +clang::serializeTapirTarget(TapirTargetID Target) { + std::optional TapirTargetStr; + switch (Target) { + case TapirTargetID::None: + TapirTargetStr = "none"; + break; + case TapirTargetID::Serial: + TapirTargetStr = "serial"; + break; + case TapirTargetID::Cheetah: + TapirTargetStr = "cheetah"; + break; + case TapirTargetID::Cilk: + TapirTargetStr = "cilkplus"; + break; + case TapirTargetID::Lambda: + TapirTargetStr = "lambda"; + break; + case TapirTargetID::OMPTask: + TapirTargetStr = "omptask"; + break; + case TapirTargetID::OpenCilk: + TapirTargetStr = "opencilk"; + break; + case TapirTargetID::Qthreads: + TapirTargetStr = "qthreads"; + break; + case TapirTargetID::Last_TapirTargetID: + break; + } + return TapirTargetStr; +} diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 8dafc3d481c2e0..08b089755a81d9 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -23,6 +23,7 @@ #include "clang/Driver/Job.h" #include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" +#include "clang/Driver/Tapir.h" #include "clang/Driver/XRayArgs.h" 
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" @@ -1505,3 +1506,290 @@ llvm::opt::DerivedArgList *ToolChain::TranslateXarchArgs( delete DAL; return nullptr; } + +void ToolChain::AddOpenCilkIncludeDir(const ArgList &Args, + ArgStringList &CmdArgs) const { + if (!Args.hasArg(options::OPT_opencilk_resource_dir_EQ)) + return; + + const Arg *A = Args.getLastArg(options::OPT_opencilk_resource_dir_EQ); + SmallString<128> P; + + // Check for an include directory. + P.assign(A->getValue()); + llvm::sys::path::append(P, "include"); + if (getVFS().exists(P)) { + addSystemInclude(Args, CmdArgs, P); + } else { + D.Diag(diag::err_drv_opencilk_resource_dir_missing_include) + << A->getAsString(Args); + } +} + +ToolChain::path_list +ToolChain::getOpenCilkRuntimePaths(const ArgList &Args) const { + path_list Paths; + + if (!Args.hasArg(options::OPT_opencilk_resource_dir_EQ)) { + Paths = getRuntimePaths(); + Paths.push_back(getCompilerRTPath()); + return Paths; + } + + // If -opencilk-resource-dir= is specified, try to use that directory, and + // raise an error if that fails. + const Arg *A = Args.getLastArg(options::OPT_opencilk_resource_dir_EQ); + + // Try the triple passed to driver as --target=. + { + SmallString<128> P(A->getValue()); + llvm::sys::path::append(P, "lib", getTriple().str()); + Paths.push_back(std::string(P.str())); + } + // Try excluding the triple. + { + SmallString<128> P(A->getValue()); + if (Triple.isOSUnknown()) { + llvm::sys::path::append(P, "lib"); + } else { + llvm::sys::path::append(P, "lib", getOSLibName()); + } + Paths.push_back(std::string(P.str())); + } + + return Paths; +} + +static void addOpenCilkRuntimeRunPath(const ToolChain &TC, const ArgList &Args, + ArgStringList &CmdArgs, + const llvm::Triple &Triple) { + // Allow the -fno-rtlib-add-rpath flag to prevent adding this default + // directory to the runpath. 
+ if (!Args.hasFlag(options::OPT_frtlib_add_rpath, + options::OPT_fno_rtlib_add_rpath, true)) + return; + + bool FoundCandidate = false; + for (auto CandidateRPath : TC.getOpenCilkRuntimePaths(Args)) { + if (TC.getVFS().exists(CandidateRPath)) { + FoundCandidate = true; + CmdArgs.push_back("-L"); + CmdArgs.push_back(Args.MakeArgString(CandidateRPath.c_str())); + CmdArgs.push_back("-rpath"); + CmdArgs.push_back(Args.MakeArgString(CandidateRPath.c_str())); + } + } + if (FoundCandidate && Triple.isOSBinFormatELF()) + CmdArgs.push_back("--enable-new-dtags"); +} + +static StringRef getArchNameForOpenCilkRTLib(const ToolChain &TC, + const ArgList &Args) { + return getArchNameForCompilerRTLib(TC, Args); +} + +std::string ToolChain::getOpenCilkBCBasename(const ArgList &Args, + StringRef Component, + bool AddArch) const { + const llvm::Triple &TT = getTriple(); + const char *Prefix = "lib"; + const char *Suffix = ".bc"; + std::string ArchAndEnv; + if (AddArch) { + StringRef Arch = getArchNameForOpenCilkRTLib(*this, Args); + const char *Env = TT.isAndroid() ? "-android" : ""; + ArchAndEnv = ("-" + Arch + Env).str(); + } + return (Prefix + Component + ArchAndEnv + Suffix).str(); +} + +std::optional<std::string> ToolChain::getOpenCilkBC(const ArgList &Args, + StringRef Component) const { + // Check for runtime files without the architecture first. + std::string BCBasename = + getOpenCilkBCBasename(Args, Component, /*AddArch=*/false); + for (auto RuntimePath : getOpenCilkRuntimePaths(Args)) { + SmallString<128> P(RuntimePath); + llvm::sys::path::append(P, BCBasename); + if (getVFS().exists(P)) + return std::optional<std::string>(std::string(P.str())); + } + + // Fall back to the OpenCilk name with the arch if the no-arch version does + // not exist.
+ BCBasename = getOpenCilkBCBasename(Args, Component, /*AddArch=*/true); + for (auto RuntimePath : getOpenCilkRuntimePaths(Args)) { + SmallString<128> P(RuntimePath); + llvm::sys::path::append(P, BCBasename); + if (getVFS().exists(P)) + return std::optional(std::string(P.str())); + } + + return std::nullopt; +} + +void ToolChain::AddOpenCilkABIBitcode(const ArgList &Args, + ArgStringList &CmdArgs, + bool IsLTO) const { + // If --opencilk-abi-bitcode= is specified, use that specified path. + if (Args.hasArg(options::OPT_opencilk_abi_bitcode_EQ)) { + const Arg *A = Args.getLastArg(options::OPT_opencilk_abi_bitcode_EQ); + SmallString<128> P(A->getValue()); + if (!getVFS().exists(P)) { + getDriver().Diag(diag::err_drv_opencilk_missing_abi_bitcode) + << A->getAsString(Args); + } + if (IsLTO) + CmdArgs.push_back( + Args.MakeArgString("--plugin-opt=opencilk-abi-bitcode=" + P)); + } + + bool UseAsan = getSanitizerArgs(Args).needsAsanRt(); + StringRef OpenCilkBCName = UseAsan ? "opencilk-asan-abi" : "opencilk-abi"; + if (auto OpenCilkABIBCFilename = getOpenCilkBC(Args, OpenCilkBCName)) { + if (IsLTO) + CmdArgs.push_back(Args.MakeArgString("--plugin-opt=opencilk-abi-bitcode=" + + *OpenCilkABIBCFilename)); + else + CmdArgs.push_back(Args.MakeArgString("--opencilk-abi-bitcode=" + + *OpenCilkABIBCFilename)); + return; + } + + // Error if we could not find a bitcode file. 
+ getDriver().Diag(diag::err_drv_opencilk_missing_abi_bitcode) + << getOpenCilkBCBasename(Args, OpenCilkBCName, /*AddArch=*/false); +} + +std::string ToolChain::getOpenCilkRTBasename(const ArgList &Args, + StringRef Component, + FileType Type, + bool AddArch) const { + const llvm::Triple &TT = getTriple(); + const char *Prefix = "lib"; + const char *Suffix; + switch (Type) { + case ToolChain::FT_Object: + Suffix = ".o"; + break; + case ToolChain::FT_Static: + Suffix = ".a"; + break; + case ToolChain::FT_Shared: + Suffix = ".so"; + break; + } + std::string ArchAndEnv; + if (AddArch) { + StringRef Arch = getArchNameForOpenCilkRTLib(*this, Args); + const char *Env = TT.isAndroid() ? "-android" : ""; + ArchAndEnv = ("-" + Arch + Env).str(); + } + return (Prefix + Component + ArchAndEnv + Suffix).str(); +} + +std::string ToolChain::getOpenCilkRT(const ArgList &Args, StringRef Component, + FileType Type) const { + // Check for runtime files without the architecture first. + std::string RTBasename = + getOpenCilkRTBasename(Args, Component, Type, /*AddArch=*/false); + if (Args.hasArg(options::OPT_opencilk_resource_dir_EQ)) { + // If opencilk-resource-dir is specified, look for the library in that + // directory. + for (auto RuntimePath : getOpenCilkRuntimePaths(Args)) { + SmallString<128> P(RuntimePath); + llvm::sys::path::append(P, RTBasename); + if (getVFS().exists(P)) + return std::string(P.str()); + } + } else { + for (const auto &LibPath : getLibraryPaths()) { + SmallString<128> P(LibPath); + llvm::sys::path::append(P, RTBasename); + if (getVFS().exists(P)) + // If we found the library in LibraryPaths, let the linker resolve it. + return std::string(("-l" + Component).str()); + } + } + + // Fall back to the OpenCilk name with the arch if the no-arch version does + // not exist. 
+ RTBasename = getOpenCilkRTBasename(Args, Component, Type, /*AddArch=*/true); + for (auto RuntimePath : getOpenCilkRuntimePaths(Args)) { + SmallString<128> P(RuntimePath); + llvm::sys::path::append(P, RTBasename); + if (getVFS().exists(P)) + return std::string(P.str()); + } + + // Otherwise, trust the linker to find the library on the system. + return std::string(("-l" + Component).str()); +} + +void ToolChain::AddTapirRuntimeLibArgs(const ArgList &Args, + ArgStringList &CmdArgs) const { + TapirTargetID TapirTarget = parseTapirTarget(Args); + if (TapirTarget == TapirTargetID::Last_TapirTargetID) + if (const Arg *A = Args.getLastArg(options::OPT_ftapir_EQ)) + getDriver().Diag(diag::err_drv_invalid_value) << A->getAsString(Args) + << A->getValue(); + + switch (TapirTarget) { + case TapirTargetID::Cheetah: + CmdArgs.push_back("-lcheetah"); + CmdArgs.push_back("-lpthread"); + break; + case TapirTargetID::OpenCilk: { + bool StaticOpenCilk = Args.hasArg(options::OPT_static_libopencilk) || + Args.hasArg(options::OPT_static); + bool OnlyStaticOpenCilk = Args.hasArg(options::OPT_static_libopencilk) && + !Args.hasArg(options::OPT_static); + bool UseAsan = getSanitizerArgs(Args).needsAsanRt(); + if (OnlyStaticOpenCilk) + CmdArgs.push_back("-Bstatic"); + + // If pedigrees are enabled, link the OpenCilk pedigree library. + if (Args.hasArg(options::OPT_fopencilk_enable_pedigrees)) + CmdArgs.push_back(Args.MakeArgString(getOpenCilkRT( + Args, UseAsan ? "opencilk-pedigrees-asan" : "opencilk-pedigrees", + StaticOpenCilk ? ToolChain::FT_Static : ToolChain::FT_Shared))); + + // Link the correct Cilk personality fn + if (getDriver().CCCIsCXX()) + CmdArgs.push_back(Args.MakeArgString(getOpenCilkRT( + Args, + UseAsan ? "opencilk-asan-personality-cpp" + : "opencilk-personality-cpp", + StaticOpenCilk ? ToolChain::FT_Static : ToolChain::FT_Shared))); + else + CmdArgs.push_back(Args.MakeArgString(getOpenCilkRT( + Args, + UseAsan ? 
"opencilk-asan-personality-c" : "opencilk-personality-c", + StaticOpenCilk ? ToolChain::FT_Static : ToolChain::FT_Shared))); + + // Link the opencilk runtime. We do this after linking the personality + // function, to ensure that symbols are resolved correctly when using static + // linking. + CmdArgs.push_back(Args.MakeArgString(getOpenCilkRT( + Args, UseAsan ? "opencilk-asan" : "opencilk", + StaticOpenCilk ? ToolChain::FT_Static : ToolChain::FT_Shared))); + + // Add to the executable's runpath the default directory containing OpenCilk + // runtime. + addOpenCilkRuntimeRunPath(*this, Args, CmdArgs, Triple); + if (OnlyStaticOpenCilk) { + CmdArgs.push_back("-Bdynamic"); + CmdArgs.push_back("-lpthread"); + } + break; + } + case TapirTargetID::Cilk: + CmdArgs.push_back("-lcilkrts"); + break; + case TapirTargetID::Qthreads: + CmdArgs.push_back("-lqthread"); + break; + default: + break; + } +} diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 37a07b8f224d9d..b77de97a6e86b3 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1362,6 +1362,9 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA, // OBJCPLUS_INCLUDE_PATH - system includes enabled when compiling ObjC++. addDirectoryList(Args, CmdArgs, "-objcxx-isystem", "OBJCPLUS_INCLUDE_PATH"); + // If a custom OpenCilk resource directory is specified, add its include path. + getToolChain().AddOpenCilkIncludeDir(Args, CmdArgs); + // While adding the include arguments, we also attempt to retrieve the // arguments of related offloading toolchains or arguments that are specific // of an offloading programming model. @@ -6257,6 +6260,74 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_fdiagnostics_show_template_tree); Args.AddLastArg(CmdArgs, options::OPT_fno_elide_type); + // Forward flags for Cilk. 
+ Args.AddLastArg(CmdArgs, options::OPT_fcilkplus); + Args.AddLastArg(CmdArgs, options::OPT_fopencilk); + Args.AddLastArg(CmdArgs, options::OPT_ftapir_EQ); + if (Args.hasArg(options::OPT_fcilkplus) || + Args.hasArg(options::OPT_fopencilk) || + Args.hasArg(options::OPT_ftapir_EQ)) { + auto const &Triple = getToolChain().getTriple(); + + // At least one runtime has been implemented for these operating systems. + if (!Triple.isOSLinux() && !Triple.isOSFreeBSD() && !Triple.isMacOSX()) + D.Diag(diag::err_drv_cilk_unsupported); + + /* JFC: Is it possible to confuse with with -fno-opencilk? */ + bool OpenCilk = Args.hasArgNoClaim(options::OPT_fopencilk); + bool Cheetah = false; + bool CustomTarget = false; + + if (Arg *TapirRuntime = Args.getLastArgNoClaim(options::OPT_ftapir_EQ)) { + Cheetah = TapirRuntime->getValue() == StringRef("cheetah"); + if (TapirRuntime->getValue() == StringRef("opencilk")) { + OpenCilk = true; + } else { + CustomTarget = true; + } + } + + if (Cheetah && Triple.getArch() != llvm::Triple::x86_64) { + D.Diag(diag::err_drv_cilk_unsupported); + } + if (OpenCilk) { + switch (Triple.getArch()) { + case llvm::Triple::x86: + case llvm::Triple::x86_64: + case llvm::Triple::arm: + case llvm::Triple::armeb: + case llvm::Triple::aarch64: + case llvm::Triple::aarch64_be: + break; + default: + D.Diag(diag::err_drv_cilk_unsupported); + break; + } + + // If an OpenCilk resource directory is specified, check that it is valid. + if (Args.hasArgNoClaim(options::OPT_opencilk_resource_dir_EQ)) { + bool ValidPathFound = false; + for (auto Path : getToolChain().getOpenCilkRuntimePaths(Args)) { + if (D.getVFS().exists(Path)) { + ValidPathFound = true; + break; + } + } + if (!ValidPathFound) + D.Diag(diag::err_drv_opencilk_resource_dir_missing_lib) + << Args.getLastArgNoClaim(options::OPT_opencilk_resource_dir_EQ) + ->getAsString(Args); + } + + // Forward flags for enabling pedigrees. 
+ Args.AddLastArg(CmdArgs, options::OPT_fopencilk_enable_pedigrees); + + if (!CustomTarget) + // Add the OpenCilk ABI bitcode file. + getToolChain().AddOpenCilkABIBitcode(Args, CmdArgs); + } + } + // Forward flags for OpenMP. We don't do this if the current action is an // device offloading action other than OpenMP. if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, @@ -6344,6 +6415,14 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, SanitizeArgs.addArgs(TC, Args, CmdArgs, InputType); + if (Args.hasArg(options::OPT_fcsi_EQ)) + Args.AddLastArg(CmdArgs, options::OPT_fcsi_EQ); + else if (Args.hasArg(options::OPT_fcsi)) + Args.AddLastArg(CmdArgs, options::OPT_fcsi); + + if (Args.hasArg(options::OPT_fcilktool_EQ)) + Args.AddLastArg(CmdArgs, options::OPT_fcilktool_EQ); + const XRayArgs &XRay = TC.getXRayArgs(); XRay.addArgs(TC, Args, CmdArgs, InputType); @@ -7011,6 +7090,15 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, options::OPT_fno_slp_vectorize, EnableSLPVec)) CmdArgs.push_back("-vectorize-slp"); + // -fstripmine is enabled based on the optimization level selected. For now, + // we enable stripmining when the optimization level enables vectorization. + bool EnableStripmine = EnableVec; + OptSpecifier StripmineAliasOption = + EnableStripmine ? 
options::OPT_O_Group : options::OPT_fstripmine; + if (Args.hasFlag(options::OPT_fstripmine, StripmineAliasOption, + options::OPT_fno_stripmine, EnableStripmine)) + CmdArgs.push_back("-stripmine-loops"); + ParseMPreferVectorWidth(D, Args, CmdArgs); Args.AddLastArg(CmdArgs, options::OPT_fshow_overloads_EQ); diff --git a/clang/lib/Driver/ToolChains/CloudABI.cpp b/clang/lib/Driver/ToolChains/CloudABI.cpp index ee2cb10fde7d20..f5b8480fb0ec9c 100644 --- a/clang/lib/Driver/ToolChains/CloudABI.cpp +++ b/clang/lib/Driver/ToolChains/CloudABI.cpp @@ -83,6 +83,9 @@ void cloudabi::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (ToolChain.ShouldLinkCXXStdlib(Args)) ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); + + ToolChain.AddTapirRuntimeLibArgs(Args, CmdArgs); + if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { CmdArgs.push_back("-lc"); CmdArgs.push_back("-lcompiler_rt"); diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 0d6907b8e5c7ae..7dcc00f1dd30d7 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -146,6 +146,23 @@ static bool shouldIgnoreUnsupportedTargetFeature(const Arg &TargetFeatureArg, return TargetFeatureArg.getOption().matches(options::OPT_mno_cumode); } +static void renderTapirLoweringOptions(const ArgList &Args, + ArgStringList &CmdArgs, + const ToolChain &TC) { + if (Args.hasArg(options::OPT_fcilkplus) || + Args.hasArg(options::OPT_fopencilk) || + Args.hasArg(options::OPT_ftapir_EQ)) { + if (const Arg *A = Args.getLastArg(options::OPT_ftapir_EQ)) + CmdArgs.push_back(Args.MakeArgString( + Twine("--plugin-opt=tapir-target=") + A->getValue())); + else if (Args.hasArg(options::OPT_fopencilk)) { + CmdArgs.push_back("--plugin-opt=tapir-target=opencilk"); + TC.AddOpenCilkABIBitcode(Args, CmdArgs, /*IsLTO=*/true); + } else if (Args.hasArg(options::OPT_fcilkplus)) + CmdArgs.push_back("--plugin-opt=tapir-target=cilkplus"); + 
} +} + void tools::addPathIfExists(const Driver &D, const Twine &Path, ToolChain::path_list &Paths) { if (D.getVFS().exists(Path)) @@ -853,6 +870,8 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, // Handle remarks hotness/threshold related options. renderRemarksHotnessOptions(Args, CmdArgs, PluginOptPrefix); + renderTapirLoweringOptions(Args, CmdArgs, ToolChain); + addMachineOutlinerArgs(D, Args, CmdArgs, ToolChain.getEffectiveTriple(), /*IsLTO=*/true, PluginOptPrefix); } @@ -1038,6 +1057,29 @@ void tools::linkSanitizerRuntimeDeps(const ToolChain &TC, CmdArgs.push_back("-lresolv"); } +// CilkSanitizer has different runtime requirements than typical sanitizers. +bool tools::needsCilkSanitizerDeps(const ToolChain &TC, const ArgList &Args) { + const SanitizerArgs &SanArgs = TC.getSanitizerArgs(Args); + if (Args.hasArg(options::OPT_shared) || SanArgs.needsSharedRt()) { + // Don't link static runtimes into DSOs or if -shared-libasan. + return false; + } + if (Args.hasArg(options::OPT_nostdlibxx)) { + return false; + } + return SanArgs.needsCilksanRt(); +} + +void tools::linkCilkSanitizerRuntimeDeps(const ArgList &Args, + const ToolChain &TC, + ArgStringList &CmdArgs) { + // Force linking against the system libraries sanitizers depends on + // (see PR15823 why this is necessary). + CmdArgs.push_back("--no-as-needed"); + // Link in the C++ standard library + TC.AddCXXStdlibLibArgs(Args, CmdArgs); +} + static void collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args, SmallVectorImpl<StringRef> &SharedRuntimes,
@@ -1101,6 +1145,8 @@ collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args, if (SanArgs.linkCXXRuntimes()) StaticRuntimes.push_back("asan_cxx"); } + if (!SanArgs.needsSharedRt() && SanArgs.needsCilksanRt()) + StaticRuntimes.push_back("cilksan"); if (!SanArgs.needsSharedRt() && SanArgs.needsMemProfRt() && SanArgs.linkRuntimes()) { @@ -1236,6 +1282,15 @@ bool tools::addSanitizerRuntimes(const ToolChain &TC, const ArgList &Args, CmdArgs.push_back("--android-memtag-stack"); } + if (SanArgs.needsCilksanRt()) { + // Interpose the __cilkrts_internal_merge_two_rmaps, __cilkrts_hyper_alloc, + // and __cilkrts_hyper_dealloc functions in the OpenCilk runtime, to properly + // suppress races involving reducer hyperobjects. + CmdArgs.push_back("--wrap=__cilkrts_internal_merge_two_rmaps"); + CmdArgs.push_back("--wrap=__cilkrts_hyper_alloc"); + CmdArgs.push_back("--wrap=__cilkrts_hyper_dealloc"); + } + return !StaticRuntimes.empty() || !NonWholeStaticRuntimes.empty(); } @@ -1268,6 +1323,32 @@ void tools::linkXRayRuntimeDeps(const ToolChain &TC, ArgStringList &CmdArgs) { CmdArgs.push_back("-ldl"); } +bool tools::addCSIRuntime(const ToolChain &TC, const ArgList &Args, + ArgStringList &CmdArgs) { + // Only add the CSI runtime library if -fcsi is specified. + if (!Args.hasArg(options::OPT_fcsi_EQ) && !Args.hasArg(options::OPT_fcsi)) + return false; + + CmdArgs.push_back(TC.getCompilerRTArgString(Args, "csi")); + return true; +} + +bool tools::addCilktoolRuntime(const ToolChain &TC, const ArgList &Args, + ArgStringList &CmdArgs) { + if (Arg *A = Args.getLastArg(options::OPT_fcilktool_EQ)) { + StringRef Val = A->getValue(); + bool Shared = Args.hasArg(options::OPT_shared) || + Args.hasFlag(options::OPT_shared_libcilktool, + options::OPT_static_libcilktool, false); + CmdArgs.push_back(TC.getCompilerRTArgString( + Args, Val, Shared ? 
ToolChain::FT_Shared : ToolChain::FT_Static)); + // Link in the C++ standard library + TC.AddCXXStdlibLibArgs(Args, CmdArgs); + return true; + } + return false; +} + bool tools::areOptimizationsEnabled(const ArgList &Args) { // Find the last -O arg and see if it is non-zero. if (Arg *A = Args.getLastArg(options::OPT_O_Group)) diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h index 6a8de0f1c36d1f..f1ac6fd712c97a 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.h +++ b/clang/lib/Driver/ToolChains/CommonArgs.h @@ -42,12 +42,25 @@ bool addSanitizerRuntimes(const ToolChain &TC, const llvm::opt::ArgList &Args, void linkSanitizerRuntimeDeps(const ToolChain &TC, llvm::opt::ArgStringList &CmdArgs); +bool needsCilkSanitizerDeps(const ToolChain &TC, + const llvm::opt::ArgList &Args); + +void linkCilkSanitizerRuntimeDeps(const llvm::opt::ArgList &Args, + const ToolChain &TC, + llvm::opt::ArgStringList &CmdArgs); + bool addXRayRuntime(const ToolChain &TC, const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs); void linkXRayRuntimeDeps(const ToolChain &TC, llvm::opt::ArgStringList &CmdArgs); +bool addCSIRuntime(const ToolChain &TC, const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs); + +bool addCilktoolRuntime(const ToolChain &TC, const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs); + void AddRunTimeLibs(const ToolChain &TC, const Driver &D, llvm::opt::ArgStringList &CmdArgs, const llvm::opt::ArgList &Args); diff --git a/clang/lib/Driver/ToolChains/CrossWindows.cpp b/clang/lib/Driver/ToolChains/CrossWindows.cpp index 3c5dfba329cf8e..eba34c6890ba51 100644 --- a/clang/lib/Driver/ToolChains/CrossWindows.cpp +++ b/clang/lib/Driver/ToolChains/CrossWindows.cpp @@ -178,6 +178,8 @@ void tools::CrossWindows::Linker::ConstructJob( CmdArgs.push_back("-Bdynamic"); } + TC.AddTapirRuntimeLibArgs(Args, CmdArgs); + if (!Args.hasArg(options::OPT_nostdlib)) { if 
(!Args.hasArg(options::OPT_nodefaultlibs)) { // TODO handle /MT[d] /MD[d] diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 65bd6c6a7eb35a..c86832af9e5edf 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -18,6 +18,7 @@ #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" +#include "clang/Driver/Tapir.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Option/ArgList.h" #include "llvm/ProfileData/InstrProf.h" @@ -559,6 +560,26 @@ static void renderRemarksOptions(const ArgList &Args, ArgStringList &CmdArgs, } } +static void renderTapirLoweringOptions(const ArgList &Args, + ArgStringList &CmdArgs, + const ToolChain &TC, bool LinkerIsLLD) { + if (!(TC.getDriver().isUsingLTO() && LinkerIsLLD)) + return; + + if (Args.hasArg(options::OPT_fcilkplus) || + Args.hasArg(options::OPT_fopencilk) || + Args.hasArg(options::OPT_ftapir_EQ)) { + if (const Arg *A = Args.getLastArg(options::OPT_ftapir_EQ)) + CmdArgs.push_back( + Args.MakeArgString(Twine("--tapir-target=") + A->getValue())); + else if (Args.hasArg(options::OPT_fopencilk)) { + CmdArgs.push_back("--tapir-target=opencilk"); + TC.AddOpenCilkABIBitcode(Args, CmdArgs, /*IsLTO=*/true); + } else if (Args.hasArg(options::OPT_fcilkplus)) + CmdArgs.push_back("--tapir-target=cilkplus"); + } +} + static void AppendPlatformPrefix(SmallString<128> &Path, const llvm::Triple &T); void darwin::Linker::ConstructJob(Compilation &C, const JobAction &JA, @@ -608,6 +629,8 @@ void darwin::Linker::ConstructJob(Compilation &C, const JobAction &JA, getToolChain().getTriple())) renderRemarksOptions(Args, CmdArgs, getToolChain().getTriple(), Output, JA); + renderTapirLoweringOptions(Args, CmdArgs, getToolChain(), LinkerIsLLD); + // Propagate the -moutline flag to the linker in LTO. 
if (Arg *A = Args.getLastArg(options::OPT_moutline, options::OPT_mno_outline)) { @@ -739,6 +762,8 @@ void darwin::Linker::ConstructJob(Compilation &C, const JobAction &JA, } } + getMachOToolChain().AddLinkTapirRuntime(Args, CmdArgs); + if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles)) { // endfile_spec is empty. } @@ -1428,6 +1453,21 @@ void DarwinClang::AddLinkSanitizerLibArgs(const ArgList &Args, AddLinkRuntimeLib(Args, CmdArgs, Sanitizer, RLO, Shared); } +void DarwinClang::AddCilktoolRTLibs(const ArgList &Args, + ArgStringList &CmdArgs) const { + if (Arg *A = Args.getLastArg(options::OPT_fcilktool_EQ)) { + StringRef Val = A->getValue(); + bool Shared = Args.hasArg(options::OPT_shared) || + Args.hasFlag(options::OPT_shared_libcilktool, + options::OPT_static_libcilktool, false); + auto RLO = + RuntimeLinkOptions(RLO_AlwaysLink | (Shared ? RLO_AddRPath : 0U)); + AddLinkRuntimeLib(Args, CmdArgs, Val, RLO, Shared); + // Link in the C++ standard library + AddCXXStdlibLibArgs(Args, CmdArgs); + } +} + ToolChain::RuntimeLibType DarwinClang::GetRuntimeLibType( const ArgList &Args) const { if (Arg* A = Args.getLastArg(options::OPT_rtlib_EQ)) { @@ -1502,6 +1542,14 @@ void DarwinClang::AddLinkRuntimeLibArgs(const ArgList &Args, "Static sanitizer runtimes not supported"); AddLinkSanitizerLibArgs(Args, CmdArgs, "tsan"); } + if (Sanitize.needsCilksanRt()) { + // Cilksan's instrumentation for standard-library routines and LLVM + // intrinsics currently requires Cilksan to be statically linked. + AddLinkSanitizerLibArgs(Args, CmdArgs, "cilksan"); + + // Cilksan is written in C++ and requires libcxx. 
+ AddCXXStdlibLibArgs(Args, CmdArgs); + } if (Sanitize.needsFuzzer() && !Args.hasArg(options::OPT_dynamiclib)) { AddLinkSanitizerLibArgs(Args, CmdArgs, "fuzzer", /*shared=*/false); @@ -1514,6 +1562,8 @@ void DarwinClang::AddLinkRuntimeLibArgs(const ArgList &Args, } } + AddCilktoolRTLibs(Args, CmdArgs); + const XRayArgs &XRay = getXRayArgs(); if (XRay.needsXRayRt()) { AddLinkRuntimeLib(Args, CmdArgs, "xray"); @@ -3317,6 +3367,7 @@ SanitizerMask Darwin::getSupportedSanitizers() const { (isTargetMacOSBased() || isTargetIOSSimulator() || isTargetTvOSSimulator() || isTargetWatchOSSimulator())) { Res |= SanitizerKind::Thread; + Res |= SanitizerKind::Cilk; } return Res; } @@ -3325,3 +3376,178 @@ void Darwin::printVerboseInfo(raw_ostream &OS) const { CudaInstallation.print(OS); RocmInstallation.print(OS); } + +ToolChain::path_list +DarwinClang::getOpenCilkRuntimePaths(const ArgList &Args) const { + path_list Paths; + if (!Args.hasArg(options::OPT_opencilk_resource_dir_EQ)) { + SmallString<128> P(getDriver().ResourceDir); + llvm::sys::path::append(P, "lib", "darwin"); + Paths.push_back(std::string(P.str())); + return Paths; + } + + // If -opencilk-resource-dir= is specified, try to use that directory, and + // raise an error if that fails. + const Arg *A = Args.getLastArg(options::OPT_opencilk_resource_dir_EQ); + + // Try the lib/darwin subdirectory + { + SmallString<128> P(A->getValue()); + llvm::sys::path::append(P, "lib", "darwin"); + Paths.push_back(std::string(P.str())); + } + // Try the lib subdirectory + { + SmallString<128> P(A->getValue()); + llvm::sys::path::append(P, "lib"); + Paths.push_back(std::string(P.str())); + } + return Paths; +} + +void DarwinClang::AddOpenCilkABIBitcode(const ArgList &Args, + ArgStringList &CmdArgs, + bool IsLTO) const { + // If --opencilk-abi-bitcode= is specified, use that specified path. 
+ if (Args.hasArg(options::OPT_opencilk_abi_bitcode_EQ)) { + const Arg *A = Args.getLastArg(options::OPT_opencilk_abi_bitcode_EQ); + SmallString<128> P(A->getValue()); + if (!getVFS().exists(P)) + getDriver().Diag(diag::err_drv_opencilk_missing_abi_bitcode) + << A->getAsString(Args); + if (IsLTO) + CmdArgs.push_back( + Args.MakeArgString("--opencilk-abi-bitcode=" + P)); + } + + bool UseAsan = getSanitizerArgs(Args).needsAsanRt(); + SmallString<128> BitcodeFilename(UseAsan ? "libopencilk-asan-abi" + : "libopencilk-abi"); + BitcodeFilename += "_"; + BitcodeFilename += getOSLibraryNameSuffix(); + BitcodeFilename += ".bc"; + + for (auto RuntimePath : getOpenCilkRuntimePaths(Args)) { + SmallString<128> P(RuntimePath); + llvm::sys::path::append(P, BitcodeFilename); + if (getVFS().exists(P)) { + // The same argument works regardless of IsLTO. + CmdArgs.push_back(Args.MakeArgString("--opencilk-abi-bitcode=" + P)); + return; + } + } + getDriver().Diag(diag::err_drv_opencilk_missing_abi_bitcode) + << BitcodeFilename; +} + +void DarwinClang::AddLinkTapirRuntimeLib(const ArgList &Args, + ArgStringList &CmdArgs, + StringRef LibName, + RuntimeLinkOptions Opts, + bool IsShared) const { + SmallString<64> DarwinLibName = StringRef("lib"); + DarwinLibName += LibName; + DarwinLibName += "_"; + DarwinLibName += getOSLibraryNameSuffix(); + DarwinLibName += IsShared ? "_dynamic.dylib" : ".a"; + SmallString<128> Dir(getDriver().ResourceDir); + if (Args.hasArg(options::OPT_opencilk_resource_dir_EQ)) { + for (auto OpenCilkRuntimeDir : getOpenCilkRuntimePaths(Args)) { + if (getVFS().exists(OpenCilkRuntimeDir)) { + Dir.assign(OpenCilkRuntimeDir); + break; + } + } + } else { + llvm::sys::path::append( + Dir, "lib", (Opts & RLO_IsEmbedded) ? 
"macho_embedded" : "darwin"); + } + + SmallString<128> P(Dir); + llvm::sys::path::append(P, DarwinLibName); + + // For now, allow missing resource libraries to support developers who may + // not have compiler-rt checked out or integrated into their build (unless + // we explicitly force linking with this library). + if ((Opts & RLO_AlwaysLink) || getVFS().exists(P)) { + const char *LibArg = Args.MakeArgString(P); + CmdArgs.push_back(LibArg); + } + + // Adding the rpaths might negatively interact when other rpaths are involved, + // so we should make sure we add the rpaths last, after all user-specified + // rpaths. This is currently true from this place, but we need to be + // careful if this function is ever called before user's rpaths are emitted. + if (Opts & RLO_AddRPath) { + assert(DarwinLibName.endswith(".dylib") && "must be a dynamic library"); + + // Add @executable_path to rpath to support having the dylib copied with + // the executable. + CmdArgs.push_back("-rpath"); + CmdArgs.push_back("@executable_path"); + + // Add the path to the resource dir to rpath to support using the dylib + // from the default location without copying. 
+ CmdArgs.push_back("-rpath"); + CmdArgs.push_back(Args.MakeArgString(Dir)); + } +} + +void DarwinClang::AddLinkTapirRuntime(const ArgList &Args, + ArgStringList &CmdArgs) const { + TapirTargetID TapirTarget = parseTapirTarget(Args); + if (TapirTarget == TapirTargetID::Last_TapirTargetID) + if (const Arg *A = Args.getLastArg(options::OPT_ftapir_EQ)) + getDriver().Diag(diag::err_drv_invalid_value) << A->getAsString(Args) + << A->getValue(); + + switch (TapirTarget) { + case TapirTargetID::Cheetah: + CmdArgs.push_back("-lcheetah"); + break; + case TapirTargetID::OpenCilk: { + bool StaticOpenCilk = Args.hasArg(options::OPT_static_libopencilk); + bool UseAsan = getSanitizerArgs(Args).needsAsanRt(); + + auto RLO = RLO_AlwaysLink; + if (!StaticOpenCilk) + RLO = RuntimeLinkOptions(RLO | RLO_AddRPath); + + // If pedigrees are enabled, link the OpenCilk pedigree library. + if (Args.hasArg(options::OPT_fopencilk_enable_pedigrees)) + AddLinkTapirRuntimeLib(Args, CmdArgs, + UseAsan ? "opencilk-pedigrees-asan" + : "opencilk-pedigrees", + RLO, !StaticOpenCilk); + + // Link the correct Cilk personality fn + if (getDriver().CCCIsCXX()) + AddLinkTapirRuntimeLib(Args, CmdArgs, + UseAsan ? "opencilk-asan-personality-cpp" + : "opencilk-personality-cpp", + RLO, !StaticOpenCilk); + else + AddLinkTapirRuntimeLib(Args, CmdArgs, + UseAsan ? "opencilk-asan-personality-c" + : "opencilk-personality-c", + RLO, !StaticOpenCilk); + + // Link the opencilk runtime. We do this after linking the personality + // function, to ensure that symbols are resolved correctly when using static + // linking. + AddLinkTapirRuntimeLib(Args, CmdArgs, + UseAsan ? 
"opencilk-asan" : "opencilk", RLO, + !StaticOpenCilk); + break; + } + case TapirTargetID::Cilk: + CmdArgs.push_back("-lcilkrts"); + break; + case TapirTargetID::Qthreads: + CmdArgs.push_back("-lqthread"); + break; + default: + break; + } +} diff --git a/clang/lib/Driver/ToolChains/Darwin.h b/clang/lib/Driver/ToolChains/Darwin.h index 815449ae8f3792..ab56ceced5dd27 100644 --- a/clang/lib/Driver/ToolChains/Darwin.h +++ b/clang/lib/Driver/ToolChains/Darwin.h @@ -171,6 +171,10 @@ class LLVM_LIBRARY_VISIBILITY MachO : public ToolChain { virtual void AddLinkARCArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const {} + /// Add the linker arguments to link a Tapir runtime library. + virtual void AddLinkTapirRuntime(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const {} + /// Add the linker arguments to link the compiler runtime library. /// /// FIXME: This API is intended for use with embedded libraries only, and is @@ -601,6 +605,16 @@ class LLVM_LIBRARY_VISIBILITY DarwinClang : public Darwin { void AddLinkARCArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const override; + path_list + getOpenCilkRuntimePaths(const llvm::opt::ArgList &Args) const override; + + void AddOpenCilkABIBitcode(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs, + bool IsLTO = false) const override; + + void AddLinkTapirRuntime(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const override; + unsigned GetDefaultDwarfVersion() const override; // Until dtrace (via CTF) and LLDB can deal with distributed debug info, // Darwin defaults to standalone/full debug info. 
@@ -617,6 +631,14 @@ class LLVM_LIBRARY_VISIBILITY DarwinClang : public Darwin { StringRef Sanitizer, bool shared = true) const; + void AddCilktoolRTLibs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const; + + void AddLinkTapirRuntimeLib(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs, + StringRef LibName, RuntimeLinkOptions Opts, + bool IsShared) const; + bool AddGnuCPlusPlusIncludePaths(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, llvm::SmallString<128> Base, diff --git a/clang/lib/Driver/ToolChains/DragonFly.cpp b/clang/lib/Driver/ToolChains/DragonFly.cpp index ba901407715f32..420521335b804f 100644 --- a/clang/lib/Driver/ToolChains/DragonFly.cpp +++ b/clang/lib/Driver/ToolChains/DragonFly.cpp @@ -120,6 +120,8 @@ void dragonfly::Linker::ConstructJob(Compilation &C, const JobAction &JA, AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); + getToolChain().AddTapirRuntimeLibArgs(Args, CmdArgs); + if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs, options::OPT_r)) { CmdArgs.push_back("-L/usr/lib/gcc80"); diff --git a/clang/lib/Driver/ToolChains/FreeBSD.cpp b/clang/lib/Driver/ToolChains/FreeBSD.cpp index 84e257741702ce..a223024c015e59 100644 --- a/clang/lib/Driver/ToolChains/FreeBSD.cpp +++ b/clang/lib/Driver/ToolChains/FreeBSD.cpp @@ -282,11 +282,17 @@ void freebsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs); bool NeedsXRayDeps = addXRayRuntime(ToolChain, Args, CmdArgs); + bool NeedsCilkSanitizerDeps = needsCilkSanitizerDeps(ToolChain, Args); addLinkerCompressDebugSectionsOption(ToolChain, Args, CmdArgs); AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); unsigned Major = ToolChain.getTriple().getOSMajorVersion(); bool Profiling = Args.hasArg(options::OPT_pg) && Major != 0 && Major < 14; + addCSIRuntime(ToolChain, Args, CmdArgs); + addCilktoolRuntime(ToolChain, Args, CmdArgs); + + 
ToolChain.AddTapirRuntimeLibArgs(Args, CmdArgs); + if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs, options::OPT_r)) { // Use the static OpenMP runtime with -static-openmp @@ -306,6 +312,10 @@ void freebsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, linkSanitizerRuntimeDeps(ToolChain, CmdArgs); if (NeedsXRayDeps) linkXRayRuntimeDeps(ToolChain, CmdArgs); + if (NeedsCilkSanitizerDeps) { + linkCilkSanitizerRuntimeDeps(Args, ToolChain, CmdArgs); + CmdArgs.push_back("-lstdthreads"); + } // FIXME: For some reason GCC passes -lgcc and -lgcc_s before adding // the default system libraries. Just mimic this for now. if (Profiling) @@ -477,6 +487,7 @@ SanitizerMask FreeBSD::getSupportedSanitizers() const { Res |= SanitizerKind::PointerCompare; Res |= SanitizerKind::PointerSubtract; Res |= SanitizerKind::Vptr; + Res |= SanitizerKind::Cilk; if (IsAArch64 || IsX86_64 || IsMIPS64) { Res |= SanitizerKind::Leak; Res |= SanitizerKind::Thread; diff --git a/clang/lib/Driver/ToolChains/Fuchsia.cpp b/clang/lib/Driver/ToolChains/Fuchsia.cpp index 65692cc7f954cb..9d79ea5bcbd666 100644 --- a/clang/lib/Driver/ToolChains/Fuchsia.cpp +++ b/clang/lib/Driver/ToolChains/Fuchsia.cpp @@ -144,6 +144,9 @@ void fuchsia::Linker::ConstructJob(Compilation &C, const JobAction &JA, addLinkerCompressDebugSectionsOption(ToolChain, Args, CmdArgs); AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); + addCSIRuntime(ToolChain, Args, CmdArgs); + addCilktoolRuntime(ToolChain, Args, CmdArgs); + if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs, options::OPT_r)) { if (Args.hasArg(options::OPT_static)) diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 40038dce47d844..3d772223fc78e1 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -545,6 +545,7 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("--no-demangle"); bool 
NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs); + bool NeedsCilkSanitizerDeps = needsCilkSanitizerDeps(ToolChain, Args); bool NeedsXRayDeps = addXRayRuntime(ToolChain, Args, CmdArgs); addLinkerCompressDebugSectionsOption(ToolChain, Args, CmdArgs); AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); @@ -554,6 +555,11 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, // The profile runtime also needs access to system libraries. getToolChain().addProfileRTLibs(Args, CmdArgs); + addCSIRuntime(ToolChain, Args, CmdArgs); + addCilktoolRuntime(ToolChain, Args, CmdArgs); + + ToolChain.AddTapirRuntimeLibArgs(Args, CmdArgs); + if (D.CCCIsCXX() && !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs, options::OPT_r)) { @@ -590,6 +596,9 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (NeedsSanitizerDeps) linkSanitizerRuntimeDeps(ToolChain, CmdArgs); + if (NeedsCilkSanitizerDeps) + linkCilkSanitizerRuntimeDeps(Args, ToolChain, CmdArgs); + if (NeedsXRayDeps) linkXRayRuntimeDeps(ToolChain, CmdArgs); diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index 1ba222bf83b103..1247a32d4cbae8 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -812,6 +812,7 @@ SanitizerMask Linux::getSupportedSanitizers() const { Res |= SanitizerKind::HWAddress; } if (IsX86_64 || IsAArch64) { + Res |= SanitizerKind::Cilk; Res |= SanitizerKind::KernelHWAddress; } // Work around "Cannot represent a difference across sections". 
diff --git a/clang/lib/Driver/ToolChains/MinGW.cpp b/clang/lib/Driver/ToolChains/MinGW.cpp index 503dbf3f0dea3c..b290028c6cc82f 100644 --- a/clang/lib/Driver/ToolChains/MinGW.cpp +++ b/clang/lib/Driver/ToolChains/MinGW.cpp @@ -243,6 +243,8 @@ void tools::MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA, addFortranRuntimeLibs(TC, CmdArgs); } + TC.AddTapirRuntimeLibArgs(Args, CmdArgs); + // TODO: Add profile stuff here if (TC.ShouldLinkCXXStdlib(Args)) { diff --git a/clang/lib/Driver/ToolChains/Minix.cpp b/clang/lib/Driver/ToolChains/Minix.cpp index 4b8670a79012ee..e9665b712e59a1 100644 --- a/clang/lib/Driver/ToolChains/Minix.cpp +++ b/clang/lib/Driver/ToolChains/Minix.cpp @@ -72,6 +72,8 @@ void tools::minix::Linker::ConstructJob(Compilation &C, const JobAction &JA, getToolChain().addProfileRTLibs(Args, CmdArgs); + getToolChain().AddTapirRuntimeLibArgs(Args, CmdArgs); + if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs, options::OPT_r)) { if (D.CCCIsCXX()) { diff --git a/clang/lib/Driver/ToolChains/Myriad.cpp b/clang/lib/Driver/ToolChains/Myriad.cpp index 30424ff49e6477..df45b3139d8709 100644 --- a/clang/lib/Driver/ToolChains/Myriad.cpp +++ b/clang/lib/Driver/ToolChains/Myriad.cpp @@ -168,11 +168,19 @@ void tools::Myriad::Linker::ConstructJob(Compilation &C, const JobAction &JA, TC.AddFilePathLibArgs(Args, CmdArgs); bool NeedsSanitizerDeps = addSanitizerRuntimes(TC, Args, CmdArgs); + bool NeedsCilkSanitizerDeps = needsCilkSanitizerDeps(TC, Args); AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); + addCSIRuntime(TC, Args, CmdArgs); + addCilktoolRuntime(TC, Args, CmdArgs); + + TC.AddTapirRuntimeLibArgs(Args, CmdArgs); + if (UseDefaultLibs) { if (NeedsSanitizerDeps) linkSanitizerRuntimeDeps(TC, CmdArgs); + if (NeedsCilkSanitizerDeps) + linkCilkSanitizerRuntimeDeps(Args, TC, CmdArgs); if (C.getDriver().CCCIsCXX()) { if (TC.GetCXXStdlibType(Args) == ToolChain::CST_Libcxx) { CmdArgs.push_back("-lc++"); diff --git 
a/clang/lib/Driver/ToolChains/NaCl.cpp b/clang/lib/Driver/ToolChains/NaCl.cpp index 38151735ee51fa..2a3c9194b728c6 100644 --- a/clang/lib/Driver/ToolChains/NaCl.cpp +++ b/clang/lib/Driver/ToolChains/NaCl.cpp @@ -130,6 +130,8 @@ void nacltools::Linker::ConstructJob(Compilation &C, const JobAction &JA, AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); + ToolChain.AddTapirRuntimeLibArgs(Args, CmdArgs); + if (D.CCCIsCXX() && !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { if (ToolChain.ShouldLinkCXXStdlib(Args)) { diff --git a/clang/lib/Driver/ToolChains/NetBSD.cpp b/clang/lib/Driver/ToolChains/NetBSD.cpp index ab028f59deaa47..1e2530743a8009 100644 --- a/clang/lib/Driver/ToolChains/NetBSD.cpp +++ b/clang/lib/Driver/ToolChains/NetBSD.cpp @@ -263,6 +263,7 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs); bool NeedsXRayDeps = addXRayRuntime(ToolChain, Args, CmdArgs); + bool NeedsCilkSanitizerDeps = needsCilkSanitizerDeps(ToolChain, Args); AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); const SanitizerArgs &SanArgs = ToolChain.getSanitizerArgs(Args); @@ -271,6 +272,9 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Args.MakeArgString(ToolChain.getCompilerRTPath())); } + addCSIRuntime(getToolChain(), Args, CmdArgs); + addCilktoolRuntime(getToolChain(), Args, CmdArgs); + bool useLibgcc = true; switch (ToolChain.getArch()) { case llvm::Triple::aarch64: @@ -308,6 +312,8 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, linkSanitizerRuntimeDeps(ToolChain, CmdArgs); if (NeedsXRayDeps) linkXRayRuntimeDeps(ToolChain, CmdArgs); + if (NeedsCilkSanitizerDeps) + linkCilkSanitizerRuntimeDeps(Args, getToolChain(), CmdArgs); if (Args.hasArg(options::OPT_pthread)) CmdArgs.push_back("-lpthread"); CmdArgs.push_back("-lc"); diff --git a/clang/lib/Driver/ToolChains/PS4CPU.cpp 
b/clang/lib/Driver/ToolChains/PS4CPU.cpp index 2f43d33bf0f1c8..8b330b9e5ab2dd 100644 --- a/clang/lib/Driver/ToolChains/PS4CPU.cpp +++ b/clang/lib/Driver/ToolChains/PS4CPU.cpp @@ -222,6 +222,8 @@ void tools::PScpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, AddLinkerInputs(TC, Inputs, Args, CmdArgs, JA); + TC.AddTapirRuntimeLibArgs(Args, CmdArgs); + if (Args.hasArg(options::OPT_pthread)) { CmdArgs.push_back("-lpthread"); } diff --git a/clang/lib/Driver/ToolChains/Solaris.cpp b/clang/lib/Driver/ToolChains/Solaris.cpp index 85619a91554e1a..01eef30a0de5d1 100644 --- a/clang/lib/Driver/ToolChains/Solaris.cpp +++ b/clang/lib/Driver/ToolChains/Solaris.cpp @@ -151,6 +151,8 @@ void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA, bool NeedsSanitizerDeps = addSanitizerRuntimes(getToolChain(), Args, CmdArgs); AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); + getToolChain().AddTapirRuntimeLibArgs(Args, CmdArgs); + if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs, options::OPT_r)) { if (D.CCCIsCXX()) { diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 1fba91bed04141..0283a3cea9ccb9 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -30,6 +30,7 @@ #include "clang/Driver/Driver.h" #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/Options.h" +#include "clang/Driver/Tapir.h" #include "clang/Frontend/CommandLineSourceLoc.h" #include "clang/Frontend/DependencyOutputOptions.h" #include "clang/Frontend/FrontendDiagnostic.h" @@ -87,6 +88,8 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/TargetParser/Host.h" #include "llvm/TargetParser/Triple.h" +#include "llvm/Transforms/Tapir/TapirTargetIDs.h" +#include "llvm/Support/ScopedPrinter.h" #include #include #include @@ -1300,6 +1303,94 @@ static SmallVector serializeSanitizerKinds(SanitizerSet S) { return Values; } +static 
LangOptions::CilktoolKind +parseCilktoolKind(StringRef FlagName, ArgList &Args, DiagnosticsEngine &Diags) { + if (Arg *A = Args.getLastArg(OPT_fcilktool_EQ)) { + StringRef Val = A->getValue(); + LangOptions::CilktoolKind ParsedCilktool = + llvm::StringSwitch(Val) + .Case("cilkscale", LangOptions::Cilktool_Cilkscale) + .Case("cilkscale-instructions", + LangOptions::Cilktool_Cilkscale_InstructionCount) + .Case("cilkscale-benchmark", + LangOptions::Cilktool_Cilkscale_Benchmark) + .Default(LangOptions::Cilktool_None); + if (ParsedCilktool == LangOptions::Cilktool_None) + Diags.Report(diag::err_drv_invalid_value) << FlagName << Val; + else + return ParsedCilktool; + } + return LangOptions::Cilktool_None; +} + +static std::optional +serializeCilktoolKind(LangOptions::CilktoolKind K) { + std::optional CilktoolStr; + switch (K) { + case LangOptions::Cilktool_Cilkscale: + CilktoolStr = "cilkscale"; + break; + case LangOptions::Cilktool_Cilkscale_InstructionCount: + CilktoolStr = "cilkscale-instructions"; + break; + case LangOptions::Cilktool_Cilkscale_Benchmark: + CilktoolStr = "cilkscale-benchmark"; + break; + case LangOptions::Cilktool_None: + break; + } + return CilktoolStr; +} + +static LangOptions::CSIExtensionPoint +parseCSIExtensionPoint(StringRef FlagName, ArgList &Args, + DiagnosticsEngine &Diags) { + if (Arg *A = Args.getLastArg(OPT_fcsi_EQ)) { + StringRef Val = A->getValue(); + LangOptions::CSIExtensionPoint ParsedExt = + llvm::StringSwitch(Val) + .Case("first", LangOptions::CSI_EarlyAsPossible) + .Case("early", LangOptions::CSI_ModuleOptimizerEarly) + .Case("last", LangOptions::CSI_OptimizerLast) + .Case("tapirlate", LangOptions::CSI_TapirLate) + .Case("aftertapirloops", LangOptions::CSI_TapirLoopEnd) + .Default(LangOptions::CSI_None); + if (ParsedExt == LangOptions::CSI_None) { + Diags.Report(diag::err_drv_invalid_value) << FlagName << Val; + return LangOptions::CSI_None; + } else + return ParsedExt; + } else if (Args.hasArg(OPT_fcsi)) + // Use TapirLate 
extension point by default, for backwards compatability. + return LangOptions::CSI_TapirLate; + return LangOptions::CSI_None; +} + +static std::optional +serializeCSIExtensionPoint(LangOptions::CSIExtensionPoint X) { + std::optional CSIExtPtStr; + switch (X) { + case LangOptions::CSI_EarlyAsPossible: + CSIExtPtStr = "first"; + break; + case LangOptions::CSI_ModuleOptimizerEarly: + CSIExtPtStr = "early"; + break; + case LangOptions::CSI_OptimizerLast: + CSIExtPtStr = "last"; + break; + case LangOptions::CSI_TapirLate: + CSIExtPtStr = "tapirlate"; + break; + case LangOptions::CSI_TapirLoopEnd: + CSIExtPtStr = "aftertapirloops"; + break; + case LangOptions::CSI_None: + break; + } + return CSIExtPtStr; +} + static void parseXRayInstrumentationBundle(StringRef FlagName, StringRef Bundle, ArgList &Args, DiagnosticsEngine &D, XRayInstrSet &S) { @@ -1386,6 +1477,10 @@ void CompilerInvocation::GenerateCodeGenArgs( else if (!Opts.DirectAccessExternalData && LangOpts->PICLevel == 0) GenerateArg(Args, OPT_fno_direct_access_external_data, SA); + if (std::optional TapirTargetStr = + serializeTapirTarget(Opts.getTapirTarget())) + GenerateArg(Args, OPT_ftapir_EQ, *TapirTargetStr, SA); + std::optional DebugInfoVal; switch (Opts.DebugInfo) { case llvm::codegenoptions::DebugLineTablesOnly: @@ -1688,6 +1783,14 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, Opts.setDebugInfo(llvm::codegenoptions::LimitedDebugInfo); } + // Parse Tapir-related codegen options. 
+ TapirTargetID TapirTarget = parseTapirTarget(Args); + if (TapirTarget == TapirTargetID::Last_TapirTargetID) + if (const Arg *A = Args.getLastArg(OPT_ftapir_EQ)) + Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) + << A->getValue(); + Opts.setTapirTarget(TapirTarget); + for (const auto &Arg : Args.getAllArgValues(OPT_fdebug_prefix_map_EQ)) { auto Split = StringRef(Arg).split('='); Opts.DebugPrefixMap.emplace_back(Split.first, Split.second); @@ -3278,6 +3381,12 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts, GenerateArg(Args, OPT_pic_is_pie, SA); for (StringRef Sanitizer : serializeSanitizerKinds(Opts.Sanitize)) GenerateArg(Args, OPT_fsanitize_EQ, Sanitizer, SA); + if (std::optional CSIExtPt = serializeCSIExtensionPoint( + Opts.getComprehensiveStaticInstrumentation())) + GenerateArg(Args, OPT_fcsi_EQ, *CSIExtPt, SA); + if (std::optional Cilktool = + serializeCilktoolKind(Opts.getCilktool())) + GenerateArg(Args, OPT_fcilktool_EQ, *Cilktool, SA); return; } @@ -3346,6 +3455,13 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts, if (Opts.IgnoreXCOFFVisibility) GenerateArg(Args, OPT_mignore_xcoff_visibility, SA); + if (Opts.getCilk() == LangOptions::Cilk_opencilk) + GenerateArg(Args, OPT_fopencilk, SA); + if (Opts.getCilk() == LangOptions::Cilk_plus) + GenerateArg(Args, OPT_fcilkplus, SA); + if (Opts.CilkOptions.has(CilkOpt_Pedigrees)) + GenerateArg(Args, OPT_fopencilk_enable_pedigrees, SA); + if (Opts.SignedOverflowBehavior == LangOptions::SOB_Trapping) { GenerateArg(Args, OPT_ftrapv, SA); GenerateArg(Args, OPT_ftrapv_handler, Opts.OverflowHandler, SA); @@ -3466,6 +3582,13 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts, else if (Opts.DefaultFPContractMode == LangOptions::FPM_FastHonorPragmas) GenerateArg(Args, OPT_ffp_contract, "fast-honor-pragmas", SA); + if (std::optional CSIExtPt = serializeCSIExtensionPoint( + Opts.getComprehensiveStaticInstrumentation())) + GenerateArg(Args, 
OPT_fcsi_EQ, *CSIExtPt, SA); + if (std::optional Cilktool = + serializeCilktoolKind(Opts.getCilktool())) + GenerateArg(Args, OPT_fcilktool_EQ, *Cilktool, SA); + for (StringRef Sanitizer : serializeSanitizerKinds(Opts.Sanitize)) GenerateArg(Args, OPT_fsanitize_EQ, Sanitizer, SA); @@ -3543,6 +3666,11 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, Opts.PIE = Args.hasArg(OPT_pic_is_pie); parseSanitizerKinds("-fsanitize=", Args.getAllArgValues(OPT_fsanitize_EQ), Diags, Opts.Sanitize); + if (Args.hasArg(OPT_fcsi_EQ) || Args.hasArg(OPT_fcsi)) + Opts.setComprehensiveStaticInstrumentation( + parseCSIExtensionPoint("-fcsi=", Args, Diags)); + if (Args.hasArg(OPT_fcilktool_EQ)) + Opts.setCilktool(parseCilktoolKind("-fcilktool=", Args, Diags)); return Diags.getNumErrors() == NumErrorsBefore; } @@ -3712,6 +3840,22 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, if (T.isOSAIX() && (Args.hasArg(OPT_mignore_xcoff_visibility))) Opts.IgnoreXCOFFVisibility = 1; + bool OpenCilk = Args.hasArg(OPT_fopencilk); + bool CilkPlus = Args.hasArg(OPT_fcilkplus); + if (OpenCilk) { + if (CilkPlus) { + Diags.Report(diag::err_drv_double_cilk); + } + Opts.setCilk(LangOptions::Cilk_opencilk); + if (Args.hasArg(OPT_fopencilk_enable_pedigrees)) + Opts.CilkOptions.set(CilkOpt_Pedigrees, true); + } else if (CilkPlus) { + Opts.setCilk(LangOptions::Cilk_plus); + } + + if (Opts.getCilk() != LangOptions::Cilk_none && Opts.ObjC) + Diags.Report(diag::err_drv_cilk_objc); + if (Args.hasArg(OPT_ftrapv)) { Opts.setSignedOverflowBehavior(LangOptions::SOB_Trapping); // Set the handler, if one is specified. 
@@ -3941,6 +4085,15 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, systemIgnorelists.begin(), systemIgnorelists.end()); + // -fcsi + if (Args.hasArg(OPT_fcsi_EQ) || Args.hasArg(OPT_fcsi)) + Opts.setComprehensiveStaticInstrumentation( + parseCSIExtensionPoint("-fcsi=", Args, Diags)); + + // -fcilktool= + if (Args.hasArg(OPT_fcilktool_EQ)) + Opts.setCilktool(parseCilktoolKind("-fcilktool=", Args, Diags)); + if (Arg *A = Args.getLastArg(OPT_fclang_abi_compat_EQ)) { Opts.setClangABICompat(LangOptions::ClangABI::Latest); diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index f8fae82fba1208..45fcae3801e599 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -1290,6 +1290,59 @@ static void InitializePredefinedMacros(const TargetInfo &TI, } } + // Cilk definition + switch (LangOpts.getCilk()) { + case LangOptions::Cilk_none: + break; + case LangOptions::Cilk_plus: + Builder.defineMacro("__cilk", "200"); + break; + case LangOptions::Cilk_opencilk: + Builder.defineMacro("__cilk", "300"); + break; + } + + // Add macro to indicate whether pedigrees are enabled. + if (LangOpts.CilkOptions.has(CilkOption::CilkOpt_Pedigrees)) + Builder.defineMacro("__cilk_pedigrees__"); + + // Add macro to indicate that the program is compiled with Cilksan enabled. + if (LangOpts.Sanitize.has(SanitizerKind::Cilk)) { + Builder.defineMacro("__cilksan__"); + + // Rename library functions that Cilksan tracks for race detection. 
+ + // Pthread locking routines + Builder.defineMacro("pthread_mutex_init", "__csan_pthread_mutex_init"); + Builder.defineMacro("pthread_mutex_destroy", + "__csan_pthread_mutex_destroy"); + Builder.defineMacro("pthread_mutex_lock", "__csan_pthread_mutex_lock"); + Builder.defineMacro("pthread_mutex_trylock", + "__csan_pthread_mutex_trylock"); + Builder.defineMacro("pthread_mutex_unlock", "__csan_pthread_mutex_unlock"); + Builder.defineMacro("pthread_once", "__csan_pthread_once"); + + // C11 locking routines + Builder.defineMacro("mtx_init", "__csan_mtx_init"); + Builder.defineMacro("mtx_destroy", "__csan_mtx_destroy"); + Builder.defineMacro("mtx_lock", "__csan_mtx_lock"); + Builder.defineMacro("mtx_trylock", "__csan_mtx_trylock"); + Builder.defineMacro("mtx_timedlock", "__csan_mtx_timedlock"); + Builder.defineMacro("mtx_unlock", "__csan_mtx_unlock"); + Builder.defineMacro("call_once", "__csan_call_once"); + } + + // Add macros to indicate that the program is compiled with different Cilk + // tools. 
+ switch (LangOpts.getCilktool()) { + case LangOptions::CilktoolKind::Cilktool_Cilkscale: + case LangOptions::CilktoolKind::Cilktool_Cilkscale_InstructionCount: + case LangOptions::CilktoolKind::Cilktool_Cilkscale_Benchmark: + Builder.defineMacro("__cilkscale__"); + break; + default: break; + } + // CUDA device path compilaton if (LangOpts.CUDAIsDevice && !LangOpts.HIP) { // The CUDA_ARCH value is set for the GPU target specified in the NVPTX diff --git a/clang/lib/Parse/CMakeLists.txt b/clang/lib/Parse/CMakeLists.txt index 3f65eea5218b15..71a68937c40fca 100644 --- a/clang/lib/Parse/CMakeLists.txt +++ b/clang/lib/Parse/CMakeLists.txt @@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS add_clang_library(clangParse ParseAST.cpp + ParseCilk.cpp ParseCXXInlineMethods.cpp ParseDecl.cpp ParseDeclCXX.cpp diff --git a/clang/lib/Parse/ParseCilk.cpp b/clang/lib/Parse/ParseCilk.cpp new file mode 100644 index 00000000000000..4bdf98f98f818d --- /dev/null +++ b/clang/lib/Parse/ParseCilk.cpp @@ -0,0 +1,539 @@ +//===--- ParseCilk.cpp - Cilk Parsing -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Cilk portions of the Parser interface. 
+// +//===----------------------------------------------------------------------===// + +#include "clang/Parse/RAIIObjectsForParser.h" +#include "clang/Parse/Parser.h" + +using namespace clang; + +/// ParseCilkSyncStatement +/// cilk_sync-statement: +/// '_Cilk_sync' ';' +StmtResult Parser::ParseCilkSyncStatement() { + assert(Tok.is(tok::kw__Cilk_sync) && "Not a _Cilk_sync stmt!"); + return Actions.ActOnCilkSyncStmt(ConsumeToken()); +} + +/// ParseCilkSpawnStatement +/// cilk_spawn-statement: +/// '_Cilk_spawn' statement +StmtResult Parser::ParseCilkSpawnStatement() { + assert(Tok.is(tok::kw__Cilk_spawn) && "Not a _Cilk_spawn stmt!"); + SourceLocation SpawnLoc = ConsumeToken(); // eat the '_Cilk_spawn'. + + unsigned ScopeFlags = Scope::BlockScope | Scope::FnScope | Scope::DeclScope; + + if (Tok.is(tok::l_brace)) { + StmtResult SubStmt = ParseCompoundStatement(false, ScopeFlags | + Scope::CompoundStmtScope); + if (SubStmt.isInvalid()) + return StmtError(); + + return Actions.ActOnCilkSpawnStmt(SpawnLoc, SubStmt.get()); + } + + ParseScope CilkSpawnScope(this, ScopeFlags); + + // Parse statement of spawned child + StmtResult SubStmt = ParseStatement(); + CilkSpawnScope.Exit(); + + if (SubStmt.isInvalid()) + return StmtError(); + + return Actions.ActOnCilkSpawnStmt(SpawnLoc, SubStmt.get()); +} + +namespace { + +enum MisleadingStatementKind { MSK_if, MSK_else, MSK_for, MSK_while }; + +struct MisleadingIndentationChecker { + Parser &P; + SourceLocation StmtLoc; + SourceLocation PrevLoc; + unsigned NumDirectives; + MisleadingStatementKind Kind; + bool ShouldSkip; + MisleadingIndentationChecker(Parser &P, MisleadingStatementKind K, + SourceLocation SL) + : P(P), StmtLoc(SL), PrevLoc(P.getCurToken().getLocation()), + NumDirectives(P.getPreprocessor().getNumDirectives()), Kind(K), + ShouldSkip(P.getCurToken().is(tok::l_brace)) { + if (!P.MisleadingIndentationElseLoc.isInvalid()) { + StmtLoc = P.MisleadingIndentationElseLoc; + P.MisleadingIndentationElseLoc = 
SourceLocation(); + } + if (Kind == MSK_else && !ShouldSkip) + P.MisleadingIndentationElseLoc = SL; + } + + /// Compute the column number will aligning tabs on TabStop (-ftabstop), this + /// gives the visual indentation of the SourceLocation. + static unsigned getVisualIndentation(SourceManager &SM, SourceLocation Loc) { + unsigned TabStop = SM.getDiagnostics().getDiagnosticOptions().TabStop; + + unsigned ColNo = SM.getSpellingColumnNumber(Loc); + if (ColNo == 0 || TabStop == 1) + return ColNo; + + std::pair FIDAndOffset = SM.getDecomposedLoc(Loc); + + bool Invalid; + StringRef BufData = SM.getBufferData(FIDAndOffset.first, &Invalid); + if (Invalid) + return 0; + + const char *EndPos = BufData.data() + FIDAndOffset.second; + // FileOffset are 0-based and Column numbers are 1-based + assert(FIDAndOffset.second + 1 >= ColNo && + "Column number smaller than file offset?"); + + unsigned VisualColumn = 0; // Stored as 0-based column, here. + // Loop from beginning of line up to Loc's file position, counting columns, + // expanding tabs. + for (const char *CurPos = EndPos - (ColNo - 1); CurPos != EndPos; + ++CurPos) { + if (*CurPos == '\t') + // Advance visual column to next tabstop. 
+ VisualColumn += (TabStop - VisualColumn % TabStop); + else + VisualColumn++; + } + return VisualColumn + 1; + } + + void Check() { + Token Tok = P.getCurToken(); + if (P.getActions().getDiagnostics().isIgnored( + diag::warn_misleading_indentation, Tok.getLocation()) || + ShouldSkip || NumDirectives != P.getPreprocessor().getNumDirectives() || + Tok.isOneOf(tok::semi, tok::r_brace) || Tok.isAnnotation() || + Tok.getLocation().isMacroID() || PrevLoc.isMacroID() || + StmtLoc.isMacroID() || + (Kind == MSK_else && P.MisleadingIndentationElseLoc.isInvalid())) { + P.MisleadingIndentationElseLoc = SourceLocation(); + return; + } + if (Kind == MSK_else) + P.MisleadingIndentationElseLoc = SourceLocation(); + + SourceManager &SM = P.getPreprocessor().getSourceManager(); + unsigned PrevColNum = getVisualIndentation(SM, PrevLoc); + unsigned CurColNum = getVisualIndentation(SM, Tok.getLocation()); + unsigned StmtColNum = getVisualIndentation(SM, StmtLoc); + + if (PrevColNum != 0 && CurColNum != 0 && StmtColNum != 0 && + ((PrevColNum > StmtColNum && PrevColNum == CurColNum) || + !Tok.isAtStartOfLine()) && + SM.getPresumedLineNumber(StmtLoc) != + SM.getPresumedLineNumber(Tok.getLocation()) && + (Tok.isNot(tok::identifier) || + P.getPreprocessor().LookAhead(0).isNot(tok::colon))) { + P.Diag(Tok.getLocation(), diag::warn_misleading_indentation) << Kind; + P.Diag(StmtLoc, diag::note_previous_statement); + } + } +}; + +} + +/// ParseCilkForStatement +/// cilk_for-statement: +/// '_Cilk_for' '(' expr ';' expr ';' expr ')' statement +/// '_Cilk_for' '(' declaration expr ';' expr ';' expr ')' statement +StmtResult Parser::ParseCilkForStatement(SourceLocation *TrailingElseLoc) { + assert(Tok.is(tok::kw__Cilk_for) && "Not a _Cilk_for stmt!"); + SourceLocation ForLoc = ConsumeToken(); // eat the '_Cilk_for'. 
+ + // SourceLocation CoawaitLoc; + // if (Tok.is(tok::kw_co_await)) + // CoawaitLoc = ConsumeToken(); + + if (Tok.isNot(tok::l_paren)) { + Diag(Tok, diag::err_expected_lparen_after) << "_Cilk_for"; + SkipUntil(tok::semi); + return StmtError(); + } + + bool C99orCXXorObjC = getLangOpts().C99 || getLangOpts().CPlusPlus || + getLangOpts().ObjC; + + // A _Cilk_for statement is a block. Start the loop scope. + // + // C++ 6.4p3: + // A name introduced by a declaration in a condition is in scope from its + // point of declaration until the end of the substatements controlled by the + // condition. + // C++ 3.3.2p4: + // Names declared in the for-init-statement, and in the condition of if, + // while, for, and switch statements are local to the if, while, for, or + // switch statement (including the controlled statement). + // C++ 6.5.3p1: + // Names declared in the for-init-statement are in the same declarative-region + // as those declared in the condition. + // + unsigned ScopeFlags = Scope::DeclScope | Scope::ControlScope; + + ParseScope CilkForScope(this, ScopeFlags); + + BalancedDelimiterTracker T(*this, tok::l_paren); + T.consumeOpen(); + + ExprResult Value; + + bool ForEach = false; + StmtResult FirstPart; + Sema::ConditionResult SecondPart; + ExprResult Collection; + ForRangeInfo ForRangeInfo; + FullExprArg ThirdPart(Actions); + + if (Tok.is(tok::code_completion)) { + cutOffParsing(); + Actions.CodeCompleteOrdinaryName(getCurScope(), Sema::PCC_ForInit); + return StmtError(); + } + + ParsedAttributes attrs(AttrFactory); + MaybeParseCXX11Attributes(attrs); + + SourceLocation EmptyInitStmtSemiLoc; + + // Parse the first part of the for specifier. + if (Tok.is(tok::semi)) { // _Cilk_for (; + ProhibitAttributes(attrs); + // We disallow this syntax for now. 
+ Diag(Tok, diag::err_cilk_for_missing_control_variable) << ";"; + ConsumeToken(); + } else if (getLangOpts().CPlusPlus && Tok.is(tok::identifier) && + isForRangeIdentifier()) { + ProhibitAttributes(attrs); + IdentifierInfo *Name = Tok.getIdentifierInfo(); + SourceLocation Loc = ConsumeToken(); + MaybeParseCXX11Attributes(attrs); + + ForRangeInfo.ColonLoc = ConsumeToken(); + if (Tok.is(tok::l_brace)) + ForRangeInfo.RangeExpr = ParseBraceInitializer(); + else + ForRangeInfo.RangeExpr = ParseExpression(); + + Diag(Loc, diag::err_for_range_identifier) + << ((getLangOpts().CPlusPlus11 && !getLangOpts().CPlusPlus17) + ? FixItHint::CreateInsertion(Loc, "auto &&") + : FixItHint()); + + ForRangeInfo.LoopVar = + Actions.ActOnCXXForRangeIdentifier(getCurScope(), Loc, Name, attrs); + } else if (isForInitDeclaration()) { // _Cilk_for (int X = 4; + ParenBraceBracketBalancer BalancerRAIIObj(*this); + + // Parse declaration, which eats the ';'. + if (!C99orCXXorObjC) // Use of C99-style for loops in C90 mode? + Diag(Tok, diag::ext_c99_variable_decl_in_for_loop); + + DeclGroupPtrTy DG; + if (Tok.is(tok::kw_using)) { + DG = ParseAliasDeclarationInInitStatement(DeclaratorContext::ForInit, + attrs); + } else { + // In C++0x, "for (T NS:a" might not be a typo for :: + bool MightBeForRangeStmt = getLangOpts().CPlusPlus; + ColonProtectionRAIIObject ColonProtection(*this, MightBeForRangeStmt); + + SourceLocation DeclStart = Tok.getLocation(), DeclEnd; + ParsedAttributes DeclSpecAttrs(AttrFactory); + DeclGroupPtrTy DG = ParseSimpleDeclaration( + DeclaratorContext::ForInit, DeclEnd, attrs, DeclSpecAttrs, false, + MightBeForRangeStmt ? &ForRangeInfo : nullptr); + FirstPart = Actions.ActOnDeclStmt(DG, DeclStart, Tok.getLocation()); + if (ForRangeInfo.ParsedForRangeDecl()) { + Diag(ForRangeInfo.ColonLoc, getLangOpts().CPlusPlus11 ? 
+ diag::warn_cxx98_compat_for_range : diag::ext_for_range); + + ForRangeInfo.LoopVar = FirstPart; + FirstPart = StmtResult(); + } else if (Tok.is(tok::semi)) { // for (int x = 4; + ConsumeToken(); + } else if ((ForEach = isTokIdentifier_in())) { + Actions.ActOnForEachDeclStmt(DG); + // ObjC: for (id x in expr) + ConsumeToken(); // consume 'in' + + if (Tok.is(tok::code_completion)) { + cutOffParsing(); + Actions.CodeCompleteObjCForCollection(getCurScope(), DG); + return StmtError(); + } + Collection = ParseExpression(); + } else { + Diag(Tok, diag::err_expected_semi_for); + } + } + } else { + ProhibitAttributes(attrs); + Value = Actions.CorrectDelayedTyposInExpr(ParseExpression()); + + ForEach = isTokIdentifier_in(); + + // Turn the expression into a stmt. + if (!Value.isInvalid()) { + if (ForEach) + FirstPart = Actions.ActOnForEachLValueExpr(Value.get()); + else { + // We already know this is not an init-statement within a for loop, so + // if we are parsing a C++11 range-based for loop, we should treat this + // expression statement as being a discarded value expression because + // we will err below. This way we do not warn on an unused expression + // that was an error in the first place, like with: for (expr : expr); + bool IsRangeBasedFor = + getLangOpts().CPlusPlus11 && !ForEach && Tok.is(tok::colon); + FirstPart = Actions.ActOnExprStmt(Value, !IsRangeBasedFor); + } + } + + if (Tok.is(tok::semi)) { + ConsumeToken(); + } else if (ForEach) { + ConsumeToken(); // consume 'in' + + if (Tok.is(tok::code_completion)) { + cutOffParsing(); + Actions.CodeCompleteObjCForCollection(getCurScope(), nullptr); + return StmtError(); + } + Collection = ParseExpression(); + } else if (getLangOpts().CPlusPlus11 && Tok.is(tok::colon) && FirstPart.get()) { + // User tried to write the reasonable, but ill-formed, for-range-statement + // for (expr : expr) { ... 
} + Diag(Tok, diag::err_for_range_expected_decl) + << FirstPart.get()->getSourceRange(); + SkipUntil(tok::r_paren, StopBeforeMatch); + SecondPart = Sema::ConditionError(); + } else { + if (!Value.isInvalid()) { + Diag(Tok, diag::err_expected_semi_for); + } else { + // Skip until semicolon or rparen, don't consume it. + SkipUntil(tok::r_paren, StopAtSemi | StopBeforeMatch); + if (Tok.is(tok::semi)) + ConsumeToken(); + } + } + } + + // Parse the second part of the for specifier. + if (!ForEach && !ForRangeInfo.ParsedForRangeDecl() && + !SecondPart.isInvalid()) { + // Parse the second part of the for specifier. + if (Tok.is(tok::semi)) { // for (...;; + // no second part. + Diag(Tok, diag::err_cilk_for_missing_condition); + } else if (Tok.is(tok::r_paren)) { + // missing both semicolons. + Diag(Tok, diag::err_cilk_for_missing_condition); + } else { + if (getLangOpts().CPlusPlus) { + // C++2a: We've parsed an init-statement; we might have a + // for-range-declaration next. + bool MightBeForRangeStmt = !ForRangeInfo.ParsedForRangeDecl(); + ColonProtectionRAIIObject ColonProtection(*this, MightBeForRangeStmt); + SecondPart = ParseCXXCondition( + nullptr, ForLoc, Sema::ConditionKind::Boolean, + // FIXME: recovery if we don't see another semi! + /*MissingOK=*/true, MightBeForRangeStmt ? &ForRangeInfo : nullptr, + /*EnterForConditionScope*/ true); + + if (ForRangeInfo.ParsedForRangeDecl()) { + Diag(FirstPart.get() ? FirstPart.get()->getBeginLoc() + : ForRangeInfo.ColonLoc, + getLangOpts().CPlusPlus20 + ? diag::warn_cxx17_compat_for_range_init_stmt + : diag::ext_for_range_init_stmt) + << (FirstPart.get() ? FirstPart.get()->getSourceRange() + : SourceRange()); + if (EmptyInitStmtSemiLoc.isValid()) { + Diag(EmptyInitStmtSemiLoc, diag::warn_empty_init_statement) + << /*for-loop*/ 2 + << FixItHint::CreateRemoval(EmptyInitStmtSemiLoc); + } + } + } else { + // We permit 'continue' and 'break' in the condition of a for loop. 
+ getCurScope()->AddFlags(Scope::BreakScope | Scope::ContinueScope); + + ExprResult SecondExpr = ParseExpression(); + if (SecondExpr.isInvalid()) + SecondPart = Sema::ConditionError(); + else + SecondPart = Actions.ActOnCondition( + getCurScope(), ForLoc, SecondExpr.get(), + Sema::ConditionKind::Boolean, /*MissingOK=*/true); + } + } + } + + // Enter a break / continue scope, if we didn't already enter one while + // parsing the second part. + if (!(getCurScope()->getFlags() & Scope::ContinueScope)) + getCurScope()->AddFlags(Scope::BreakScope | Scope::ContinueScope); + + // Parse the third part of the for statement. + if (!ForEach && !ForRangeInfo.ParsedForRangeDecl()) { + if (Tok.isNot(tok::semi)) { + if (!SecondPart.isInvalid()) + Diag(Tok, diag::err_expected_semi_for); + else + // Skip until semicolon or rparen, don't consume it. + SkipUntil(tok::r_paren, StopAtSemi | StopBeforeMatch); + } + + if (Tok.is(tok::semi)) { + ConsumeToken(); + } + + // Parse the third part of the _Cilk_for specifier. + if (Tok.isNot(tok::r_paren)) { // for (...;...;) + ExprResult Third = ParseExpression(); + // FIXME: The C++11 standard doesn't actually say that this is a + // discarded-value expression, but it clearly should be. + ThirdPart = Actions.MakeFullDiscardedValueExpr(Third.get()); + } else { + Diag(Tok, diag::err_cilk_for_missing_increment); + } + } + // Match the ')'. + T.consumeClose(); + + // // C++ Coroutines [stmt.iter]: + // // 'co_await' can only be used for a range-based for statement. 
+ // if (CoawaitLoc.isValid() && !ForRangeInfo.ParsedForRangeDecl()) { + // Diag(CoawaitLoc, diag::err_for_co_await_not_range_for); + // CoawaitLoc = SourceLocation(); + // } + + // if (CoawaitLoc.isValid() && getLangOpts().CPlusPlus20) + // Diag(CoawaitLoc, diag::warn_deprecated_for_co_await); + + // // We need to perform most of the semantic analysis for a C++0x for-range + // // statememt before parsing the body, in order to be able to deduce the type + // // of an auto-typed loop variable. + // StmtResult ForRangeStmt; + // StmtResult ForEachStmt; + + // TODO: Extend _Cilk_for to support these. + if (ForRangeInfo.ParsedForRangeDecl()) { + Diag(ForLoc, diag::err_cilk_for_forrange_loop_not_supported); + // ExprResult CorrectedRange = + // Actions.CorrectDelayedTyposInExpr(ForRangeInfo.RangeExpr.get()); + // ForRangeStmt = Actions.ActOnCXXForRangeStmt( + // getCurScope(), ForLoc, CoawaitLoc, FirstPart.get(), + // ForRangeInfo.LoopVar.get(), ForRangeInfo.ColonLoc, CorrectedRange.get(), + // T.getCloseLocation(), Sema::BFRK_Build); + + // Similarly, we need to do the semantic analysis for a for-range + // statement immediately in order to close over temporaries correctly. + } else if (ForEach) { + Diag(ForLoc, diag::err_cilk_for_foreach_loop_not_supported); + // ForEachStmt = Actions.ActOnObjCForCollectionStmt(ForLoc, + // FirstPart.get(), + // Collection.get(), + // T.getCloseLocation()); + } + // else { + // // In OpenMP loop region loop control variable must be captured and be + // // private. Perform analysis of first part (if any). + // if (getLangOpts().OpenMP && FirstPart.isUsable()) { + // Actions.ActOnOpenMPLoopInitialization(ForLoc, FirstPart.get()); + // } + // } + + // The body of the _Cilk_for statement is a scope, even if there is no + // compound stmt. We only do this if the body isn't a compound statement to + // avoid push/pop in common cases. 
+ // + // C++ 6.5p2: + // The substatement in an iteration-statement implicitly defines a local scope + // which is entered and exited each time through the loop. + // + // See comments in ParseIfStatement for why we create a scope for + // for-init-statement/condition and a new scope for substatement in C++. + // + ParseScope InnerScope(this, Scope::DeclScope, /*C99orCXXorObjC*/ true, + Tok.is(tok::l_brace)); + + // The body of the for loop has the same local mangling number as the + // for-init-statement. + // It will only be incremented if the body contains other things that would + // normally increment the mangling number (like a compound statement). + getCurScope()->decrementMSManglingNumber(); + + MisleadingIndentationChecker MIChecker(*this, MSK_for, ForLoc); + + // Read the body statement. + StmtResult Body(ParseStatement(TrailingElseLoc)); + + if (Body.isUsable()) + MIChecker.Check(); + + // Pop the body scope if needed. + InnerScope.Exit(); + + // Leave the for-scope. + CilkForScope.Exit(); + + if (Body.isInvalid()) + return StmtError(); + + // if (ForEach) + // return Actions.FinishObjCForCollectionStmt(ForEachStmt.get(), Body.get()); + + // if (ForRangeInfo.ParsedForRangeDecl()) + // return Actions.FinishCXXForRangeStmt(ForRangeStmt.get(), Body.get()); + + return Actions.ActOnCilkForStmt(ForLoc, T.getOpenLocation(), FirstPart.get(), + nullptr, Sema::ConditionResult(), nullptr, + nullptr, SecondPart, ThirdPart, + T.getCloseLocation(), Body.get()); +} + +/// ParseCilkScopeStatement +/// cilk_scope-statement: +/// '_Cilk_scope' statement +StmtResult Parser::ParseCilkScopeStatement() { + assert(Tok.is(tok::kw__Cilk_scope) && "Not a _Cilk_scope stmt!"); + SourceLocation ScopeLoc = ConsumeToken(); // eat the '_Cilk_scope'. + + // TODO: Decide whether to allow break statements in _Cilk_scopes. 
+ unsigned ScopeFlags = Scope::FnScope | Scope::DeclScope; + + if (Tok.is(tok::l_brace)) { + StmtResult SubStmt = ParseCompoundStatement(false, ScopeFlags | + Scope::CompoundStmtScope); + if (SubStmt.isInvalid()) + return StmtError(); + + return Actions.ActOnCilkScopeStmt(ScopeLoc, SubStmt.get()); + } + + ParseScope CilkScopeScope(this, ScopeFlags); + + // Parse statement of spawned child + StmtResult SubStmt = ParseStatement(); + CilkScopeScope.Exit(); + + if (SubStmt.isInvalid()) + return StmtError(); + + return Actions.ActOnCilkScopeStmt(ScopeLoc, SubStmt.get()); +} diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index cf1e3a94de7fdd..173a3e0a715689 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -5311,6 +5311,7 @@ bool Parser::isKnownToBeTypeSpecifier(const Token &Tok) const { case tok::kw___int128: case tok::kw_signed: case tok::kw_unsigned: + case tok::kw__Hyperobject: case tok::kw__Complex: case tok::kw__Imaginary: case tok::kw_void: @@ -5395,6 +5396,7 @@ bool Parser::isTypeSpecifierQualifier() { case tok::kw___int128: case tok::kw_signed: case tok::kw_unsigned: + case tok::kw__Hyperobject: case tok::kw__Complex: case tok::kw__Imaginary: case tok::kw_void: @@ -5602,6 +5604,7 @@ bool Parser::isDeclarationSpecifier( case tok::kw___int128: case tok::kw_signed: case tok::kw_unsigned: + case tok::kw__Hyperobject: case tok::kw__Complex: case tok::kw__Imaginary: case tok::kw_void: @@ -6097,6 +6100,9 @@ static bool isPtrOperatorToken(tok::TokenKind Kind, const LangOptions &Lang, Lang.getOpenCLCompatibleVersion() >= 200) return true; + if (Kind == tok::kw__Hyperobject) + return true; + if (!Lang.CPlusPlus) return false; @@ -6233,7 +6239,64 @@ void Parser::ParseDeclaratorInternal(Declarator &D, SourceLocation Loc = ConsumeToken(); // Eat the *, ^, & or &&. D.SetRangeEnd(Loc); - if (Kind == tok::star || Kind == tok::caret) { + if (Kind == tok::kw__Hyperobject) { + // Is a hyperobject. 
+ DeclSpec DS(AttrFactory); + + ParseTypeQualifierListOpt(DS, AR_GNUAttributesParsedAndRejected, true, + !D.mayOmitIdentifier()); + + Expr *Reduce = nullptr, *Identity = nullptr; + if (Tok.is(tok::l_paren)) { + SourceLocation Open = ConsumeParen(); // Eat the parenthesis + SmallVector Args; + bool Reported = false, Error = false; + SourceLocation Close = Tok.getLocation(); + + if (!Tok.is(tok::r_paren)) + Reported = ParseSimpleExpressionList(Args); + if (Tok.is(tok::r_paren)) + Close = ConsumeParen(); // Eat the parenthesis + else + SkipUntil(tok::r_paren, StopAtSemi); + + if (!Reported) { + for (const Expr *Arg : Args) { + if (Arg->containsErrors()) { + Reported = true; + break; + } + } + } + + switch (Args.size()) { + case 0: + break; + case 2: + Identity = Args[0]; + Reduce = Args[1]; + break; + default: + Error = true; + break; + } + if (Error && !Reported) + Diag(Loc, diag::error_hyperobject_arguments) + << SourceRange(Open, Close); + } + + D.ExtendWithDeclSpec(DS); + + // Recursively parse the declarator. + ParseDeclaratorInternal(D, DirectDeclParser); + if (getLangOpts().getCilk() == LangOptions::Cilk_opencilk) + D.AddTypeInfo(DeclaratorChunk::getHyperobject( + DS.getTypeQualifiers(), Loc, SourceLocation(), + SourceLocation(), Identity, Reduce), + std::move(DS.getAttributes()), SourceLocation()); + else + Diag(Loc, diag::attribute_requires_cilk) << Kind; + } else if (Kind == tok::star || Kind == tok::caret) { // Is a pointer. 
DeclSpec DS(AttrFactory); @@ -7691,6 +7754,7 @@ void Parser::ParseMisplacedBracketDeclarator(Declarator &D) { break; case DeclaratorChunk::Array: case DeclaratorChunk::Function: + case DeclaratorChunk::Hyperobject: case DeclaratorChunk::Paren: break; } diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 75d04824d8b99e..d7dbdd1ea2a10d 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -116,7 +116,7 @@ using namespace clang; /// [C++] throw-expression [C++ 15] /// /// assignment-operator: one of -/// = *= /= %= += -= <<= >>= &= ^= |= +/// = *= /= %= += -= <<= >>= &= ^= |= [= _Cilk_spawn] /// /// expression: [C99 6.5.17] /// assignment-expression ...[opt] @@ -1745,6 +1745,22 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind, Res = ParseExpressionTrait(); break; + // postfix-expression: [CP] + // _Cilk_spawn[opt] postfix-expression '(' argument-expression-list[opt] ')' + case tok::kw__Cilk_spawn: { + SourceLocation SpawnLoc = ConsumeToken(); + // if (!getLangOpts().Cilk) { + // Diag(SpawnLoc, diag::err_cilkplus_disable); + // SkipUntil(tok::semi, StopAtSemi | StopBeforeMatch); + // return ExprError(); + // } + + Res = ParseCastExpression(AnyCastExpr); + if (!Res.isInvalid()) + Res = Actions.ActOnCilkSpawnExpr(SpawnLoc, Res.get()); + return Res; + } + case tok::at: { if (NotPrimaryExpression) *NotPrimaryExpression = true; diff --git a/clang/lib/Parse/ParseObjc.cpp b/clang/lib/Parse/ParseObjc.cpp index b30f0380621a17..13efff5fe98e0d 100644 --- a/clang/lib/Parse/ParseObjc.cpp +++ b/clang/lib/Parse/ParseObjc.cpp @@ -1148,6 +1148,7 @@ IdentifierInfo *Parser::ParseObjCSelectorPiece(SourceLocation &SelectorLoc) { case tok::kw__Complex: case tok::kw___alignof: case tok::kw___auto_type: + case tok::kw__Hyperobject: IdentifierInfo *II = Tok.getIdentifierInfo(); SelectorLoc = ConsumeToken(); return II; diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp index 
b3178aef64d72d..eff3a34836944c 100644 --- a/clang/lib/Parse/ParsePragma.cpp +++ b/clang/lib/Parse/ParsePragma.cpp @@ -178,6 +178,12 @@ struct PragmaOpenMPHandler : public PragmaHandler { Token &FirstToken) override; }; +struct PragmaCilkHintHandler : public PragmaHandler { + PragmaCilkHintHandler() : PragmaHandler("cilk") { } + void HandlePragma(Preprocessor &PP, PragmaIntroducer Introducer, + Token &FirstToken) override; +}; + /// PragmaCommentHandler - "\#pragma comment ...". struct PragmaCommentHandler : public PragmaHandler { PragmaCommentHandler(Sema &Actions) @@ -474,6 +480,11 @@ void Parser::initializePragmaHandlers() { PP.AddPragmaHandler("clang", CUDAForceHostDeviceHandler.get()); } + // if (getLangOpts().Cilk) { + CilkHintHandler = std::make_unique(); + PP.AddPragmaHandler(CilkHintHandler.get()); + // } + OptimizeHandler = std::make_unique(Actions); PP.AddPragmaHandler("clang", OptimizeHandler.get()); @@ -593,6 +604,11 @@ void Parser::resetPragmaHandlers() { CUDAForceHostDeviceHandler.reset(); } + // if (getLangOpts().Cilk) { + PP.RemovePragmaHandler(CilkHintHandler.get()); + CilkHintHandler.reset(); + // } + PP.RemovePragmaHandler("STDC", FPContractHandler.get()); FPContractHandler.reset(); @@ -1318,10 +1334,14 @@ static std::string PragmaLoopHintString(Token PragmaName, Token Option) { std::string ClangLoopStr("clang loop "); if (Str == "loop" && Option.getIdentifierInfo()) ClangLoopStr += Option.getIdentifierInfo()->getName(); + std::string CilkStr("cilk "); + if (Str == "cilk" && Option.getIdentifierInfo()) + CilkStr += Option.getIdentifierInfo()->getName(); return std::string(llvm::StringSwitch(Str) .Case("loop", ClangLoopStr) .Case("unroll_and_jam", Str) .Case("unroll", Str) + .Case("cilk", CilkStr) .Default("")); } @@ -3649,6 +3669,79 @@ void PragmaUnrollHintHandler::HandlePragma(Preprocessor &PP, /*DisableMacroExpansion=*/false, /*IsReinject=*/false); } +/// Handle the cilk_for loop pragmas. 
+/// #pragma cilk grainsize tapir-grainsize-value +/// #pragma cilk grainsize '(' tapir-grainsize-value ')' +/// +/// tapir-grainsize-value: +/// constant-expression +/// +void PragmaCilkHintHandler::HandlePragma(Preprocessor &PP, + PragmaIntroducer Introducer, + Token &Tok) { + // Incoming token is "cilk" for "#pragma cilk grainsize". + Token PragmaName = Tok; + SmallVector TokenList; + + PP.Lex(Tok); + if (Tok.isNot(tok::identifier)) { + PP.Diag(Tok.getLocation(), diag::err_pragma_loop_invalid_option) + << /*MissingOption=*/true << ""; + return; + } + + while (Tok.is(tok::identifier)) { + Token Option = Tok; + IdentifierInfo *OptionInfo = Tok.getIdentifierInfo(); + + bool OptionValid = llvm::StringSwitch(OptionInfo->getName()) + .Case("grainsize", true) + .Default(false); + if (!OptionValid) { + PP.Diag(Tok.getLocation(), diag::err_pragma_cilk_invalid_option) + << /*MissingOption=*/false << OptionInfo; + return; + } + PP.Lex(Tok); + + // Handle = for backwards compatibility + bool GrainsizeEq = Tok.is(tok::equal); + if (GrainsizeEq) { + PP.Diag(Tok.getLocation(), diag::warn_pragma_cilk_grainsize_equals); + PP.Lex(Tok); + } + + // Read '(' if it exists. + bool ValueInParens = Tok.is(tok::l_paren); + if (ValueInParens) + PP.Lex(Tok); + + auto *Info = new (PP.getPreprocessorAllocator()) PragmaLoopHintInfo; + if (ParseLoopHintValue(PP, Tok, PragmaName, Option, ValueInParens, *Info)) + return; + + // Generate the loop hint token. 
+ Token LoopHintTok; + LoopHintTok.startToken(); + LoopHintTok.setKind(tok::annot_pragma_loop_hint); + LoopHintTok.setLocation(PragmaName.getLocation()); + LoopHintTok.setAnnotationEndLoc(PragmaName.getLocation()); + LoopHintTok.setAnnotationValue(static_cast(Info)); + TokenList.push_back(LoopHintTok); + } + if (Tok.isNot(tok::eod)) { + PP.Diag(Tok.getLocation(), diag::warn_pragma_extra_tokens_at_eol) + << "cilk grainsize"; + return; + } + + auto TokenArray = std::make_unique(TokenList.size()); + std::copy(TokenList.begin(), TokenList.end(), TokenArray.get()); + + PP.EnterTokenStream(std::move(TokenArray), TokenList.size(), + /*DisableMacroExpansion=*/false, /*IsReinject=*/false); +} + /// Handle the Microsoft \#pragma intrinsic extension. /// /// The syntax is: diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp index 2346470dbdb73d..f7f8a9c04d5735 100644 --- a/clang/lib/Parse/ParseStmt.cpp +++ b/clang/lib/Parse/ParseStmt.cpp @@ -81,6 +81,7 @@ StmtResult Parser::ParseStatement(SourceLocation *TrailingElseLoc, /// while-statement /// do-statement /// for-statement +/// [CP] cilk_for-statement /// /// expression-statement: /// expression[opt] ';' @@ -91,6 +92,8 @@ StmtResult Parser::ParseStatement(SourceLocation *TrailingElseLoc, /// 'break' ';' /// 'return' expression[opt] ';' /// [GNU] 'goto' '*' expression ';' +/// [CP] '_Cilk_spawn' statement ';' +/// [CP] '_Cilk_sync' ';' /// /// [OBC] objc-throw-statement: /// [OBC] '@' 'throw' expression ';' @@ -464,6 +467,40 @@ StmtResult Parser::ParseStatementOrDeclarationAfterAttributes( ProhibitAttributes(GNUAttrs); return HandlePragmaCaptured(); + case tok::kw__Cilk_spawn: // [CP] _Cilk_spawn statement + // if (!getLangOpts().Cilk) { + // Diag(Tok, diag::err_cilkplus_disable); + // SkipUntil(tok::semi); + // return StmtError(); + // } + return ParseCilkSpawnStatement(); + + case tok::kw__Cilk_sync: // [CP] _Cilk_sync statement + // if (!getLangOpts().Cilk) { + // Diag(Tok, 
diag::err_cilkplus_disable); + // SkipUntil(tok::semi); + // return StmtError(); + // } + Res = ParseCilkSyncStatement(); + SemiError = "_Cilk_sync"; + break; + + case tok::kw__Cilk_for: + // if (!getLangOpts().Cilk) { + // Diag(Tok, diag::err_cilkplus_disable); + // SkipUntil(tok::semi); + // return StmtError(); + // } + return ParseCilkForStatement(TrailingElseLoc); + + case tok::kw__Cilk_scope: // [CP] _Cilk_scope statement + // if (!getLangOpts().Cilk) { + // Diag(Tok, diag::err_cilkplus_disable); + // SkipUntil(tok::semi); + // return StmtError(); + // } + return ParseCilkScopeStatement(); + case tok::annot_pragma_openmp: // Prohibit attributes that are not OpenMP attributes, but only before // processing a #pragma omp clause. diff --git a/clang/lib/Parse/ParseTentative.cpp b/clang/lib/Parse/ParseTentative.cpp index 66433705250010..b77530496f41f0 100644 --- a/clang/lib/Parse/ParseTentative.cpp +++ b/clang/lib/Parse/ParseTentative.cpp @@ -1519,6 +1519,9 @@ Parser::isCXXDeclarationSpecifier(ImplicitTypenameContext AllowImplicitTypename, // HLSL address space qualifiers case tok::kw_groupshared: + // Cilk + case tok::kw__Hyperobject: + // GNU case tok::kw_restrict: case tok::kw__Complex: diff --git a/clang/lib/Sema/CMakeLists.txt b/clang/lib/Sema/CMakeLists.txt index 629fafadcf9f45..7306de1ee87b81 100644 --- a/clang/lib/Sema/CMakeLists.txt +++ b/clang/lib/Sema/CMakeLists.txt @@ -31,6 +31,7 @@ add_clang_library(clangSema SemaCXXScopeSpec.cpp SemaCast.cpp SemaChecking.cpp + SemaCilk.cpp SemaCodeComplete.cpp SemaConcept.cpp SemaConsumer.cpp diff --git a/clang/lib/Sema/DeclSpec.cpp b/clang/lib/Sema/DeclSpec.cpp index d59778b5b614b7..f81824a1f68e5f 100644 --- a/clang/lib/Sema/DeclSpec.cpp +++ b/clang/lib/Sema/DeclSpec.cpp @@ -335,6 +335,7 @@ bool Declarator::isDeclarationOfFunction() const { case DeclaratorChunk::BlockPointer: case DeclaratorChunk::MemberPointer: case DeclaratorChunk::Pipe: + case DeclaratorChunk::Hyperobject: return false; } llvm_unreachable("Invalid 
type chunk"); diff --git a/clang/lib/Sema/JumpDiagnostics.cpp b/clang/lib/Sema/JumpDiagnostics.cpp index 45ff36d5fe237b..1310860ea2205a 100644 --- a/clang/lib/Sema/JumpDiagnostics.cpp +++ b/clang/lib/Sema/JumpDiagnostics.cpp @@ -14,6 +14,7 @@ #include "clang/AST/DeclCXX.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" +#include "clang/AST/StmtCilk.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtObjC.h" #include "clang/AST/StmtOpenMP.h" @@ -154,6 +155,9 @@ static ScopePair GetDiagForGotoScopeDecl(Sema &S, const Decl *D) { if (VD->hasAttr()) return ScopePair(diag::note_protected_by_cleanup, diag::note_exits_cleanup); + if (VD->isReducer()) + return ScopePair(diag::note_protected_by_reducer, + diag::note_exits_cleanup); if (VD->hasLocalStorage()) { switch (VD->getType().isDestructedType()) { @@ -170,6 +174,7 @@ static ScopePair GetDiagForGotoScopeDecl(Sema &S, const Decl *D) { diag::note_exits_dtor); case QualType::DK_cxx_destructor: + case QualType::DK_hyperobject: OutDiag = diag::note_exits_dtor; break; @@ -258,6 +263,7 @@ void JumpScopeChecker::BuildScopeInformation(VarDecl *D, std::pair Diags; switch (destructKind) { case QualType::DK_cxx_destructor: + case QualType::DK_hyperobject: Diags = ScopePair(diag::note_enters_block_captures_cxx_obj, diag::note_exits_block_captures_cxx_obj); break; @@ -592,6 +598,89 @@ void JumpScopeChecker::BuildScopeInformation(Stmt *S, break; } + case Stmt::CilkScopeStmtClass: { + // Disallow jumps into _Cilk_scope statements. + CilkScopeStmt *CS = cast(S); + unsigned NewParentScope = Scopes.size(); + Scopes.push_back(GotoScope(ParentScope, diag::note_protected_by_cilk_scope, + 0, CS->getBeginLoc())); + BuildScopeInformation(CS->getBody(), NewParentScope); + return; + } + + case Stmt::CilkSpawnStmtClass: { + // Disallow jumps into or out of _Cilk_spawn statements. 
+ CilkSpawnStmt *CS = cast(S); + unsigned NewParentScope = Scopes.size(); + Scopes.push_back(GotoScope(ParentScope, + diag::note_protected_by_spawn, + diag::note_exits_spawn, + CS->getBeginLoc())); + BuildScopeInformation(CS->getSpawnedStmt(), NewParentScope); + return; + } + + case Stmt::CilkForStmtClass: { + CilkForStmt *CF = cast(S); + + if (Stmt *Init = CF->getInit()) + BuildScopeInformation(Init, ParentScope); + // if (Stmt *Limit = CF->getLimitStmt()) + // BuildScopeInformation(Limit, ParentScope); + // if (Stmt *Begin = CF->getBeginStmt()) + // BuildScopeInformation(Begin, ParentScope); + // if (Stmt *End = CF->getEndStmt()) + // BuildScopeInformation(End, ParentScope); + + // Cannot jump into the middle of the condition. + unsigned NewParentScope; + if (Expr *InitCond = CF->getInitCond()) { + NewParentScope = Scopes.size(); + Scopes.push_back(GotoScope(ParentScope, + diag::note_protected_by_cilk_for, + diag::note_exits_cilk_for, + CF->getBeginLoc())); + BuildScopeInformation(InitCond, NewParentScope); + } + if (Expr *Cond = CF->getCond()) { + NewParentScope = Scopes.size(); + Scopes.push_back(GotoScope(ParentScope, + diag::note_protected_by_cilk_for, + diag::note_exits_cilk_for, + CF->getBeginLoc())); + BuildScopeInformation(Cond, NewParentScope); + } + + // Cannot jump into the increment. 
+ if (Expr *Inc = CF->getInc()) { + NewParentScope = Scopes.size(); + Scopes.push_back(GotoScope(ParentScope, + diag::note_protected_by_cilk_for, + diag::note_exits_cilk_for, + CF->getBeginLoc())); + BuildScopeInformation(Inc, NewParentScope); + } + + // Cannot jump into the loop-variable declaration + if (DeclStmt *LV = CF->getLoopVarStmt()) { + NewParentScope = Scopes.size(); + Scopes.push_back(GotoScope(ParentScope, + diag::note_protected_by_cilk_for, + diag::note_exits_cilk_for, + CF->getBeginLoc())); + BuildScopeInformation(LV, NewParentScope); + } + + // Cannot jump into the loop body + NewParentScope = Scopes.size(); + Scopes.push_back(GotoScope(ParentScope, + diag::note_protected_by_cilk_for, + diag::note_exits_cilk_for, + CF->getBeginLoc())); + BuildScopeInformation(CF->getBody(), NewParentScope); + return; + } + case Stmt::CaseStmtClass: case Stmt::DefaultStmtClass: case Stmt::LabelStmtClass: @@ -940,6 +1029,15 @@ void JumpScopeChecker::CheckJump(Stmt *From, Stmt *To, SourceLocation DiagLoc, S.Diag(From->getBeginLoc(), diag::warn_jump_out_of_seh_finally); break; } + // Similarly, check for jumps out of _Cilk_spawn or _Cilk_for. 
+ if (Scopes[I].InDiag == diag::note_protected_by_spawn) { + S.Diag(From->getBeginLoc(), diag::err_jump_out_of_spawn); + break; + } + if (Scopes[I].InDiag == diag::note_protected_by_cilk_for) { + S.Diag(From->getBeginLoc(), diag::err_jump_out_of_cilk_for); + break; + } if (Scopes[I].InDiag == diag::note_omp_protected_structured_block) { S.Diag(From->getBeginLoc(), diag::err_goto_into_protected_scope); S.Diag(To->getBeginLoc(), diag::note_omp_exits_structured_block); diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index b338d601db7397..aea24c16a4a0c3 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -3332,6 +3332,10 @@ ExprResult Sema::BuildCStyleCastExpr(SourceLocation LPLoc, TypeSourceInfo *CastTypeInfo, SourceLocation RPLoc, Expr *CastExpr) { + if (CastTypeInfo->getType()->getTypeClass() == Type::Hyperobject) { + Diag(LPLoc, diag::err_hyperobject_cast); + return ExprError(); + } CastOperation Op(*this, CastTypeInfo->getType(), CastExpr); Op.DestRange = CastTypeInfo->getTypeLoc().getSourceRange(); Op.OpRange = SourceRange(LPLoc, CastExpr->getEndLoc()); @@ -3360,6 +3364,12 @@ ExprResult Sema::BuildCXXFunctionalCastExpr(TypeSourceInfo *CastTypeInfo, Expr *CastExpr, SourceLocation RPLoc) { assert(LPLoc.isValid() && "List-initialization shouldn't get here."); + + if (Type->getTypeClass() == Type::Hyperobject) { + Diag(LPLoc, diag::err_hyperobject_cast); + return ExprError(); + } + CastOperation Op(*this, Type, CastExpr); Op.DestRange = CastTypeInfo->getTypeLoc().getSourceRange(); Op.OpRange = SourceRange(Op.DestRange.getBegin(), CastExpr->getEndLoc()); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index a94f009f3fa697..a1d9d5b46dc8c3 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -16215,6 +16215,17 @@ class SequenceChecker : public ConstEvaluatedExprVisitor { notePostMod(O, UO, UK_ModAsSideEffect); } + void VisitCilkSpawnExpr(const CilkSpawnExpr 
*E) { + Object O = getObject(E->getSpawnedExpr(), true); + if (!O) + return VisitExpr(E); + + // Cilk_spawn removes sequencing of the spawned expression. + // notePreUse(O, E); + Visit(E->getSpawnedExpr()); + // notePostUse(O, E); + } + void VisitBinLOr(const BinaryOperator *BO) { // C++11 [expr.log.or]p2: // If the second expression is evaluated, every value computation and diff --git a/clang/lib/Sema/SemaCilk.cpp b/clang/lib/Sema/SemaCilk.cpp new file mode 100644 index 00000000000000..f460199617ab64 --- /dev/null +++ b/clang/lib/Sema/SemaCilk.cpp @@ -0,0 +1,140 @@ +//===--- SemaCilk.cpp - Semantic analysis for Cilk extensions -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements semantic analysis for Cilk extensions. +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/ExprCilk.h" +#include "clang/AST/StmtCilk.h" +#include "clang/Sema/SemaInternal.h" +using namespace clang; +using namespace sema; + +static bool isValidCilkContext(Sema &S, SourceLocation Loc, StringRef Keyword) { + // Cilk is not permitted in unevaluated contexts. + if (S.isUnevaluatedContext()) { + S.Diag(Loc, diag::err_cilk_unevaluated_context) << Keyword; + return false; + } + + // Any other usage must be within a function. + FunctionDecl *FD = dyn_cast(S.CurContext); + if (!FD) { + S.Diag(Loc, diag::err_cilk_outside_function) << Keyword; + return false; + } + + // A spawn cannot appear in a control scope. + if (S.getCurScope()->getFlags() & Scope::ControlScope) { + S.Diag(Loc, diag::err_spawn_invalid_scope) << Keyword; + return false; + } + + // TODO: Add more checks for the validity of the current context for Cilk. 
+ // (See isValidCoroutineContext for example code.) + return true; +} + +/// Check that this is a context in which a Cilk keywords can appear. +static FunctionScopeInfo *checkCilkContext(Sema &S, SourceLocation Loc, + StringRef Keyword) { + if (!isValidCilkContext(S, Loc, Keyword)) + return nullptr; + + assert(isa(S.CurContext) && "not in a function scope"); + FunctionScopeInfo *ScopeInfo = S.getCurFunction(); + assert(ScopeInfo && "missing function scope for function"); + + return ScopeInfo; +} + +// Borrowed from SemaDeclCXX.cpp and modified. +static void SearchForReturnInStmt(Sema &Self, Stmt *S) { + if (isa(S)) + Self.Diag(S->getBeginLoc(), + diag::err_cilk_spawn_cannot_return); + + for (Stmt *SubStmt : S->children()) { + if (!SubStmt) + continue; + if (!isa(SubStmt)) + SearchForReturnInStmt(Self, SubStmt); + } +} + +StmtResult +Sema::ActOnCilkSpawnStmt(SourceLocation SpawnLoc, Stmt *SubStmt) { + if (!checkCilkContext(*this, SpawnLoc, "_Cilk_spawn")) + return StmtError(); + + SearchForReturnInStmt(*this, SubStmt); + + setFunctionHasBranchProtectedScope(); + + PushFunctionScope(); + PushExpressionEvaluationContext( + ExpressionEvaluationContext::PotentiallyEvaluated); + + StmtResult Result = new (Context) CilkSpawnStmt(SpawnLoc, SubStmt); + + PopExpressionEvaluationContext(); + PopFunctionScopeInfo(); + + return Result; +} + +StmtResult +Sema::ActOnCilkSyncStmt(SourceLocation SyncLoc) { + if (!checkCilkContext(*this, SyncLoc, "_Cilk_sync")) + return StmtError(); + return new (Context) CilkSyncStmt(SyncLoc); +} + +ExprResult Sema::ActOnCilkSpawnExpr(SourceLocation Loc, Expr *E) { + FunctionScopeInfo *CilkCtx = checkCilkContext(*this, Loc, "_Cilk_spawn"); + if (!CilkCtx) { + CorrectDelayedTyposInExpr(E); + return ExprError(); + } + + PushExpressionEvaluationContext( + ExpressionEvaluationContext::PotentiallyEvaluated); + + if (E->getType()->isPlaceholderType()) { + ExprResult R = CheckPlaceholderExpr(E); + if (R.isInvalid()) return ExprError(); + E = R.get(); + } 
+ + PopExpressionEvaluationContext(); + + ExprResult Result = + new (Context) CilkSpawnExpr(Loc, MaybeCreateExprWithCleanups(E)); + + return Result; +} + +StmtResult +Sema::ActOnCilkScopeStmt(SourceLocation ScopeLoc, Stmt *SubStmt) { + if (!checkCilkContext(*this, ScopeLoc, "_Cilk_scope")) + return StmtError(); + + setFunctionHasBranchProtectedScope(); + + PushFunctionScope(); + PushExpressionEvaluationContext( + ExpressionEvaluationContext::PotentiallyEvaluated); + + StmtResult Result = new (Context) CilkScopeStmt(ScopeLoc, SubStmt); + + PopExpressionEvaluationContext(); + PopFunctionScopeInfo(); + + return Result; +} diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 21b5781a71cddd..4aa5bc085996db 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -8675,7 +8675,7 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) { bool isVM = T->isVariablyModifiedType(); if (isVM || NewVD->hasAttr() || - NewVD->hasAttr()) + NewVD->hasAttr() || NewVD->getType()->isHyperobjectType()) setFunctionHasBranchProtectedScope(); if ((isVM && NewVD->hasLinkage()) || @@ -13655,7 +13655,7 @@ void Sema::ActOnUninitializedDecl(Decl *RealDecl) { return; if (VarDecl *Var = dyn_cast(RealDecl)) { - QualType Type = Var->getType(); + QualType Type = Var->getType().stripHyperobject(); // C++1z [dcl.dcl]p1 grammar implies that an initializer is mandatory. if (isa(RealDecl)) { @@ -14162,7 +14162,7 @@ void Sema::CheckCompleteVariableDeclaration(VarDecl *var) { } - QualType type = var->getType(); + QualType type = var->getType().stripHyperobject(); if (var->hasAttr()) getCurFunction()->addByrefBlockVar(var); @@ -14490,6 +14490,14 @@ void Sema::FinalizeDeclaration(Decl *ThisDecl) { if (VD->isFileVarDecl() && !isa(VD)) MarkUnusedFileScopedDecl(VD); + // This is only a shallow search. See also SemaType.cpp ContainsHyperobject. 
+  if (VD->getType()->isArrayType()) {
+    const ArrayType *A = VD->getType()->getAsArrayTypeUnsafe();
+    const HyperobjectType *H = A->getElementType()->getAs<HyperobjectType>();
+    if (H && H->hasCallbacks())
+      Diag(VD->getLocation(), diag::no_reducer_array);
+  }
+
   // Now we have parsed the initializer and can update the table of magic
   // tag values.
   if (!VD->hasAttr<TypeTagForDatatypeAttr>() ||
@@ -14823,6 +14831,12 @@ Decl *Sema::ActOnParamDeclarator(Scope *S, Declarator &D) {
   if (getLangOpts().OpenCL)
     deduceOpenCLAddressSpace(New);
 
+  if (New->getType()->getTypeClass() == Type::Hyperobject) {
+    Diag(New->getLocation(), diag::err_hyperobject_param);
+    // Disabling the parameter is easier than making argument passing work.
+    New->setInvalidDecl();
+  }
+
   return New;
 }
 
@@ -18761,6 +18775,14 @@ void Sema::ActOnFields(Scope *S, SourceLocation RecLoc, Decl *EnclosingDecl,
       continue;
     }
 
+    if (!FDTy->isDependentType()) {
+      if (const HyperobjectType *HT = FDTy->getAs<HyperobjectType>()) {
+        if (HT->hasCallbacks())
+          Diag(FD->getLocation(), diag::reducer_callbacks_not_allowed)
+              << FD->getDeclName();
+      }
+    }
+
     // C99 6.7.2.1p2:
     //   A structure or union shall not contain a member with
     //   incomplete or function type (hence, a structure shall not
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index ed69e802c95dd5..7a962dd65813c5 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -9493,6 +9493,32 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
   case ParsedAttr::AT_UsingIfExists:
     handleSimpleAttribute<UsingIfExistsAttr>(S, D, AL);
     break;
+
+  // Cilk attributes
+  case ParsedAttr::AT_StrandMalloc:
+    handleSimpleAttribute<StrandMallocAttr>(S, D, AL);
+    break;
+  case ParsedAttr::AT_StrandPure:
+    handleSimpleAttribute<StrandPureAttr>(S, D, AL);
+    break;
+  case ParsedAttr::AT_Stealable:
+    handleSimpleAttribute<StealableAttr>(S, D, AL);
+    break;
+  case ParsedAttr::AT_ReducerRegister:
+    handleSimpleAttribute<ReducerRegisterAttr>(S, D, AL);
+    break;
+  case ParsedAttr::AT_HyperView:
+    handleSimpleAttribute<HyperViewAttr>(S, D, AL);
+    break;
+  case 
ParsedAttr::AT_HyperToken:
+    handleSimpleAttribute<HyperTokenAttr>(S, D, AL);
+    break;
+  case ParsedAttr::AT_ReducerUnregister:
+    handleSimpleAttribute<ReducerUnregisterAttr>(S, D, AL);
+    break;
+  case ParsedAttr::AT_Injective:
+    handleSimpleAttribute<InjectiveAttr>(S, D, AL);
+    break;
   }
 }
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index b62f3c475c450c..3bfa98ee3724f8 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -11067,6 +11067,7 @@ void Sema::CheckConversionDeclarator(Declarator &D, QualType &R,
     case DeclaratorChunk::Reference:
     case DeclaratorChunk::MemberPointer:
     case DeclaratorChunk::Pipe:
+    case DeclaratorChunk::Hyperobject:
       extendLeft(Before, Chunk.getSourceRange());
       break;
 
@@ -14872,6 +14873,14 @@ void Sema::DefineImplicitCopyAssignment(SourceLocation CurrentLocation,
       continue;
     }
 
+    if (Field->getType()->isHyperobjectType()) {
+      Diag(ClassDecl->getLocation(), diag::err_uninitialized_member_for_assign)
+          << Context.getTagDeclType(ClassDecl) << 2 << Field->getDeclName();
+      Diag(Field->getLocation(), diag::note_declared_at);
+      Invalid = true;
+      continue;
+    }
+
     // Suppress assigning zero-width bitfields.
     if (Field->isZeroLengthBitField(Context))
       continue;
 
@@ -15247,6 +15256,14 @@ void Sema::DefineImplicitMoveAssignment(SourceLocation CurrentLocation,
       continue;
     }
 
+    if (Field->getType()->isHyperobjectType()) {
+      Diag(ClassDecl->getLocation(), diag::err_uninitialized_member_for_assign)
+          << Context.getTagDeclType(ClassDecl) << 2 << Field->getDeclName();
+      Diag(Field->getLocation(), diag::note_declared_at);
+      Invalid = true;
+      continue;
+    }
+
     // Suppress assigning zero-width bitfields.
if (Field->isZeroLengthBitField(Context)) continue; diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index 9b7ff5ff82519c..9b5dcd55353a98 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp @@ -1297,6 +1297,7 @@ CanThrowResult Sema::canThrow(const Stmt *S) { case Expr::ConvertVectorExprClass: case Expr::VAArgExprClass: case Expr::CXXParenListInitExprClass: + case Expr::CilkSpawnExprClass: return canSubStmtsThrow(*this, S); case Expr::CompoundLiteralExprClass: @@ -1524,6 +1525,10 @@ CanThrowResult Sema::canThrow(const Stmt *S) { case Stmt::SEHTryStmtClass: case Stmt::SwitchStmtClass: case Stmt::WhileStmtClass: + case Stmt::CilkSpawnStmtClass: + case Stmt::CilkSyncStmtClass: + case Stmt::CilkForStmtClass: + case Stmt::CilkScopeStmtClass: return canSubStmtsThrow(*this, S); case Stmt::DeclStmtClass: { diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 3a5e302cc03a31..af5acea59f293f 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -521,6 +521,8 @@ ExprResult Sema::DefaultFunctionArrayConversion(Expr *E, bool Diagnose) { E = result.get(); } + E = BuildHyperobjectLookup(E); + QualType Ty = E->getType(); assert(!Ty.isNull() && "DefaultFunctionArrayConversion - missing type"); @@ -683,6 +685,10 @@ ExprResult Sema::DefaultLvalueConversion(Expr *E) { } CheckForNullPointerDereference(*this, E); + + E = BuildHyperobjectLookup(E); + T = E->getType(); + if (const ObjCIsaExpr *OISA = dyn_cast(E->IgnoreParenCasts())) { NamedDecl *ObjectGetClass = LookupSingleName(TUScope, &Context.Idents.get("object_getClass"), @@ -2172,6 +2178,82 @@ NonOdrUseReason Sema::getNonOdrUseReasonInCurrentContext(ValueDecl *D) { return NOUR_None; } +Expr *Sema::BuildHyperobjectLookup(Expr *E, bool Pointer) { + if (!Pointer && !E->isLValue()) + return E; + + if (getLangOpts().getCilk() != LangOptions::Cilk_opencilk) + return E; + + QualType InputType = E->getType(); + if 
(Pointer) { + const PointerType *PT = InputType->getAs(); + if (!PT) + return E; + InputType = PT->getPointeeType(); + } + + const HyperobjectType *HT = InputType->getAs(); + if (!HT) + return E; + + bool Difficult = CurContext->isDependentContext(); + + QualType ResultType = HT->getElementType().withFastQualifiers( + InputType.getLocalFastQualifiers()); + QualType Ptr = Context.getPointerType(ResultType); + + ExprResult SizeExpr; + if (ResultType.getTypePtr()->isDependentType()) { + SizeExpr = CreateUnaryExprOrTypeTraitExpr(E, E->getExprLoc(), UETT_SizeOf); + } else { + QualType SizeType = Context.getSizeType(); + llvm::APInt Size(Context.getTypeSize(SizeType), + Context.getTypeSizeInChars(ResultType).getQuantity()); + SizeExpr = IntegerLiteral::Create(Context, Size, SizeType, E->getExprLoc()); + } + + Expr *VarAddr; + if (Pointer) { + VarAddr = E; + } else if (Difficult) { + ExprResult Address = + BuildBuiltinCallExpr(E->getExprLoc(), Builtin::BI__builtin_addressof, E); + assert(Address.isUsable()); + VarAddr = Address.get(); + } else { + VarAddr = UnaryOperator::Create(Context, E, UO_AddrOf, Ptr, VK_PRValue, + OK_Ordinary, SourceLocation(), false, + CurFPFeatureOverrides()); + } + Expr *CallArgs[] = {VarAddr, SizeExpr.get(), HT->getIdentity(), + HT->getReduce()}; + ExprResult Call = + BuildBuiltinCallExpr(E->getExprLoc(), Builtin::BI__hyper_lookup, CallArgs); + + // Template expansion normally strips out implicit casts, so make this + // explicit in C++. 
+  CastExpr *Casted = nullptr;
+  if (Difficult)
+    Casted = CXXStaticCastExpr::Create(
+        Context, Ptr, VK_PRValue, CK_BitCast, Call.get(), nullptr,
+        Context.CreateTypeSourceInfo(Ptr), FPOptionsOverride(),
+        SourceLocation(), SourceLocation(), SourceRange());
+  else
+    Casted =
+        ImplicitCastExpr::Create(Context, Ptr, CK_BitCast, Call.get(), nullptr,
+                                 VK_PRValue, CurFPFeatureOverrides());
+
+  if (Pointer)
+    return Casted;
+
+  auto *Deref = UnaryOperator::Create(Context, Casted, UO_Deref, ResultType,
+                                      VK_LValue, OK_Ordinary, SourceLocation(),
+                                      false, CurFPFeatureOverrides());
+
+  return Deref;
+}
+
 /// BuildDeclRefExpr - Build an expression that references a
 /// declaration that does not require a closure capture.
 DeclRefExpr *
@@ -4631,6 +4713,9 @@ static void captureVariablyModifiedType(ASTContext &Context, QualType T,
     case Type::Atomic:
       T = cast<AtomicType>(Ty)->getValueType();
       break;
+    case Type::Hyperobject:
+      T = cast<HyperobjectType>(Ty)->getElementType();
+      break;
     }
   } while (!T.isNull() && T->isVariablyModifiedType());
 }
 
@@ -6801,6 +6886,24 @@ static bool checkArgsForPlaceholders(Sema &S, MultiExprArg args) {
   return hasInvalid;
 }
 
+/// checkNumSpawnedExprs - Check that an appropriate number of spawned
+/// expressions appear in args.
+static bool checkNumSpawnedExprs(Sema &S, MultiExprArg args) {
+  unsigned count = 0;
+  for (size_t i = 0, e = args.size(); i != e; i++) {
+    if (isa<CilkSpawnExpr>(args[i])) {
+      ++count;
+      if (count > 1) {
+        if (count == 2)
+          S.Diag(args[i]->getBeginLoc(), diag::err_multiple_spawns);
+        else
+          S.Diag(args[i]->getBeginLoc(), diag::note_multiple_spawns);
+      }
+    }
+  }
+  return (count > 1);
+}
+
 /// If a builtin function has a pointer argument with no explicit address
 /// space, then it should be able to accept a pointer to any address
 /// space as input.
In order to do this, we need to replace the @@ -6823,6 +6926,9 @@ static FunctionDecl *rewriteBuiltinFunctionDecl(Sema *Sema, ASTContext &Context, ArgExprs.size() < FT->getNumParams()) return nullptr; + if (FDecl->getBuiltinID() == Builtin::BI__builtin_addressof) + return nullptr; + bool NeedsNewDecl = false; unsigned i = 0; SmallVector OverloadParams; @@ -7071,6 +7177,10 @@ ExprResult Sema::BuildCallExpr(Scope *Scope, Expr *Fn, SourceLocation LParenLoc, if (checkArgsForPlaceholders(*this, ArgExprs)) return ExprError(); + // Check that we have at most one spawned argument. + if (checkNumSpawnedExprs(*this, ArgExprs)) + return ExprError(); + if (getLangOpts().CPlusPlus) { // If this is a pseudo-destructor expression, build the call immediately. if (isa(Fn)) { @@ -10420,6 +10530,8 @@ Sema::CheckSingleAssignmentConstraints(QualType LHSType, ExprResult &CallerRHS, // they ask us to issue diagnostics. assert((ConvertRHS || !Diagnose) && "can't indicate whether we diagnosed"); + LHSType = LHSType.stripHyperobject(); + // If ConvertRHS is false, we want to leave the caller's RHS untouched. Sadly, // we can't avoid *all* modifications at the moment, so we need some somewhere // to put the updated value. @@ -14371,6 +14483,9 @@ static bool CheckForModifiableLvalue(Expr *E, SourceLocation Loc, Sema &S) { case Expr::MLV_SubObjCPropertySetting: DiagID = diag::err_no_subobject_property_setting; break; + case Expr::MLV_HyperobjectField: + DiagID = diag::err_hyperobject_struct_assign; + break; } SourceRange Assign; @@ -15260,6 +15375,12 @@ Sema::getSelfAssignmentClassMemberCandidate(const ValueDecl *SelfAssigned) { return (Field != Parent->field_end()) ? *Field : nullptr; } +/// Check if Expr is an illegal spawn expression. +static void CheckForIllegalSpawn(Sema &S, Expr *Expr) { + if (isa(Expr->IgnoreImplicit())) + S.Diag(Expr->getExprLoc(), diag::err_invalid_spawn_expr); +} + /// DiagnoseSelfAssignment - Emits a warning if a value is assigned to itself. 
/// This warning suppressed in the event of macro expansions. static void DiagnoseSelfAssignment(Sema &S, Expr *LHSExpr, Expr *RHSExpr, @@ -15505,6 +15626,17 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc, checkTypeSupport(LHSExpr->getType(), OpLoc, /*ValueDecl*/ nullptr); checkTypeSupport(RHSExpr->getType(), OpLoc, /*ValueDecl*/ nullptr); + // Check for illegal spawns + // TODO: Add support for _Cilk_spawn on the RHS of a compound-assignment + // operator. + if (!BinaryOperator::isAssignmentOp(Opc) || + BinaryOperator::isCompoundAssignmentOp(Opc)) + CheckForIllegalSpawn(*this, RHS.get()); + CheckForIllegalSpawn(*this, LHS.get()); + + if (BinaryOperator::isAssignmentOp(Opc)) + LHS = BuildHyperobjectLookup(LHS.get()); + switch (Opc) { case BO_Assign: ResultTy = CheckAssignmentOperands(LHS.get(), RHS, OpLoc, QualType(), Opc); @@ -15965,6 +16097,12 @@ static ExprResult BuildOverloadedBinOp(Sema &S, Scope *Sc, SourceLocation OpLoc, break; } + // Check for illegal spawns + if (!BinaryOperator::isAssignmentOp(Opc) || + BinaryOperator::isCompoundAssignmentOp(Opc)) + CheckForIllegalSpawn(S, RHS); + CheckForIllegalSpawn(S, LHS); + // Find all of the overloaded operators visible from this point. 
UnresolvedSet<16> Functions; S.LookupBinOp(Sc, OpLoc, Opc, Functions); @@ -15981,8 +16119,8 @@ ExprResult Sema::BuildBinOp(Scope *S, SourceLocation OpLoc, std::tie(LHS, RHS) = CorrectDelayedTyposInBinOp(*this, Opc, LHSExpr, RHSExpr); if (!LHS.isUsable() || !RHS.isUsable()) return ExprError(); - LHSExpr = LHS.get(); - RHSExpr = RHS.get(); + LHSExpr = BuildHyperobjectLookup(LHS.get()); + RHSExpr = BuildHyperobjectLookup(RHS.get()); // We want to end up calling one of checkPseudoObjectAssignment // (if the LHS is a pseudo-object), BuildOverloadedBinOp (if @@ -16167,6 +16305,7 @@ ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc, case UO_PreDec: case UO_PostInc: case UO_PostDec: + Input = BuildHyperobjectLookup(InputExpr); resultType = CheckIncrementDecrementOperand(*this, Input.get(), VK, OK, OpLoc, Opc == UO_PreInc || @@ -16176,6 +16315,8 @@ ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc, CanOverflow = isOverflowingIntegerType(Context, resultType); break; case UO_AddrOf: + // Before CheckAddressOfOperand + Input = BuildHyperobjectLookup(InputExpr); resultType = CheckAddressOfOperand(Input, OpLoc); CheckAddressOfNoDeref(InputExpr); RecordModifiableNonNullParam(*this, InputExpr); @@ -16319,6 +16460,7 @@ ExprResult Sema::CreateBuiltinUnaryOp(SourceLocation OpLoc, break; case UO_Real: case UO_Imag: + Input = BuildHyperobjectLookup(InputExpr); resultType = CheckRealImagOperand(*this, Input, OpLoc, Opc == UO_Real); // _Real maps ordinary l-values into ordinary l-values. _Imag maps ordinary // complex l-values to ordinary l-values and all other values to r-values. @@ -16439,16 +16581,20 @@ ExprResult Sema::BuildUnaryOp(Scope *S, SourceLocation OpLoc, Input = Result.get(); } - if (getLangOpts().CPlusPlus && Input->getType()->isOverloadableType() && - UnaryOperator::getOverloadedOperator(Opc) != OO_None && - !(Opc == UO_AddrOf && isQualifiedMemberAccess(Input))) { - // Find all of the overloaded operators visible from this point. 
- UnresolvedSet<16> Functions; - OverloadedOperatorKind OverOp = UnaryOperator::getOverloadedOperator(Opc); - if (S && OverOp != OO_None) - LookupOverloadedOperatorName(OverOp, S, Functions); - - return CreateOverloadedUnaryOp(OpLoc, Opc, Functions, Input); + if (getLangOpts().CPlusPlus) { + // A hyperobject may need to be converted to a view. + QualType Real = Input->getType().stripHyperobject(); + if (Real->isOverloadableType() && + UnaryOperator::getOverloadedOperator(Opc) != OO_None && + !(Opc == UO_AddrOf && isQualifiedMemberAccess(Input))) { + // Find all of the overloaded operators visible from this point. + UnresolvedSet<16> Functions; + OverloadedOperatorKind OverOp = UnaryOperator::getOverloadedOperator(Opc); + if (S && OverOp != OO_None) + LookupOverloadedOperatorName(OverOp, S, Functions); + + return CreateOverloadedUnaryOp(OpLoc, Opc, Functions, Input); + } } return CreateBuiltinUnaryOp(OpLoc, Opc, Input, IsAfterAmp); diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 1cff4a75790ec7..8cd1a666eb4bad 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -4376,6 +4376,11 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType, .get(); break; + case ICK_Hyperobject_To_View: + FromType = FromType.stripHyperobject(); + From = BuildHyperobjectLookup(From, false); + break; + default: llvm_unreachable("Improper first standard conversion"); } @@ -4696,6 +4701,7 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType, case ICK_Array_To_Pointer: case ICK_Function_To_Pointer: case ICK_Function_Conversion: + case ICK_Hyperobject_To_View: case ICK_Qualification: case ICK_Num_Conversion_Kinds: case ICK_C_Only_Conversion: diff --git a/clang/lib/Sema/SemaExprMember.cpp b/clang/lib/Sema/SemaExprMember.cpp index 3d14ca3859bb61..ca7e74f92fcdd7 100644 --- a/clang/lib/Sema/SemaExprMember.cpp +++ b/clang/lib/Sema/SemaExprMember.cpp @@ -1228,10 +1228,14 @@ static bool isPointerToRecordType(QualType T) 
{ /// Perform conversions on the LHS of a member access expression. ExprResult Sema::PerformMemberExprBaseConversion(Expr *Base, bool IsArrow) { + ExprResult Result; if (IsArrow && !Base->getType()->isFunctionType()) - return DefaultFunctionArrayLvalueConversion(Base); - - return CheckPlaceholderExpr(Base); + Result = DefaultFunctionArrayLvalueConversion(Base); + else + Result = CheckPlaceholderExpr(Base); + if (Result.isInvalid()) + return Result; + return BuildHyperobjectLookup(Result.get(), IsArrow); } /// Look up the given member of the given non-type-dependent diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index 32c9215184eba5..4db42b602671b5 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -12,6 +12,7 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/DeclObjC.h" +#include "clang/AST/ExprCilk.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/ExprOpenMP.h" @@ -1405,6 +1406,8 @@ void InitListChecker::CheckSubElementType(const InitializedEntity &Entity, return CheckReferenceType(Entity, IList, ElemType, Index, StructuredList, StructuredIndex); + ElemType = ElemType.stripHyperobject(); + if (InitListExpr *SubInitList = dyn_cast(expr)) { if (SubInitList->getNumInits() == 1 && IsStringInit(SubInitList->getInit(0), ElemType, SemaRef.Context) == @@ -3653,6 +3656,7 @@ void InitializationSequence::Step::Destroy() { case SK_OCLSamplerInit: case SK_OCLZeroOpaqueType: case SK_ParenthesizedListInit: + case SK_ViewLookup: break; case SK_ConversionSequence: @@ -3773,6 +3777,13 @@ void InitializationSequence::AddFinalCopy(QualType T) { Steps.push_back(S); } +void InitializationSequence::AddViewLookup(QualType T) { + Step S; + S.Kind = SK_ViewLookup; + S.Type = T; + Steps.push_back(S); +} + void InitializationSequence::AddExtraneousCopyToTemporary(QualType T) { Step S; S.Kind = SK_ExtraneousCopyToTemporary; @@ -4975,6 +4986,14 @@ static void TryReferenceInitializationCore(Sema &S, 
QualType DestType = Entity.getType(); SourceLocation DeclLoc = Initializer->getBeginLoc(); + // OpenCilk: If the right hand side is a hyperobject, see if the + // left hand side wants the hyperobject or a view. + if (T2->isHyperobjectType() && !T1->isHyperobjectType()) { + Sequence.AddViewLookup(T1); + T2 = T2.stripHyperobject(); + cv2T2 = cv2T2.stripHyperobject(); + } + // Compute some basic properties of the types and the initializer. bool isLValueRef = DestType->isLValueReferenceType(); bool isRValueRef = !isLValueRef; @@ -6137,7 +6156,18 @@ void InitializationSequence::InitializeFrom(Sema &S, bool TopLevelOfInitList, bool TreatUnavailableAsInvalid) { ASTContext &Context = S.Context; - + // Peel off any CilkSpawnExpr at the start of the arguments. + if (Args.size() == 1) + if (CilkSpawnExpr *E = dyn_cast(Args[0])) { + IsSpawned = true; + SpawnLoc = E->getExprLoc(); + Args[0] = E->getSpawnedExpr(); + if (ExprWithCleanups *EWC = + dyn_cast(E->getSpawnedExpr())) { + S.Cleanup.setExprNeedsCleanups(true); + Args[0] = EWC->getSubExpr(); + } + } // Eliminate non-overload placeholder types in the arguments. We // need to do this before checking whether types are dependent // because lowering a pseudo-object expression might well give us @@ -8585,12 +8615,15 @@ ExprResult InitializationSequence::Perform(Sema &S, !Kind.isExplicitCast()) { // Rebuild the ParenListExpr. 
SourceRange ParenRange = Kind.getParenOrBraceRange(); + assert(!IsSpawned && "ParenListExpr is spawned"); return S.ActOnParenListExpr(ParenRange.getBegin(), ParenRange.getEnd(), Args); } assert(Kind.getKind() == InitializationKind::IK_Copy || Kind.isExplicitCast() || Kind.getKind() == InitializationKind::IK_DirectList); + if (IsSpawned) + return S.ActOnCilkSpawnExpr(SpawnLoc, Args[0]); return ExprResult(Args[0]); } @@ -8667,6 +8700,7 @@ ExprResult InitializationSequence::Perform(Sema &S, case SK_QualificationConversionPRValue: case SK_FunctionReferenceConversion: case SK_AtomicConversion: + case SK_ViewLookup: case SK_ConversionSequence: case SK_ConversionSequenceNoNarrowing: case SK_ListInitialization: @@ -8846,6 +8880,10 @@ ExprResult InitializationSequence::Perform(Sema &S, /*IsExtraneousCopy=*/true); break; + case SK_ViewLookup: + CurInit = S.BuildHyperobjectLookup(CurInit.get()); + break; + case SK_UserConversion: { // We have a user-defined conversion that invokes either a constructor // or a conversion function. @@ -9419,6 +9457,9 @@ ExprResult InitializationSequence::Perform(Sema &S, CheckMoveOnConstruction(S, Init, Entity.getKind() == InitializedEntity::EK_Result); + // Push a spawn back onto the init if necessary. 
+ if (IsSpawned) + return S.ActOnCilkSpawnExpr(SpawnLoc, Init); return Init; } @@ -10209,6 +10250,10 @@ void InitializationSequence::dump(raw_ostream &OS) const { OS << "resolve address of overloaded function"; break; + case SK_ViewLookup: + OS << "lookup hyperobject view"; + break; + case SK_CastDerivedToBasePRValue: OS << "derived-to-base (prvalue)"; break; diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index d1ff688c2a21d0..dd0b30bb418dc2 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -3102,6 +3102,9 @@ addAssociatedClassesAndNamespaces(AssociatedLookup &Result, QualType Ty) { case Type::Pointer: T = cast(T)->getPointeeType().getTypePtr(); continue; + case Type::Hyperobject: + T = cast(T)->getElementType().getTypePtr(); + continue; case Type::ConstantArray: case Type::IncompleteArray: case Type::VariableArray: diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index aef8dc58a48dbc..3b09258a05ec17 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -17,6 +17,7 @@ #include "clang/AST/DeclObjC.h" #include "clang/AST/DependenceFlags.h" #include "clang/AST/Expr.h" +#include "clang/AST/ExprCilk.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/Type.h" @@ -129,6 +130,7 @@ ImplicitConversionRank clang::GetConversionRank(ImplicitConversionKind Kind) { ICR_Exact_Match, ICR_Exact_Match, ICR_Exact_Match, + ICR_Exact_Match, ICR_Promotion, ICR_Promotion, ICR_Promotion, @@ -170,6 +172,7 @@ static const char* GetImplicitConversionName(ImplicitConversionKind Kind) { "Lvalue-to-rvalue", "Array-to-pointer", "Function-to-pointer", + "Hyperobject to view", "Function pointer conversion", "Qualification", "Integral promotion", @@ -1555,7 +1558,7 @@ TryImplicitConversion(Sema &S, Expr *From, QualType ToType, // given Conversion rank, in spite of the fact that a copy/move // constructor (i.e., a user-defined conversion function) 
is // called for those cases. - QualType FromType = From->getType(); + QualType FromType = From->getType().stripHyperobject(); if (ToType->getAs() && FromType->getAs() && (S.Context.hasSameUnqualifiedType(FromType, ToType) || S.IsDerivedFrom(From->getBeginLoc(), FromType, ToType))) { @@ -1813,7 +1816,7 @@ static bool IsStandardConversion(Sema &S, Expr* From, QualType ToType, StandardConversionSequence &SCS, bool CStyle, bool AllowObjCWritebackConversion) { - QualType FromType = From->getType(); + QualType FromType = From->getType().stripHyperobject(); // Standard conversions (C++ [conv]) SCS.setAsIdentityConversion(); @@ -4879,6 +4882,14 @@ TryReferenceInit(Sema &S, Expr *Init, QualType DeclType, T2 = Fn->getType(); } + // OpenCilk: If the right hand side is a hyperobject, see if the + // left hand side wants the hyperobject or a view. + bool LookupView = false; + if (T2->isHyperobjectType() && !T1->isHyperobjectType()) { + LookupView = true; + T2 = T2.stripHyperobject(); + } + // Compute some basic properties of the types and the initializer. bool isRValRef = DeclType->isRValueReferenceType(); Expr::Classification InitCategory = Init->Classify(S.Context); @@ -4939,6 +4950,8 @@ TryReferenceInit(Sema &S, Expr *Init, QualType DeclType, // in which case the implicit conversion sequence is a // derived-to-base Conversion (13.3.3.1). 
SetAsReferenceBinding(/*BindsDirectly=*/true); + if (LookupView) + ICS.Standard.First = ICK_Hyperobject_To_View; // Nothing more to do: the inaccessibility/ambiguity check for // derived-to-base conversions is suppressed when we're @@ -5800,6 +5813,7 @@ static bool CheckConvertedConstantConversions(Sema &S, case ICK_Lvalue_To_Rvalue: case ICK_Array_To_Pointer: case ICK_Function_To_Pointer: + case ICK_Hyperobject_To_View: llvm_unreachable("found a first conversion kind in Second"); case ICK_Function_Conversion: @@ -13652,6 +13666,9 @@ Sema::CreateOverloadedUnaryOp(SourceLocation OpLoc, UnaryOperatorKind Opc, if (checkPlaceholderForOverload(*this, Input)) return ExprError(); + if (Input->getType()->isHyperobjectType()) + Input = BuildHyperobjectLookup(Input, false); + Expr *Args[2] = { Input, nullptr }; unsigned NumArgs = 1; @@ -13820,6 +13837,11 @@ void Sema::LookupOverloadedBinOp(OverloadCandidateSet &CandidateSet, OverloadedOperatorKind Op, const UnresolvedSetImpl &Fns, ArrayRef Args, bool PerformADL) { + assert(Args[0]->getType()->getTypeClass() != Type::Hyperobject && + "hyperobjects not allowed in overloading"); + assert(Args[1]->getType()->getTypeClass() != Type::Hyperobject && + "hyperobjects not allowed in overloading"); + SourceLocation OpLoc = CandidateSet.getLocation(); OverloadedOperatorKind ExtraOp = @@ -13984,6 +14006,22 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc, OverloadCandidateSet::iterator Best; switch (CandidateSet.BestViableFunction(*this, OpLoc, Best)) { case OR_Success: { + // If the RHS is spawned and the operator is an assignment, then we + // actually want to spawn the the top-level call. + bool SpawnTheCall = false; + SourceLocation SpawnLoc; + // TODO: Generalize this condition. 
+ if (BinaryOperator::isAssignmentOp(Opc)) { + if (CilkSpawnExpr *Spawn = dyn_cast(Args[1])) { + SpawnTheCall = true; + SpawnLoc = Spawn->getExprLoc(); + if (ExprWithCleanups *EWC = + dyn_cast(Spawn->getSpawnedExpr())) + Args[1] = RHS = EWC->getSubExpr(); + else + Args[1] = RHS = Spawn->getSpawnedExpr(); + } + } // We found a built-in operator or an overloaded operator. FunctionDecl *FnDecl = Best->Function; @@ -14202,6 +14240,9 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc, if (Best->RewriteKind != CRK_None) R = new (Context) CXXRewrittenBinaryOperator(R.get(), IsReversed); + if (SpawnTheCall) + return ActOnCilkSpawnExpr(SpawnLoc, R.get()); + return R; } else { // We matched a built-in operator. Convert the arguments, then diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp index 70a549938d080d..75a98b7ad5b77f 100644 --- a/clang/lib/Sema/SemaStmt.cpp +++ b/clang/lib/Sema/SemaStmt.cpp @@ -225,6 +225,10 @@ void Sema::DiagnoseUnusedExprResult(const Stmt *S, unsigned DiagID) { if (!E) return; + // Ignore _Cilk_spawn when diagnosing unused expression. + if (const CilkSpawnExpr *CSE = dyn_cast(E)) + E = CSE->getSpawnedExpr()->IgnoreImplicit(); + // If we are in an unevaluated expression context, then there can be no unused // results because the results aren't expected to be used in the first place. if (isUnevaluatedContext()) @@ -1984,6 +1988,71 @@ namespace { return false; } + // DeclFinder checks to see if the decls are used in a given + // expressison. 
+ class DeclFinder : public EvaluatedExprVisitor { + llvm::SmallPtrSetImpl &Decls; + bool FoundDecl; + + public: + typedef EvaluatedExprVisitor Inherited; + + DeclFinder(Sema &S, llvm::SmallPtrSetImpl &Decls, + Stmt *Statement) : + Inherited(S.Context), Decls(Decls), FoundDecl(false) { + if (!Statement) return; + Visit(Statement); + } + + void VisitCastExpr(CastExpr *E) { + if (E->getCastKind() == CK_LValueToRValue) + CheckLValueToRValueCast(E->getSubExpr()); + else + Visit(E->getSubExpr()); + } + + void CheckLValueToRValueCast(Expr *E) { + E = E->IgnoreParenImpCasts(); + + if (ConditionalOperator *CO = dyn_cast(E)) { + Visit(CO->getCond()); + CheckLValueToRValueCast(CO->getTrueExpr()); + CheckLValueToRValueCast(CO->getFalseExpr()); + return; + } + + if (BinaryConditionalOperator *BCO = + dyn_cast(E)) { + CheckLValueToRValueCast(BCO->getOpaqueValue()->getSourceExpr()); + CheckLValueToRValueCast(BCO->getFalseExpr()); + return; + } + + Visit(E); + } + + void VisitDeclRefExpr(DeclRefExpr *E) { + if (VarDecl *VD = dyn_cast(E->getDecl())) + if (Decls.count(VD)) + FoundDecl = true; + } + + void VisitPseudoObjectExpr(PseudoObjectExpr *POE) { + // Only need to visit the semantics for POE. + // SyntaticForm doesn't really use the Decal. + for (auto *S : POE->semantics()) { + if (auto *OVE = dyn_cast(S)) + // Look past the OVE into the expression it binds. + Visit(OVE->getSourceExpr()); + else + Visit(S); + } + } + + bool FoundDeclInUse() { return FoundDecl; } + + }; // end class DeclFinder + // A visitor to determine if a continue or break statement is a // subexpression. class BreakContinueFinder : public ConstEvaluatedExprVisitor { @@ -2062,6 +2131,19 @@ namespace { Visit(Collection); } + void VisitCilkForStmt(const CilkForStmt *S) { + // Only visit the init statement of a _Cilk_for loop; the body + // has a different break/continue scope. 
+ if (const Stmt *Init = S->getInit()) + Visit(Init); + if (const Stmt *Limit = S->getLimitStmt()) + Visit(Limit); + if (const Stmt *Begin = S->getBeginStmt()) + Visit(Begin); + if (const Stmt *End = S->getEndStmt()) + Visit(End); + } + bool ContinueFound() { return ContinueLoc.isValid(); } bool BreakFound() { return BreakLoc.isValid(); } SourceLocation GetContinueLoc() { return ContinueLoc; } @@ -2109,7 +2191,6 @@ namespace { } // end namespace - void Sema::CheckBreakContinueBinding(Expr *E) { if (!E || getLangOpts().CPlusPlus) return; @@ -3355,6 +3436,560 @@ Sema::ActOnBreakStmt(SourceLocation BreakLoc, Scope *CurScope) { return new (Context) BreakStmt(BreakLoc); } +/// Return the stride expression from the increment portion of a _Cilk_for loop +/// that satisfies one of the following formats: +/// +/// var += +/// var -= +/// +/// Return null if the increment does not satisfy one of the specified formats. +static std::pair +GetCilkForStride(Sema &S, llvm::SmallPtrSetImpl &Decls, + Expr *Increment) { + auto Invalid = std::make_pair(nullptr, false); + if (const CompoundAssignOperator *CAO = + dyn_cast_or_null(Increment)) { + // Only get an expression to extract if Increment is in a canonical form + // with Decls only in the LHS. + bool DeclUseInRHS = + DeclFinder(S, Decls, CAO->getRHS()).FoundDeclInUse(); + bool DeclUseInLHS = + DeclFinder(S, Decls, CAO->getLHS()).FoundDeclInUse(); + if (!(DeclUseInLHS && !DeclUseInRHS)) + return Invalid; + + // TODO: Check this. + switch(CAO->getOpcode()) { + default: return Invalid; + case BO_AddAssign: + return std::make_pair(CAO->getRHS(), false); + case BO_SubAssign: + return std::make_pair(CAO->getRHS(), true); + } + } + return Invalid; +} + +// Check the _Cilk_for initialization statement. Returns true on error. 
+static bool CheckCilkForInit(Sema &S, SourceLocation &CilkForLoc, Stmt *First) { + if (!First) { + S.Diag(CilkForLoc, diag::err_cilk_for_initializer_expected_decl); + return true; + } + if (!isa(First)) { + S.Diag(First->getBeginLoc(), diag::err_cilk_for_initializer_expected_decl); + return true; + } + return false; +} + +/// Rewrite the loop control of simple _Cilk_for loops into a form that LLVM +/// will have an easier time analyzing. The transformation looks as follows: +/// +/// _Cilk_for (loop-var = init-expr; +/// loop-var relation-compare end-expr; +/// loop-var compound-assign stride-expr) +/// body-stmt +/// => +/// _Cilk_for (__begin = 0, __end = modified-end-expr; +/// __begin relation-compare __end; +/// __begin-update-expr) +/// { loop-var = __begin * stride-expr; body-stmt } +/// +/// where +/// +/// modified-end-expr := (range-expr / stride-expr) + 1 +/// +/// and +/// +/// range-expr := end-expr - init-expr - 1 if relation-compare is LT or GT +/// range-expr := end-expr - init-expr if relation-cpmpare is LE or GE +/// +/// Essentially, we treat simple _Cilk_for loops as syntactic sugar for slightly +/// more complex loops that often match the programmer's intuition as to how the +/// loop should behave. +StmtResult Sema::HandleSimpleCilkForStmt(SourceLocation CilkForLoc, + SourceLocation LParenLoc, + Stmt *First, + Expr *Condition, + Expr *Increment, + SourceLocation RParenLoc, + Stmt *Body) { + Scope *S = getCurScope(); + + // Get the single loop variable declared. + DeclStmt *LoopVarDS = dyn_cast(First); + if (!LoopVarDS || !LoopVarDS->isSingleDecl()) + return StmtEmpty(); + VarDecl *LoopVar = dyn_cast(LoopVarDS->getSingleDecl()); + if (!LoopVar) + return StmtEmpty(); + + // Get the loop variable initialization. 
+ Expr *LoopVarInit = LoopVar->getInit(); + if (!LoopVarInit) { + Diag(First->getBeginLoc(), + diag::err_cilk_for_control_variable_not_initialized); + return StmtEmpty(); + } + + // Get the loop-limit expression, which the loop variable is compared against. + // TODO: Generalize this logic to handle complex conditions, e.g., class + // methods instead of integer binary operators. + BinaryOperator *Cond = dyn_cast_or_null(Condition); + if (!Cond || !Cond->isComparisonOp()) + return StmtEmpty(); + + llvm::SmallPtrSet Decls; + Decls.insert(LoopVar); + bool DeclUseInRHS = + DeclFinder(*this, Decls, Cond->getRHS()).FoundDeclInUse(); + bool DeclUseInLHS = + DeclFinder(*this, Decls, Cond->getLHS()).FoundDeclInUse(); + if ((DeclUseInLHS && DeclUseInRHS) || (!DeclUseInLHS && !DeclUseInRHS)) + return StmtEmpty(); + + // Get the loop-limit expression. + Expr *LimitExpr = nullptr; + if (DeclUseInLHS) + LimitExpr = Cond->getRHS(); + else // if (DeclUseInRHS) + LimitExpr = Cond->getLHS(); + if (!LimitExpr) + return StmtEmpty(); + + // Get the loop stride. + Expr *Stride = nullptr; + bool StrideIsNegative = false; + if (const UnaryOperator *UO = + dyn_cast_or_null(Increment)) { + if (UO->isIncrementOp()) + Stride = ActOnIntegerConstant(Increment->getExprLoc(), 1).get(); + else if (UO->isDecrementOp()) { + Stride = ActOnIntegerConstant(Increment->getExprLoc(), 1).get(); + StrideIsNegative = true; + } + } else { + auto StrideWithSign = GetCilkForStride(*this, Decls, Increment); + StrideIsNegative = StrideWithSign.second; + Stride = StrideWithSign.first; + } + if (!Stride) + return StmtEmpty(); + + // Determine the type of comparison. + // + // TODO? For now, this function only recognizes relational comparisons (LT, + // GT, LE, GE), assuming that, if the programmer uses anything else, then they + // will do the right thing themselves. This behavior might be worth + // generalizing in the future. 
+ bool CompareUpperLimit = false; + bool CompareInclusive = false; + switch (Cond->getOpcode()) { + default: + return StmtEmpty(); + case BO_LE: + CompareInclusive = true; + LLVM_FALLTHROUGH; + case BO_LT: + CompareUpperLimit = DeclUseInLHS; + break; + case BO_GE: + CompareInclusive = true; + LLVM_FALLTHROUGH; + case BO_GT: + CompareUpperLimit = DeclUseInRHS; + break; + case BO_NE: + CompareInclusive = true; + break; + } + + // Create a declaration for the initialization of this loop, to ensure its + // evaluated just once. + QualType LoopVarTy = LoopVar->getType(); + SourceLocation InitLoc = LoopVarInit->getBeginLoc(); + // Add declaration to store the old loop var initialization. + VarDecl *InitVar = BuildForRangeVarDecl(*this, InitLoc, + LoopVarTy, "__init"); + AddInitializerToDecl(InitVar, LoopVarInit, /*DirectInit=*/false); + FinalizeDeclaration(InitVar); + CurContext->addHiddenDecl(InitVar); + + // Create a declaration for the limit of this loop, to ensure its evaluated + // just once. + SourceLocation LimitLoc = LimitExpr->getBeginLoc(); + VarDecl *LimitVar = BuildForRangeVarDecl(*this, LimitLoc, + LoopVarTy, "__limit"); + AddInitializerToDecl(LimitVar, LimitExpr, /*DirectInit=*/false); + FinalizeDeclaration(LimitVar); + CurContext->addHiddenDecl(LimitVar); + + DeclGroupPtrTy InitGroup = + BuildDeclaratorGroup(MutableArrayRef((Decl **)&InitVar, 1)); + StmtResult NewInit = ActOnDeclStmt(InitGroup, InitLoc, InitLoc); + if (NewInit.isInvalid()) + return StmtError(); + + DeclGroupPtrTy LimitGroup = + BuildDeclaratorGroup(MutableArrayRef((Decl **)&LimitVar, 1)); + StmtResult LimitDecl = ActOnDeclStmt(LimitGroup, LimitLoc, LimitLoc); + if (LimitDecl.isInvalid()) + return StmtError(); + + ExprResult InitRef = BuildDeclRefExpr(InitVar, LoopVarTy, VK_LValue, + InitLoc); + ExprResult LimitRef = BuildDeclRefExpr(LimitVar, LimitVar->getType(), + VK_LValue, LimitLoc); + + // LimitVar should have the correct type, because it's derived from the + // original condition. 
Hence we only need to cast InitRef. + ExprResult CastInit = ImplicitCastExpr::Create( + Context, LimitVar->getType(), CK_IntegralCast, InitRef.get(), nullptr, + VK_XValue, FPOptionsOverride()); + + // Compute a check that this _Cilk_for loop executes at all. + SourceLocation CondLoc = Cond->getExprLoc(); + ExprResult InitCond; + if (DeclUseInLHS) + InitCond = BuildBinOp(S, CondLoc, Cond->getOpcode(), CastInit.get(), + LimitRef.get()); + else // DeclUseInRHS + InitCond = BuildBinOp(S, CondLoc, Cond->getOpcode(), LimitRef.get(), + CastInit.get()); + if (InitCond.isInvalid()) { + llvm_unreachable("Invalid InitCond"); + return StmtError(); + } + + // Compute the range of this _Cilk_for loop. + ExprResult Range; + if (!StrideIsNegative) + // range = limit - init. + Range = BuildBinOp(S, CondLoc, BO_Sub, LimitRef.get(), CastInit.get()); + else + // range = init - limit. + Range = BuildBinOp(S, CondLoc, BO_Sub, CastInit.get(), LimitRef.get()); + if (Range.isInvalid()) + return StmtError(); + + // At this point, we have confirmed that this loop is a simple _Cilk_for loop. + // Now rewrite the loop control. + + // If the comparison is not inclusive, reduce the Range by 1. + if (!CompareInclusive) + Range = BuildBinOp(S, CondLoc, BO_Sub, Range.get(), + ActOnIntegerConstant(CilkForLoc, 1).get()); + + // Build Range/Stride. + ExprResult NewLimit = BuildBinOp(S, CondLoc, BO_Div, Range.get(), Stride); + + // If the comparison is not an equality, build Range/Stride + 1 + if (!CompareInclusive) + NewLimit = BuildBinOp(S, CondLoc, BO_Add, NewLimit.get(), + ActOnIntegerConstant(CilkForLoc, 1).get()); + + // The range is the result of subtracting the loop bounds + // and should be an integer. + QualType CountTy = NewLimit.get()->getType(); + + // Create new declarations for replacement loop control variables. + // Declaration for new beginning loop control variable. 
+ VarDecl *BeginVar = BuildForRangeVarDecl(*this, CondLoc, CountTy, + "__begin"); + AddInitializerToDecl(BeginVar, ActOnIntegerConstant(CondLoc, 0).get(), + /*DirectInit=*/false); + FinalizeDeclaration(BeginVar); + CurContext->addHiddenDecl(BeginVar); + // Declaration for new end loop control variable. + VarDecl *EndVar = BuildForRangeVarDecl(*this, CondLoc, CountTy, "__end"); + AddInitializerToDecl(EndVar, NewLimit.get(), /*DirectInit=*/false); + FinalizeDeclaration(EndVar); + CurContext->addHiddenDecl(EndVar); + + DeclGroupPtrTy BeginGroup = + BuildDeclaratorGroup(MutableArrayRef((Decl **)&BeginVar, 1)); + StmtResult BeginStmt = ActOnDeclStmt(BeginGroup, CondLoc, CondLoc); + if (BeginStmt.isInvalid()) + return StmtError(); + + DeclGroupPtrTy EndGroup = + BuildDeclaratorGroup(MutableArrayRef((Decl **)&EndVar, 1)); + StmtResult EndStmt = ActOnDeclStmt(EndGroup, CondLoc, CondLoc); + if (EndStmt.isInvalid()) + return StmtError(); + + // Replace the comparison in the main loop with a comparison on the new loop + // control variables. + + // Create a new condition expression that uses the new VarDecl + // in place of the lifted expression. + ExprResult BeginRef = BuildDeclRefExpr(BeginVar, CountTy, VK_LValue, CondLoc); + ExprResult EndRef = BuildDeclRefExpr(EndVar, CountTy, VK_LValue, CondLoc); + ExprResult NewCond; + if (CompareUpperLimit == DeclUseInLHS) + NewCond = BuildBinOp(S, CondLoc, Cond->getOpcode(), BeginRef.get(), + EndRef.get()); + else + NewCond = BuildBinOp(S, CondLoc, Cond->getOpcode(), EndRef.get(), + BeginRef.get()); + if (NewCond.isInvalid()) + return StmtError(); + + // Create a new increment operation on the new beginning variable, and add it + // to the existing increment operation. + ExprResult NewInc; + SourceLocation IncLoc = Increment->getExprLoc(); + if (const CompoundAssignOperator *CAO = + dyn_cast(Increment)) { + switch (CAO->getOpcode()) { + default: break; // Should not reach this case if we have a Stride. 
+ case BO_AddAssign: + NewInc = BuildUnaryOp(S, IncLoc, UO_PreInc, BeginRef.get()); + break; + case BO_SubAssign: + NewInc = BuildUnaryOp(S, IncLoc, UO_PreInc, BeginRef.get()); + break; + } + } else if (const UnaryOperator *UO = + dyn_cast_or_null(Increment)) { + if (UO->isIncrementOp()) + NewInc = BuildUnaryOp(S, IncLoc, UO_PreInc, BeginRef.get()); + else if (UO->isDecrementOp()) + NewInc = BuildUnaryOp(S, IncLoc, UO_PreInc, BeginRef.get()); + } + if (NewInc.isInvalid()) + return StmtError(); + + // Return a new statement for initializing the old loop variable. + SourceLocation LoopVarLoc = LoopVar->getBeginLoc(); + ExprResult NewLoopVarInit = + BuildBinOp(S, LoopVarLoc, StrideIsNegative ? BO_Sub : BO_Add, InitRef.get(), + BuildBinOp(S, LoopVarLoc, BO_Mul, + BeginRef.get(), Stride).get()); + if (!NewLoopVarInit.isInvalid()) + AddInitializerToDecl(LoopVar, NewLoopVarInit.get(), /*DirectInit=*/false); + + return new (Context) CilkForStmt( + NewInit.get(), cast(LimitDecl.get()), InitCond.get(), + cast(BeginStmt.get()), cast(EndStmt.get()), + NewCond.get(), NewInc.get(), LoopVarDS, Body, CilkForLoc, LParenLoc, + RParenLoc); +} + +/// Examine the condition of the _Cilk_for loop to lift the evaluation of the +/// end condition of a _Cilk_for loop out of the loop. Intuitively, this +/// routine transforms _Cilk_for loops as follows: +/// +/// _Cilk_for(loop-var-decl; +/// loop-var-expr comparison-op end-expr; +/// => +/// _Cilk_for(loop-var-decl, __end = end-expr; +/// loop-var-expr comparison-op __end; +/// +/// Here, loop-var-expr can use variables declared in loop-var-decl, while +/// end-expr must not use any such variables. In general, the loop condition +/// can swap the positions of loop-var-expr and end-expr. +StmtResult Sema::LiftCilkForLoopLimit(SourceLocation CilkForLoc, + Stmt *First, Expr **Second) { + if (!First || !Second || !*Second) + return StmtEmpty(); + + // Get the single loop variable declared. 
+ DeclStmt *LoopVarDS = dyn_cast(First); + if (!LoopVarDS || !LoopVarDS->isSingleDecl()) + return StmtEmpty(); + VarDecl *LoopVar = dyn_cast(LoopVarDS->getSingleDecl()); + if (!LoopVar) + return StmtEmpty(); + + // // Extract decls from First. If First is not a decl statement, give + // // up. + // llvm::SmallPtrSet Decls; + // if (DeclStmt *DS = dyn_cast(First)) { + // for (auto *DI : DS->decls()) { + // VarDecl *VD = dyn_cast(DI); + // Decls.insert(VD); + // } + // } else { + // return StmtEmpty(); + // } + + // Only get an expression to extract if Decl's appear in just one + // side of a comparison. + BinaryOperator *E = dyn_cast(*Second); + if (!E || !E->isComparisonOp()) + return StmtEmpty(); + + llvm::SmallPtrSet Decls; + Decls.insert(LoopVar); + bool DeclUseInRHS = DeclFinder(*this, Decls, E->getRHS()).FoundDeclInUse(); + bool DeclUseInLHS = DeclFinder(*this, Decls, E->getLHS()).FoundDeclInUse(); + Expr *ToExtract = nullptr; + if ((DeclUseInLHS && DeclUseInRHS) || (!DeclUseInLHS && !DeclUseInRHS)) + return StmtEmpty(); + + // Get the expression to lift. + if (DeclUseInLHS) + ToExtract = E->getRHS(); + else if (DeclUseInRHS) + ToExtract = E->getLHS(); + assert(ToExtract && "No Expr to extract."); + + // Create a new VarDecl that stores the result of the lifted + // expression. + Scope *S = getCurScope(); + SourceLocation EndLoc = ToExtract->getBeginLoc(); + QualType EndType = LoopVar->getType(); + + // Hijacking this method for handling range loops to build the + // declaration for the end of the loop. + VarDecl *EndVar = BuildForRangeVarDecl(*this, EndLoc, EndType, "__end"); + AddInitializerToDecl(EndVar, ToExtract, /*DirectInit=*/false); + FinalizeDeclaration(EndVar); + CurContext->addHiddenDecl(EndVar); + + // Combine declarations into a single DeclStmt. 
+ SmallVector NewDecls; + NewDecls.push_back(LoopVar); + NewDecls.push_back(EndVar); + DeclGroupPtrTy DG = BuildDeclaratorGroup(MutableArrayRef(NewDecls)); + StmtResult NewInit = ActOnDeclStmt(DG, CilkForLoc, CilkForLoc); + if (NewInit.isInvalid()) + return StmtError(); + + // // Create a Decl statement for the new VarDecl. + // StmtResult EndDeclStmt = + // ActOnDeclStmt(ConvertDeclToDeclGroup(EndVar), EndLoc, EndLoc); + + // Create a new condition expression that uses the new VarDecl + // in place of the lifted expression. + ExprResult EndRef = BuildDeclRefExpr(EndVar, EndType, VK_LValue, EndLoc); + ExprResult NewCondExpr; + if (DeclUseInLHS) + NewCondExpr = BuildBinOp(S, E->getOperatorLoc(), E->getOpcode(), + E->getLHS(), EndRef.get()); + else if (DeclUseInRHS) + NewCondExpr = BuildBinOp(S, E->getOperatorLoc(), E->getOpcode(), + EndRef.get(), E->getRHS()); + + *Second = NewCondExpr.get(); + return NewInit; +} + +// Borrowed from SemaDeclCXX.cpp and modified. +static void SearchForReturnInStmt(Sema &Self, Stmt *S) { + if (isa(S)) + Self.Diag(S->getBeginLoc(), + diag::err_cilk_for_cannot_return); + + for (Stmt *SubStmt : S->children()) { + if (!SubStmt) + continue; + if (!isa(SubStmt)) + SearchForReturnInStmt(Self, SubStmt); + } +} + +StmtResult +Sema::ActOnCilkForStmt(SourceLocation CilkForLoc, SourceLocation LParenLoc, + Stmt *First, DeclStmt *Limit, ConditionResult InitCond, + DeclStmt *Begin, DeclStmt *End, ConditionResult Second, + FullExprArg Third, SourceLocation RParenLoc, Stmt *Body, + DeclStmt *LoopVar) { + if (CheckCilkForInit(*this, CilkForLoc, First)) + return StmtResult(); + + // if (!getLangOpts().CPlusPlus) { + if (DeclStmt *DS = dyn_cast_or_null(First)) { + // C99 6.8.5p3: The declaration part of a 'for' statement shall only + // declare identifiers for objects having storage class 'auto' or + // 'register'. 
+ for (auto *DI : DS->decls()) { + VarDecl *VD = dyn_cast(DI); + if (VD && VD->isLocalVarDecl() && !VD->hasLocalStorage()) + VD = nullptr; + if (!VD) { + Diag(DI->getLocation(), diag::err_non_local_variable_decl_in_for); + DI->setInvalidDecl(); + } + } + } + // } + + CheckBreakContinueBinding(Second.get().second); + CheckBreakContinueBinding(Third.get()); + + // Check the condition of the _Cilk_for + Expr* Condition = Second.get().second; + if (!Condition) + return StmtError(Diag(CilkForLoc, diag::err_cilk_for_invalid_cond_expr)); + + CheckForLoopConditionalStatement(*this, Condition, Third.get(), Body); + CheckForRedundantIteration(*this, Third.get(), Body); + + // ExprResult SecondResult(second.release()); + // VarDecl *ConditionVar = nullptr; + // if (secondVar) { + // ConditionVar = cast(secondVar); + // SecondResult = CheckConditionVariable(ConditionVar, CilkForLoc, true); + // SecondResult = ActOnFinishFullExpr(SecondResult.get(), CilkForLoc); + // if (SecondResult.isInvalid()) + // return StmtError(); + // } + + Expr *Increment = Third.release().getAs(); + + if (isa(Body)) { + Diag(CilkForLoc, diag::warn_empty_cilk_for_body); + getCurCompoundScope().setHasEmptyLoopBodies(); + } + + SearchForReturnInStmt(*this, Body); + + if (BreakContinueFinder(*this, Body).BreakFound()) + Diag(CilkForLoc, diag::err_cilk_for_cannot_break); + // TODO: Check for other illegal statements in the _Cilk_for body, such as + // goto statements that leave the _Cilk_for body. + + setFunctionHasBranchProtectedScope(); + + if (LoopVar) + return new (Context) + CilkForStmt(First, Limit, InitCond.get().second, Begin, End, Condition, + Increment, LoopVar, Body, CilkForLoc, LParenLoc, RParenLoc); + + // Attempt to process this loop as a simple _Cilk_for loop. 
+ StmtResult SimpleCilkFor = + HandleSimpleCilkForStmt(CilkForLoc, LParenLoc, First, Condition, Increment, + RParenLoc, Body); + if (!SimpleCilkFor.isInvalid() && !SimpleCilkFor.isUnset()) + return SimpleCilkFor; + + // HandleSimpleCilkForLoop(CilkForLoc, &First, &Condition, &Increment); + // if (NewLoopVarDS) { + // Stmt* NewBody = new (Context) CompoundStmt(Context, + // { NewLoopVarDS, Body }, + // LParenLoc, RParenLoc); + // return new (Context) CilkForStmt(Context, First, nullptr, + // Condition, Increment, NewBody, CilkForLoc, + // LParenLoc, RParenLoc); + // } + + // Attempt to find the loop limit and extract it into its own declaration. + StmtResult NewInit = LiftCilkForLoopLimit(CilkForLoc, First, &Condition); + if (NewInit.isInvalid()) + return NewInit; + + if (!NewInit.isUnset()) + return new (Context) CilkForStmt(NewInit.get(), nullptr, nullptr, nullptr, + nullptr, Condition, Increment, nullptr, + Body, CilkForLoc, LParenLoc, RParenLoc); + return new (Context) + CilkForStmt(First, nullptr, nullptr, nullptr, nullptr, Condition, + Increment, nullptr, Body, CilkForLoc, LParenLoc, RParenLoc); +} + /// Determine whether the given expression might be move-eligible or /// copy-elidable in either a (co_)return statement or throw expression, /// without considering function return type, if applicable. 
@@ -3905,6 +4540,11 @@ Sema::ActOnReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp, if (R.isInvalid() || ExprEvalContexts.back().isDiscardedStatementContext()) return R; + if (getLangOpts().getCilk() != LangOptions::Cilk_none) + if (const Expr *RV = cast(R.get())->getRetValue()) + if (isa(RV)) + Diag(ReturnLoc, diag::warn_return_cilk_spawn); + VarDecl *VD = const_cast(cast(R.get())->getNRVOCandidate()); diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp index ad20bc8871f103..6a82f58ceb80f5 100644 --- a/clang/lib/Sema/SemaStmtAttr.cpp +++ b/clang/lib/Sema/SemaStmtAttr.cpp @@ -79,13 +79,20 @@ static Attr *handleLoopHintAttr(Sema &S, Stmt *St, const ParsedAttr &A, StringRef PragmaName = llvm::StringSwitch(PragmaNameLoc->Ident->getName()) .Cases("unroll", "nounroll", "unroll_and_jam", "nounroll_and_jam", - PragmaNameLoc->Ident->getName()) + "cilk", PragmaNameLoc->Ident->getName()) .Default("clang loop"); + if ((PragmaName == "cilk") && + (St->getStmtClass() != Stmt::CilkForStmtClass)) { + S.Diag(St->getBeginLoc(), diag::err_pragma_cilk_precedes_noncilk) + << "#pragma cilk"; + return nullptr; + } + // This could be handled automatically by adding a Subjects definition in // Attr.td, but that would make the diagnostic behavior worse in this case // because the user spells this attribute as a pragma. 
- if (!isa(St)) { + if (!isa(St)) { std::string Pragma = "#pragma " + std::string(PragmaName); S.Diag(St->getBeginLoc(), diag::err_pragma_loop_precedes_nonloop) << Pragma; return nullptr; @@ -116,6 +123,18 @@ static Attr *handleLoopHintAttr(Sema &S, Stmt *St, const ParsedAttr &A, SetHints(LoopHintAttr::UnrollAndJamCount, LoopHintAttr::Numeric); else SetHints(LoopHintAttr::UnrollAndJam, LoopHintAttr::Enable); + } else if (PragmaName == "cilk") { + Option = llvm::StringSwitch( + OptionLoc->Ident->getName()) + .Case("grainsize", LoopHintAttr::TapirGrainsize) + .Default(LoopHintAttr::TapirGrainsize); + if (Option == LoopHintAttr::TapirGrainsize) { + assert(ValueExpr && "Attribute must have a valid value expression."); + if (S.CheckLoopHintExpr(ValueExpr, St->getBeginLoc())) + return nullptr; + State = LoopHintAttr::Numeric; + } else + llvm_unreachable("bad loop hint"); } else { // #pragma clang loop ... assert(OptionLoc && OptionLoc->Ident && @@ -356,6 +375,9 @@ CheckForIncompatibleAttributes(Sema &S, // The vector predication only has a state form that is exposed by // #pragma clang loop vectorize_predicate (enable | disable). VectorizePredicate, + // The Tapir grainsize only has a numeric form that describes the + // amount to coarsen the parallel loop. + TapirGrainsize, // This serves as a indicator to how many category are listed in this enum. 
NumberOfCategories }; @@ -400,6 +422,9 @@ CheckForIncompatibleAttributes(Sema &S, case LoopHintAttr::PipelineInitiationInterval: Category = Pipeline; break; + case LoopHintAttr::TapirGrainsize: + Category = TapirGrainsize; + break; case LoopHintAttr::VectorizePredicate: Category = VectorizePredicate; break; diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index a1f0f5732b2b77..1ec22c64572071 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -6251,6 +6251,10 @@ bool UnnamedLocalNoLinkageFinder::VisitComplexType(const ComplexType* T) { return Visit(T->getElementType()); } +bool UnnamedLocalNoLinkageFinder::VisitHyperobjectType(const HyperobjectType* T) { + return Visit(T->getElementType()); +} + bool UnnamedLocalNoLinkageFinder::VisitPointerType(const PointerType* T) { return Visit(T->getPointeeType()); } diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index 31ea7be2975e49..66dfc419eea996 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -1626,6 +1626,9 @@ static Sema::TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch( case Type::SubstTemplateTypeParmPack: llvm_unreachable("Type nodes handled above"); + case Type::Hyperobject: + llvm_unreachable("Implement me"); + case Type::Auto: // C++23 [temp.deduct.funcaddr]/3: // A placeholder type in the return type of a function template is a @@ -6182,6 +6185,13 @@ MarkUsedTemplateParameters(ASTContext &Ctx, QualType T, OnlyDeduced, Depth, Used); break; + case Type::Hyperobject: + if (!OnlyDeduced) + MarkUsedTemplateParameters(Ctx, + cast(T)->getElementType(), + OnlyDeduced, Depth, Used); + break; + case Type::Atomic: if (!OnlyDeduced) MarkUsedTemplateParameters(Ctx, diff --git a/clang/lib/Sema/SemaTemplateVariadic.cpp b/clang/lib/Sema/SemaTemplateVariadic.cpp index dfcc78dafdc4c3..00d4c0778b153f 100644 --- a/clang/lib/Sema/SemaTemplateVariadic.cpp +++ 
b/clang/lib/Sema/SemaTemplateVariadic.cpp @@ -927,6 +927,12 @@ bool Sema::containsUnexpandedParameterPacks(Declarator &D) { // These declarator chunks cannot contain any parameter packs. break; + case DeclaratorChunk::Hyperobject: + for (const Expr *Arg : Chunk.Hyper.Arg) + if (Arg->containsUnexpandedParameterPack()) + return true; + break; + case DeclaratorChunk::Array: if (Chunk.Arr.NumElts && Chunk.Arr.NumElts->containsUnexpandedParameterPack()) diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 0aa691d24171f3..700e227acbfd00 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -417,6 +417,7 @@ static DeclaratorChunk *maybeMovePastReturnType(Declarator &declarator, case DeclaratorChunk::Reference: case DeclaratorChunk::MemberPointer: case DeclaratorChunk::Pipe: + case DeclaratorChunk::Hyperobject: return result; // If we do find a function declarator, scan inwards from that, @@ -430,6 +431,7 @@ static DeclaratorChunk *maybeMovePastReturnType(Declarator &declarator, case DeclaratorChunk::Function: case DeclaratorChunk::Reference: case DeclaratorChunk::Pipe: + case DeclaratorChunk::Hyperobject: continue; case DeclaratorChunk::MemberPointer: @@ -510,6 +512,7 @@ static void distributeObjCPointerTypeAttr(TypeProcessingState &state, case DeclaratorChunk::Reference: case DeclaratorChunk::MemberPointer: case DeclaratorChunk::Pipe: + case DeclaratorChunk::Hyperobject: goto error; } } @@ -541,6 +544,7 @@ static void distributeObjCPointerTypeAttrFromDeclarator( case DeclaratorChunk::Paren: case DeclaratorChunk::Array: case DeclaratorChunk::Pipe: + case DeclaratorChunk::Hyperobject: continue; case DeclaratorChunk::Function: @@ -602,6 +606,7 @@ static void distributeFunctionTypeAttr(TypeProcessingState &state, case DeclaratorChunk::Reference: case DeclaratorChunk::MemberPointer: case DeclaratorChunk::Pipe: + case DeclaratorChunk::Hyperobject: continue; } } @@ -1264,6 +1269,109 @@ TSTToUnaryTransformType(DeclSpec::TST 
SwitchTST) { } } +static std::optional DeclContainsHyperobject(const RecordDecl *Decl); + +// It is forbidden to add new bits to the Type class so there is no +// room for a cached or precomputed flag. Do a deep search on every +// hyperobject type creation. +static std::optional ContainsHyperobject(QualType Outer) { + const Type *T = Outer.getCanonicalType().getTypePtr(); + if (T->isVariablyModifiedType()) + return diag::variable_length_hyperobject; + if (T->isDependentType()) + return std::optional(); + QualType Inner; + switch (T->getTypeClass()) { + case Type::Hyperobject: + return diag::nested_hyperobject; + case Type::Typedef: + Inner = cast(T)->desugar(); + break; + case Type::ConstantArray: + case Type::IncompleteArray: + case Type::VariableArray: + case Type::DependentSizedArray: + Inner = cast(T)->getElementType(); + break; + case Type::Complex: + Inner = cast(T)->getElementType(); + break; + case Type::Record: { + const RecordDecl *Decl = cast(T)->getDecl(); + // TODO: There must be a better way to do this. + // A hyperobject might sneak in without being explicitly + // declared in the template. 
+ if (auto Spec = dyn_cast(Decl)) { + if (ClassTemplateDecl *Inner = Spec->getSpecializedTemplate()) + if (auto O = DeclContainsHyperobject(Inner->getTemplatedDecl())) + return O; + const TemplateArgumentList &Args = Spec->getTemplateArgs(); + for (unsigned I = 0; I < Args.size(); ++I) { + const TemplateArgument &Arg = Args.get(I); + switch (Arg.getKind()) { + case TemplateArgument::Declaration: + if (auto O = ContainsHyperobject(Arg.getAsDecl()->getType())) + return O; + break; + case TemplateArgument::Type: + if (auto O = ContainsHyperobject(Arg.getAsType())) + return O; + break; + case TemplateArgument::Integral: + case TemplateArgument::NullPtr: + case TemplateArgument::Null: + break; + default: + return diag::confusing_hyperobject; + } + } + return std::optional(); + } + if (const RecordDecl *Def = Decl->getDefinition()) + return DeclContainsHyperobject(Def); + return diag::confusing_hyperobject; + } + case Type::TypeOf: + Inner = cast(T)->getUnmodifiedType(); + break; + case Type::TypeOfExpr: + Inner = cast(T)->getUnderlyingExpr()->getType(); + break; + case Type::Decltype: + Inner = cast(T)->getUnderlyingType(); + break; + case Type::Elaborated: + Inner = cast(T)->desugar(); + break; + case Type::Adjusted: + case Type::Decayed: + Inner = cast(T)->desugar(); + break; + case Type::Auto: + case Type::DeducedTemplateSpecialization: + Inner = cast(T)->desugar(); + break; + case Type::TemplateSpecialization: + case Type::DependentName: + case Type::DependentTemplateSpecialization: + case Type::PackExpansion: + case Type::UnaryTransform: + return diag::confusing_hyperobject; + case Type::Builtin: + case Type::TemplateTypeParm: + default: + return std::optional(); + } + return ContainsHyperobject(Inner); +} + +static std::optional DeclContainsHyperobject(const RecordDecl *Decl) { + for (const FieldDecl *FD : Decl->fields()) + if (std::optional O = ContainsHyperobject(FD->getType())) + return O; + return std::optional(); +} + /// Convert the specified declspec to the 
appropriate type /// object. /// \param state Specifies the declarator containing the declaration specifier @@ -2309,6 +2417,103 @@ QualType Sema::BuildReferenceType(QualType T, bool SpelledAsLValue, return Context.getRValueReferenceType(T); } +// Return value is always non-null. +Expr *Sema::ValidateReducerCallback(Expr *E, unsigned NumArgs, + SourceLocation Loc) { + if (!E) + E = new (Context) CXXNullPtrLiteralExpr(Context.NullPtrTy, Loc); + + QualType T = E->getType(); + + // If the type is dependent it will be checked again later, if necessary. + if (T->isDependentType() || T == Context.VoidPtrTy) + return E; + + if (T->isNullPtrType()) + return ImplicitCastExpr::Create(Context, Context.VoidPtrTy, + CK_NullToPointer, E, nullptr, VK_PRValue, + FPOptionsOverride()); + + if (T->isFunctionType()) { + E = ImplicitCastExpr::Create(Context, Context.getPointerType(T), + CK_FunctionToPointerDecay, E, nullptr, + VK_PRValue, FPOptionsOverride()); + T = E->getType(); // Context.getDecayedType(T); + } + + CastKind Cast = CK_BitCast; + + if (const IntegerLiteral *L = dyn_cast(E)) { + if (L->getValue().isZero()) + return ImplicitCastExpr::Create(Context, Context.VoidPtrTy, + CK_NullToPointer, E, nullptr, VK_PRValue, + FPOptionsOverride()); + Cast = CK_IntegralToPointer; + } + + // TODO: The compiler should allow + // Ptr = Context.getPointerType(Element) + // and generate a thunk that accepts void *. + + QualType Ptr = Context.VoidPtrTy; + llvm::SmallVector ArgTy; + ArgTy.push_back(Ptr); + if (NumArgs > 1) { + ArgTy.push_back(Ptr); + assert(NumArgs == 2); + } + // TODO: Give these types names for better error messages. 
+ QualType FnTy = + BuildFunctionType(Context.VoidTy, ArgTy, E->getExprLoc(), + DeclarationName(), FunctionProtoType::ExtProtoInfo()); + FnTy = BuildPointerType(FnTy, E->getExprLoc(), DeclarationName()); + + if (T == Context.OverloadTy) { + DeclAccessPair What; + bool Multiple = false; + if (FunctionDecl *F = ResolveAddressOfOverloadedFunction(E, FnTy, true, + What, &Multiple)) { + T = F->getType(); + E = BuildDeclRefExpr(F, T, VK_LValue, E->getExprLoc()); + T = Context.getPointerType(T); + E = ImplicitCastExpr::Create(Context, T, CK_FunctionToPointerDecay, E, + nullptr, VK_PRValue, FPOptionsOverride()); + } + } + + AssignConvertType Mismatch = + CheckAssignmentConstraints(E->getExprLoc(), FnTy, T); + + if (DiagnoseAssignmentResult(Mismatch, E->getExprLoc(), FnTy, T, E, + AA_Passing)) { + E = new (Context) CXXNullPtrLiteralExpr(Context.NullPtrTy, E->getExprLoc()); + Cast = CK_NullToPointer; + } else if (Mismatch == IntToPointer) { + Cast = CK_IntegralToPointer; + } + + return ImplicitCastExpr::Create(Context, Context.VoidPtrTy, Cast, E, nullptr, + VK_PRValue, FPOptionsOverride()); +} + +QualType Sema::BuildHyperobjectType(QualType Element, Expr *Identity, + Expr *Reduce, SourceLocation Loc) { + QualType Result = Element; + if (!RequireCompleteType(Loc, Element, CompleteTypeKind::Normal, + diag::incomplete_hyperobject)) { + if (std::optional Code = ContainsHyperobject(Result)) + Diag(Loc, *Code) << Result; + } + + Identity = ValidateReducerCallback(Identity, 1, Loc); + Reduce = ValidateReducerCallback(Reduce, 2, Loc); + + // The result of this function must be HyperobjectType if it is called + // from C++ template instantiation when rebuilding an existing hyperobject + // type. + return Context.getHyperobjectType(Result, Identity, Reduce); +} + /// Build a Read-only Pipe type. /// /// \param T The type to which we'll be building a Pipe. 
@@ -3216,6 +3421,7 @@ static void inferARCWriteback(TypeProcessingState &state, case DeclaratorChunk::Function: case DeclaratorChunk::MemberPointer: case DeclaratorChunk::Pipe: + case DeclaratorChunk::Hyperobject: return; } } @@ -3358,6 +3564,7 @@ static void diagnoseRedundantReturnTypeQualifiers(Sema &S, QualType RetTy, case DeclaratorChunk::Array: case DeclaratorChunk::MemberPointer: case DeclaratorChunk::Pipe: + case DeclaratorChunk::Hyperobject: // FIXME: We can't currently provide an accurate source location and a // fix-it hint for these. unsigned AtomicQual = RetTy->isAtomicType() ? DeclSpec::TQ_atomic : 0; @@ -3974,6 +4181,8 @@ static void warnAboutRedundantParens(Sema &S, Declarator &D, QualType T) { CouldBeTemporaryObject = false; continue; + case DeclaratorChunk::Hyperobject: + // No idea where Hyperobject belongs. case DeclaratorChunk::BlockPointer: case DeclaratorChunk::MemberPointer: case DeclaratorChunk::Pipe: @@ -4245,6 +4454,7 @@ classifyPointerDeclarator(Sema &S, QualType type, Declarator &declarator, case DeclaratorChunk::Function: case DeclaratorChunk::Pipe: + case DeclaratorChunk::Hyperobject: break; case DeclaratorChunk::BlockPointer: @@ -4575,6 +4785,7 @@ static bool hasOuterPointerLikeChunk(const Declarator &D, unsigned endIndex) { case DeclaratorChunk::Function: case DeclaratorChunk::BlockPointer: case DeclaratorChunk::Pipe: + case DeclaratorChunk::Hyperobject: // These are invalid anyway, so just ignore. 
break; } @@ -4704,6 +4915,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, DiagKind = 2; break; case DeclaratorChunk::Pipe: + case DeclaratorChunk::Hyperobject: break; } @@ -4760,6 +4972,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, case DeclaratorChunk::Array: case DeclaratorChunk::Function: case DeclaratorChunk::Pipe: + case DeclaratorChunk::Hyperobject: break; case DeclaratorChunk::BlockPointer: @@ -5715,6 +5928,12 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, D.getMutableDeclSpec().getAttributes()); break; } + + case DeclaratorChunk::Hyperobject: { + T = S.BuildHyperobjectType(T, DeclType.Hyper.Arg[0], + DeclType.Hyper.Arg[1], DeclType.Loc); + break; + } } if (T.isNull()) { @@ -6082,6 +6301,7 @@ static void transferARCOwnership(TypeProcessingState &state, case DeclaratorChunk::Function: case DeclaratorChunk::MemberPointer: case DeclaratorChunk::Pipe: + case DeclaratorChunk::Hyperobject: return; } } @@ -6506,6 +6726,9 @@ namespace { void VisitMatrixTypeLoc(MatrixTypeLoc TL) { fillMatrixTypeLoc(TL, Chunk.getAttrs()); } + void VisitHyperobjectTypeLoc(HyperobjectTypeLoc TL) { + TL.setHyperLoc(Chunk.Loc); + } void VisitTypeLoc(TypeLoc TL) { llvm_unreachable("unsupported TypeLoc kind in declarator!"); @@ -6520,6 +6743,7 @@ static void fillAtomicQualLoc(AtomicTypeLoc ATL, const DeclaratorChunk &Chunk) { case DeclaratorChunk::Array: case DeclaratorChunk::Paren: case DeclaratorChunk::Pipe: + case DeclaratorChunk::Hyperobject: llvm_unreachable("cannot be _Atomic qualified"); case DeclaratorChunk::Pointer: @@ -7706,6 +7930,7 @@ static bool distributeNullabilityTypeAttr(TypeProcessingState &state, // Don't walk through these. 
case DeclaratorChunk::Reference: case DeclaratorChunk::Pipe: + case DeclaratorChunk::Hyperobject: return false; } } diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 097e81ea7d45a2..40fdf9b89fa4c7 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -19,12 +19,14 @@ #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/Expr.h" +#include "clang/AST/ExprCilk.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprConcepts.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/ExprOpenMP.h" #include "clang/AST/OpenMPClause.h" #include "clang/AST/Stmt.h" +#include "clang/AST/StmtCilk.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtObjC.h" #include "clang/AST/StmtOpenMP.h" @@ -1269,6 +1271,9 @@ class TreeTransform { QualType RebuildDependentBitIntType(bool IsUnsigned, Expr *NumBitsExpr, SourceLocation Loc); + QualType RebuildHyperobjectType(QualType ElementType, Expr *R, + Expr *I, SourceLocation Loc); + /// Build a new template name given a nested name specifier, a flag /// indicating whether the "template" keyword was provided, and the template /// that the template name refers to. @@ -1450,6 +1455,22 @@ class TreeTransform { Inc, RParenLoc, Body); } + /// Build a new Cilk for statement. + /// + /// By default, performs semantic analysis to build the new statement. + /// Subclasses may override this routine to provide different behavior. + StmtResult RebuildCilkForStmt(SourceLocation ForLoc, SourceLocation LParenLoc, + Stmt *Init, Stmt *Limit, + Sema::ConditionResult InitCond, Stmt *Begin, + Stmt *End, Sema::ConditionResult Cond, + Sema::FullExprArg Inc, SourceLocation RParenLoc, + Stmt *LoopVar, Stmt *Body) { + return getSema().ActOnCilkForStmt( + ForLoc, LParenLoc, Init, cast_or_null(Limit), InitCond, + cast_or_null(Begin), cast_or_null(End), Cond, Inc, + RParenLoc, Body, cast_or_null(LoopVar)); + } + /// Build a new goto statement. 
/// /// By default, performs semantic analysis to build the new statement. @@ -1477,6 +1498,30 @@ class TreeTransform { return getSema().BuildReturnStmt(ReturnLoc, Result); } + /// Build a new Cilk spawn statment. + /// + /// By default, performs semantic analysis to build the new expression. + /// Subclasses may override this routine to provide different behavior. + StmtResult RebuildCilkSpawnStmt(SourceLocation SpawnLoc, Stmt *S) { + return getSema().ActOnCilkSpawnStmt(SpawnLoc, S); + } + + /// Build a new Cilk spawn expression. + /// + /// By default, performs semantic analysis to build the new expression. + /// Subclasses may override this routine to provide different behavior. + ExprResult RebuildCilkSpawnExpr(SourceLocation SpawnLoc, Expr *E) { + return getSema().ActOnCilkSpawnExpr(SpawnLoc, E); + } + + /// Build a new Cilk scope statment. + /// + /// By default, performs semantic analysis to build the new expression. + /// Subclasses may override this routine to provide different behavior. + StmtResult RebuildCilkScopeStmt(SourceLocation ScopeLoc, Stmt *S) { + return getSema().ActOnCilkScopeStmt(ScopeLoc, S); + } + /// Build a new declaration statement. /// /// By default, performs semantic analysis to build the new statement. 
@@ -5149,6 +5194,34 @@ QualType TreeTransform::TransformComplexType(TypeLocBuilder &TLB, return TransformTypeSpecType(TLB, T); } +template +QualType TreeTransform::TransformHyperobjectType + (TypeLocBuilder &TLB, HyperobjectTypeLoc TL) { + ExprResult NewR, NewI, NewD; + + { + const HyperobjectType *H = TL.getTypePtr(); + EnterExpressionEvaluationContext Context( + SemaRef, Sema::ExpressionEvaluationContext::PotentiallyEvaluated); + NewR = getDerived().TransformExpr(H->getReduce()); + NewI = getDerived().TransformExpr(H->getIdentity()); + } + if (NewR.isInvalid() || NewI.isInvalid()) + return QualType(); + + QualType ElementType = getDerived().TransformType(TLB, TL.getPointeeLoc()); + if (ElementType.isNull()) + return QualType(); + + QualType Result = + getDerived().RebuildHyperobjectType(ElementType, NewI.get(), + NewR.get(), TL.getHyperLoc()); + + HyperobjectTypeLoc NewT = TLB.push(Result); + NewT.setHyperLoc(TL.getHyperLoc()); + return Result; +} + template QualType TreeTransform::TransformAdjustedType(TypeLocBuilder &TLB, AdjustedTypeLoc TL) { @@ -15121,6 +15194,13 @@ QualType TreeTransform::RebuildDependentBitIntType( return SemaRef.BuildBitIntType(IsUnsigned, NumBitsExpr, Loc); } +template +QualType TreeTransform::RebuildHyperobjectType(QualType ElementType, + Expr *I, Expr *R, + SourceLocation Loc) { + return SemaRef.BuildHyperobjectType(ElementType, I, R, Loc); +} + template TemplateName TreeTransform::RebuildTemplateName(CXXScopeSpec &SS, @@ -15339,6 +15419,146 @@ TreeTransform::TransformCapturedStmt(CapturedStmt *S) { return getSema().ActOnCapturedRegionEnd(Body.get()); } +template +StmtResult +TreeTransform::TransformCilkSpawnStmt(CilkSpawnStmt *S) { + StmtResult Child = getDerived().TransformStmt(S->getSpawnedStmt()); + if (Child.isInvalid()) + return StmtError(); + + if (!getDerived().AlwaysRebuild() && Child.get() == S->getSpawnedStmt()) + return S; + + return getDerived().RebuildCilkSpawnStmt(S->getSpawnLoc(), Child.get()); +} + +template +ExprResult 
+TreeTransform::TransformCilkSpawnExpr(CilkSpawnExpr *E) { + ExprResult SpawnedExpr = getDerived().TransformExpr(E->getSpawnedExpr()); + if (SpawnedExpr.isInvalid()) + return ExprError(); + + if (!getDerived().AlwaysRebuild() && SpawnedExpr.get() == E->getSpawnedExpr()) + return E; + + return getDerived().RebuildCilkSpawnExpr(E->getSpawnLoc(), SpawnedExpr.get()); +} + +template +StmtResult +TreeTransform::TransformCilkSyncStmt(CilkSyncStmt *S) { + return S; +} + +template +StmtResult +TreeTransform::TransformCilkScopeStmt(CilkScopeStmt *S) { + StmtResult Child = getDerived().TransformStmt(S->getBody()); + if (Child.isInvalid()) + return StmtError(); + + if (!getDerived().AlwaysRebuild() && Child.get() == S->getBody()) + return S; + + return getDerived().RebuildCilkScopeStmt(S->getScopeLoc(), Child.get()); +} + +template +StmtResult +TreeTransform::TransformCilkForStmt(CilkForStmt *S) { + // Transform the initialization statement + StmtResult Init = getDerived().TransformStmt(S->getInit()); + if (Init.isInvalid()) + return StmtError(); + + // Transform the limit statement + StmtResult Limit; + if (S->getLimitStmt()) { + Limit = getDerived().TransformStmt(S->getLimitStmt()); + if (Limit.isInvalid()) + return StmtError(); + } + + // Transform the init-condition statement + Sema::ConditionResult InitCond; + if (S->getInitCond()) { + InitCond = getDerived().TransformCondition( + S->getCilkForLoc(), nullptr, S->getInitCond(), + Sema::ConditionKind::Boolean); + if (InitCond.isInvalid()) + return StmtError(); + } + + // Transform the begin statement + StmtResult Begin; + if (S->getBeginStmt()) { + Begin = getDerived().TransformStmt(S->getBeginStmt()); + if (Begin.isInvalid()) + return StmtError(); + } + + // Transform the end statement + StmtResult End; + if (S->getEndStmt()) { + End = getDerived().TransformStmt(S->getEndStmt()); + if (End.isInvalid()) + return StmtError(); + } + + // // In OpenMP loop region loop control variable must be captured and be + // // private. 
Perform analysis of first part (if any). + // if (getSema().getLangOpts().OpenMP && Init.isUsable()) + // getSema().ActOnOpenMPLoopInitialization(S->getCilkForLoc(), Init.get()); + + // Transform the condition + Sema::ConditionResult Cond = getDerived().TransformCondition( + S->getCilkForLoc(), nullptr, S->getCond(), + Sema::ConditionKind::Boolean); + if (Cond.isInvalid()) + return StmtError(); + + // Transform the increment + ExprResult Inc = getDerived().TransformExpr(S->getInc()); + if (Inc.isInvalid()) + return StmtError(); + + Sema::FullExprArg FullInc(getSema().MakeFullDiscardedValueExpr(Inc.get())); + if (S->getInc() && !FullInc.get()) + return StmtError(); + + // Transform the extracted loop-variable declaration + StmtResult LoopVar; + if (DeclStmt *LV = S->getLoopVarStmt()) { + LoopVar = getDerived().TransformStmt(LV); + if (LoopVar.isInvalid()) + return StmtError(); + } + + // Transform loop body + StmtResult Body = getDerived().TransformStmt(S->getBody()); + if (Body.isInvalid()) + return StmtError(); + + if (!getDerived().AlwaysRebuild() && + Init.get() == S->getInit() && + Limit.get() == S->getLimitStmt() && + InitCond.get() == std::make_pair((clang::VarDecl*)nullptr, + S->getInitCond()) && + Begin.get() == S->getBeginStmt() && + End.get() == S->getEndStmt() && + Cond.get() == std::make_pair((clang::VarDecl*)nullptr, S->getCond()) && + Inc.get() == S->getInc() && + LoopVar.get() == S->getLoopVarStmt() && + Body.get() == S->getBody()) + return S; + + return getDerived().RebuildCilkForStmt( + S->getCilkForLoc(), S->getLParenLoc(), Init.get(), Limit.get(), + InitCond, Begin.get(), End.get(), Cond, FullInc, S->getRParenLoc(), + LoopVar.get(), Body.get()); +} + } // end namespace clang #endif // LLVM_CLANG_LIB_SEMA_TREETRANSFORM_H diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 5f756961c6e1d0..937bd23a122153 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ 
-6650,6 +6650,10 @@ void TypeLocReader::VisitComplexTypeLoc(ComplexTypeLoc TL) { TL.setNameLoc(readSourceLocation()); } +void TypeLocReader::VisitHyperobjectTypeLoc(HyperobjectTypeLoc TL) { + TL.setHyperLoc(readSourceLocation()); +} + void TypeLocReader::VisitPointerTypeLoc(PointerTypeLoc TL) { TL.setStarLoc(readSourceLocation()); } diff --git a/clang/lib/Serialization/ASTReaderStmt.cpp b/clang/lib/Serialization/ASTReaderStmt.cpp index 96307c35ad32c0..17eb0aa9ad0ab8 100644 --- a/clang/lib/Serialization/ASTReaderStmt.cpp +++ b/clang/lib/Serialization/ASTReaderStmt.cpp @@ -2725,6 +2725,49 @@ void ASTStmtReader::VisitOMPTargetParallelGenericLoopDirective( VisitOMPLoopDirective(D); } +//===----------------------------------------------------------------------===// +// Cilk spawn, Cilk sync, Cilk for, Cilk scope +//===----------------------------------------------------------------------===// + +void ASTStmtReader::VisitCilkSpawnStmt(CilkSpawnStmt *S) { + VisitStmt(S); + S->setSpawnLoc(readSourceLocation()); + S->setSpawnedStmt(Record.readSubStmt()); +} + +void ASTStmtReader::VisitCilkSpawnExpr(CilkSpawnExpr *E) { + VisitExpr(E); + E->setSpawnLoc(readSourceLocation()); + E->setSpawnedExpr(Record.readSubExpr()); +} + +void ASTStmtReader::VisitCilkSyncStmt(CilkSyncStmt *S) { + VisitStmt(S); + S->setSyncLoc(readSourceLocation()); +} + +void ASTStmtReader::VisitCilkScopeStmt(CilkScopeStmt *S) { + VisitStmt(S); + S->setScopeLoc(readSourceLocation()); + S->setBody(Record.readSubStmt()); +} + +void ASTStmtReader::VisitCilkForStmt(CilkForStmt *S) { + VisitStmt(S); + S->setInit(Record.readSubStmt()); + S->setLimitStmt(Record.readSubStmt()); + S->setInitCond(Record.readSubExpr()); + S->setBeginStmt(Record.readSubStmt()); + S->setEndStmt(Record.readSubStmt()); + S->setCond(Record.readSubExpr()); + S->setInc(Record.readSubExpr()); + S->setLoopVarStmt(Record.readSubStmt()); + S->setBody(Record.readSubStmt()); + S->setCilkForLoc(readSourceLocation()); + 
S->setLParenLoc(readSourceLocation()); + S->setRParenLoc(readSourceLocation()); +} + //===----------------------------------------------------------------------===// // ASTReader Implementation //===----------------------------------------------------------------------===// @@ -2927,6 +2970,26 @@ Stmt *ASTReader::ReadStmtFromStream(ModuleFile &F) { S = SYCLUniqueStableNameExpr::CreateEmpty(Context); break; + case STMT_CILKSPAWN: + S = new (Context) CilkSpawnStmt(Empty); + break; + + case EXPR_CILKSPAWN: + S = new (Context) CilkSpawnExpr(Empty); + break; + + case STMT_CILKSYNC: + S = new (Context) CilkSyncStmt(Empty); + break; + + case STMT_CILKFOR: + S = new (Context) CilkForStmt(Empty); + break; + + case STMT_CILKSCOPE: + S = new (Context) CilkScopeStmt(Empty); + break; + case EXPR_PREDEFINED: S = PredefinedExpr::CreateEmpty( Context, diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 26279d399b53a9..b442684fd57c1f 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -305,6 +305,10 @@ void TypeLocWriter::VisitComplexTypeLoc(ComplexTypeLoc TL) { addSourceLocation(TL.getNameLoc()); } +void TypeLocWriter::VisitHyperobjectTypeLoc(HyperobjectTypeLoc TL) { + Record.AddSourceLocation(TL.getHyperLoc()); +} + void TypeLocWriter::VisitPointerTypeLoc(PointerTypeLoc TL) { addSourceLocation(TL.getStarLoc()); } diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index 896e24c8a13de6..26d6d91b0edf01 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -2156,6 +2156,53 @@ void ASTStmtWriter::VisitAsTypeExpr(AsTypeExpr *E) { Code = serialization::EXPR_ASTYPE; } +//===----------------------------------------------------------------------===// +// Cilk spawn, Cilk sync, Cilk for +//===----------------------------------------------------------------------===// +void 
ASTStmtWriter::VisitCilkSpawnStmt(CilkSpawnStmt *S) { + VisitStmt(S); + Record.AddSourceLocation(S->getSpawnLoc()); + Record.AddStmt(S->getSpawnedStmt()); + Code = serialization::STMT_CILKSPAWN; +} + +void ASTStmtWriter::VisitCilkSpawnExpr(CilkSpawnExpr *E) { + VisitExpr(E); + Record.AddSourceLocation(E->getSpawnLoc()); + Record.AddStmt(E->getSpawnedExpr()); + Code = serialization::EXPR_CILKSPAWN; +} + +void ASTStmtWriter::VisitCilkSyncStmt(CilkSyncStmt *S) { + VisitStmt(S); + Record.AddSourceLocation(S->getSyncLoc()); + Code = serialization::STMT_CILKSYNC; +} + +void ASTStmtWriter::VisitCilkScopeStmt(CilkScopeStmt *S) { + VisitStmt(S); + Record.AddSourceLocation(S->getScopeLoc()); + Record.AddStmt(S->getBody()); + Code = serialization::STMT_CILKSCOPE; +} + +void ASTStmtWriter::VisitCilkForStmt(CilkForStmt *S) { + VisitStmt(S); + Record.AddStmt(S->getInit()); + Record.AddStmt(S->getLimitStmt()); + Record.AddStmt(S->getInitCond()); + Record.AddStmt(S->getBeginStmt()); + Record.AddStmt(S->getEndStmt()); + Record.AddStmt(S->getCond()); + Record.AddStmt(S->getInc()); + Record.AddStmt(S->getLoopVarStmt()); + Record.AddStmt(S->getBody()); + Record.AddSourceLocation(S->getCilkForLoc()); + Record.AddSourceLocation(S->getLParenLoc()); + Record.AddSourceLocation(S->getRParenLoc()); + Code = serialization::STMT_CILKFOR; +} + //===----------------------------------------------------------------------===// // Microsoft Expressions and Statements. 
//===----------------------------------------------------------------------===// diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 144f034a9dfeff..7744ac38d1ac8f 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -1730,6 +1730,10 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::PackExpansionExprClass: case Stmt::SubstNonTypeTemplateParmPackExprClass: case Stmt::FunctionParmPackExprClass: + case Stmt::CilkSpawnStmtClass: + case Expr::CilkSpawnExprClass: + case Stmt::CilkSyncStmtClass: + case Stmt::CilkScopeStmtClass: case Stmt::CoroutineBodyStmtClass: case Stmt::CoawaitExprClass: case Stmt::DependentCoawaitExprClass: @@ -1840,6 +1844,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, case Stmt::SwitchStmtClass: case Stmt::WhileStmtClass: case Expr::MSDependentExistsStmtClass: + case Stmt::CilkForStmtClass: llvm_unreachable("Stmt should not be in analyzer evaluation loop"); case Stmt::ImplicitValueInitExprClass: // These nodes are shared in the CFG and would case caching out. diff --git a/clang/test/Cilk/Inputs/libopencilk-abi.bc b/clang/test/Cilk/Inputs/libopencilk-abi.bc new file mode 100644 index 0000000000000000000000000000000000000000..1c909f71be18ac76d07ef6cbeeb3224dae8a424b GIT binary patch literal 30464 zcmb__30PBC+VIIj5+H=Is6iGlixg2I5=3Ot1kj?QMp1`Wr%8aYRF(+AZ8~ih6spkT zQj0B4gW8tXc8a5Iv2|v`q90n+xU?U&Oh2r&WjeNl)%LSfTmSbxHw)U%_y6DX{CPs| zJ?}l|J@0wLLe7TW? 
z3+0LMCp1UzQuXr}38YDLcXR5+Q@unX5&WHnrmzLvtlhp@BhsYcRFQ0nPf|@|uAwh7 zIw!E9Nvhh-E$$Z`4Q8?zHZo^8Nj3Gd__+9%Sx>OqCPnpoGldID zE6FKlI^+Ml*%}!QO?yip1NjWwuek?;LYtX+|J`G0~9kD;S0>vr2$j( z{$iSOHt}xL<<$O`9jC8Hwrgd%`8nQu?%m`5Dehv-Pr~n|yMkis2!-LlaN`!sff?zOk1%VY5lL7sBMi;GD(MHM5$NWxml~Msm+PZ&Tkc0a@I|gYGYi4X&6pG za{viG7+82Icty{M@6PBI0XKu5S7ro#mOL%h7L66>y_*_1A>cJ$Ku(m|u9eI94HwIj zCpRyQZeCcM+L#-&d$&wZdwpuEiSrLpW7JB{P3cSSd2KJt|`EQ@gZLArkjqcsM zV9epm@8&83R`6!rv*hLn8dCcx9rx~)rbTgOPi}vq@gL6zMgdaGCo>x+e8?=jKL2t+ zfPD=4c5g2?JRTY8pS)}j*W@D_ql3P8@1?&tinMpUUJZG2T6o4|Z#^Wv-EwepU-?}Y zXLtGbrxu%+|4w|iP#wlAp=^G*PwAa=JG%D*@zY8#$~em2NRk=%6^AJ3D4I`sn?8TO z-YR>toEtlBCa5j_TwgIuoJxmMJ!2V}@z-Qd(5{cP(PK}j%`0C2&T>&XyRTatB;s%X zTlIe{WMX?a3FeEu-k;Lgn0JtscuZK|*peXmBJa5o&pr$|m8=e(3V`lnwNWb%c7^a8 zCoVl6^U7xtGS~1}t)KbO%Eq{9KHIXC4#NvRuLexyTO@p0VdQg*%O9_|nqH4hb$lqY z@bt|LGID>1*fupoGJef=qOPf3B4pPbTbG1&eTAQq&BUfX!+cmSXKXefu+z7JE$ zy_7zm;Q&|22&n|e5SkNx=N^1GQsOP*^@NQ?hFxtByGkNHjSL?$N1RnfkWHI~o8p9~ zvIe7}Aw}CzliFa=Hl%bm)X0QeHw(9vS$~%+tg#7=WSB5R*TS!wA^^!SC*o3f`1y9f ztC3-&oN%(WrC}=|X%%i2HEbwoNHGbwY!+^fYuI3G*woUHB5l|z7uLohwk^5B-^U4w zKe0sgs3Oj{hmE%To#sS*#jy)BI;|<4yfh-(DiUr!{> zZZrPZi2qoNJ+9=fAOr*D^l5Y)?&z1g_a>!?$$zt5uwE;~f?IO`&!H&yzonMczzI|{2#L!7 zyJ5-a4#9Qe{~c+|p4){4q&4w6RkL^FP^S=%+{^AyQb`OWHqB!OR1rPx;b+_ZhOdQ# zN{qILjc^PNn+y##q;X@ea8ns?V_ZY6NVu_#_dB;fbTt?m;Z~)PNJvlkr|o{DfP@n< z3_5u&?5Zui#~k6{*o0d;5Xlx>gF)6%SH|1i(y+jI~SDdSi#T0q^C%f zGU|WejA+y5h7=f}NVwUAc2`?w-3WTF6oPTJN1Wv_Uzj6$bYUL>S5*<8f}QHZMz4jB zwEOk7M+|g_f!SWwMqE-&Im@w+ADVT8sbMPt<-cL`yJ13V-6l5B9tK?fN(HDQOtWP| z;7m#Zuh!nMrA3%hVBJ_Id_rrT-P*7v4vSk_VO&_@3A;5N?6CuEry)fq+}t4?4m%4R zP(@tQg_->L(d2Mv_v{z$SB#%Y3%GQi&0kd8(qJfSr~%n+hGBw35N;vhL%=G@LDZJ3 zrtq`ZfUxkZcF!2Ui45;Y1u*eUph#srkd(nB+zh&u+K^HPvIAcP3iA~pDU1M#pVozg zLmRnU7K08RXD#&Wvf&i#6Q?7 zl?7RfEGAJ5^Q2sTk5n{JAE|Ej&c2=zRKKv533KqTHOBHCeASLv zaf5uRRHExKOO;e}`XMtkg<26X8~=u*M{;9F}ah z2{&k+4WV+S+;;;b+-$pTl17FU_vh{kP)66EDeo!yvMjAMRQsAtV_a!5^i6>(qb3M8*I`4`ItR 
zX$0~QmN1sFFaM9NNvRblDtzDp2dlksJYQ`&H>Gc~ECnnC}PsmXjA(l76|;lmybQ<=>Urq{W5FA>!rzq%=b(!f&M{$#GiZeyrAM`_VhHJ~C`bhi=pk^K+pMVrz3H9VnZS^{JsBg}>5WkAZ93%xBW>6O^guX`6V|kv z*Ik!NZrCNCt0lMO(cPC+5GW=MUJL&e2*?dd@3dwfw}#gDrZz#Y)SJ=OJr`22E4++> z+~XS{XVD5b00RpeLOwYyxuNy{dKg@w|L2f5G5+7HB~?<%b-4tJw}u6OAC`=$A;XIP z?1l~l+TgWGHzLC!`1D1Fg9Jw-r<5=U(obe*cJb!!way(VI0@2+!9uFLH5bw)FpMn> zA$P%=Ya())>_%~jcTG|*6K!tVwd-PzO&V96F)JzSnU=^yjj^LEEMOZ(x$wsDQoma{ zD-e? zxA@{r{=wi=+F;)z!GKD-cz5dZq~z1t>&_O&Z8u9>+L?tMLE!1I{uML~ zJHIP?COW|T`;4>onR^cA2HuMEE|dv2#FUcA#@ZP>`hvHcO{(HBVPV*ULgn+i)HCMP zrN-a|M{`c^Q8slm-iPBBzZ$bMYpQpkUt5x_=iQi7mehfE`3JLtbCbk1wrPHsW_TaZ zU3@&(Zz}(TAl>TcD*V4JSgl+^5{q?4UE-$dn)Qj*27OggdBys~qB6bSs4GjXsjD)U zSLzcHm{g=Guc|1o(ibNb{aTyCy7G$RnX{5zjcVN|GYiYBYKtm#Ri(^EeNAn7brmx! zIYFK#UV$STOl38;~=E{p^%~2E;73otHb7#+<_6zbWs%z_N^fQfH4Elt!Um-r5t&nfj z8S856s%pziVVF#Ll`%VSdH(8^3p6WNFI%=~*~6}4{_1@7f@L`ia$JCw3s$XOIljiq z1*)7!TvyeAVbQWRs=P(+2I}Sct6Z>(^1?*Qo#9AZBrn<7c=vRqO zbaA{=Us+wVMN?f>P!RS9KyP$X%L zzpRI1t4%1=Xk1k^8dS&{W38svs4H5pVLR7VfY=`F{6WqApLkuYFG(n?E#Iow7~MKh zQC(G({I#Q)NL~A3M=#v65}4o zuaI9-p)0Lr`KPO>&=_wsGJ_~2Ex8FI|k5eq5P>xzxK z@(TUtqFQ}L2@BMe7Z=slU~l8OS)(tmE7I41L=6;&z8FtSD=Lf|G!^BQ<;Dm5tf(&1 zRV0+r10pi&Ys+-S)tgwP6yRoRZCQ1V5iENnFlmdXw7R+&YUy{a3cjzZm=aoBqc>E;kG`r@Q?sGgSX*7Cuhmzv=MQ!U`1CbZ zx{8^VAlT}nndOxRU=*d$IikY4qV;-XZC$Ot81=2Ht`eAyb>TXD4TlPRM>}(#P=)i9 zs7z=Z;5?dDrEasvUEZVvvWq=Tq3i_w>mQ`Gvd*aA>?Dq!(;rRsrYg{WP;)(uA1w~G zomEkm$wfMYuBhC&1>vlGP+%;F%d0dnGNi|IhJ>?j1o&9K`Ik0tFqYMzwz8)0ENN;D z5Q2bgkjVoPs7=F zrmfMq2xQd>dPFHes6a2LVNG45sREO&fotnQ#K07=yIShCH5xs*Auuaw2JXm%jh4oh zU0%;stwz5Qu!386ip^>nR&hduq!o>nhNVf*`6Z^i_{ngX5y? 
zfmlqd)*5u1s+=*pLSF)rv%C;wS~l*avKqKUd&fbuog0t|lnl0V4Mx`o92pV>x{lEn z{4i3l!D=WDdQzjW(SfaLib}xc(x?rNs8+WTnt(!9Z$bw{ht77P-wY`cB7qZ%0|BgW z15lBkx=32-{*0eeNqLn}wh|eagM2eq8w#t9n35L%8r?HpLQg2e;O+Fk;IE3-Q^7&V zFLK#2L<~S*3roaidn>$f~eazcVTA4RSw(r&5?alWA`gxq(00`=fE zuoq2bbyYPmf@+Ynpg}5;Fdz~7t}ZF5)q_#c0uC-b%D%_cpXUM+3t)@e-{f=nBd?277YgQgVh31)m-VXjnD3Q5*|Fj$j{mE+15A#w|2>K0j~f@qON z*2IX40MODc;;I2)3Ta|UsoX+r0BAwLWB{xOKqmmkni*0F=UsRX7fBdg+yKq{yDOI7 zd}xwA>20Y(gHBLLr9zvRm{elYmo`L40uvFO< z=tO~F(~_@R;&IW}w#*|_|6_TUEmQLkFfWIKzk(j=))(A3DPw(f#PKOP|>#&_#hH8e6k4^ zmFWimuK8w`b{Lu@xAx-b0%F?^d5~Sd8xk zaNIcX%ehs*r4-+odykRX2>K z1QHX&wTK98c(ET8=;Y2kK7qlRK5+u=t1bsTn!&CAD70xXl)(NIBg%S;OJW2Bu0$#_ zx%~r6y9Sqft|U6I%;WZpx0w94fZ93RB*T@01tcNq(3CR;3#51~lz(XKA9$7|DTJvh zLLxN9i7bwni0K&)k*^78+Bq<&N?H@pg2%~CtNFuGiW$4%_yruZ1o=xF1{%E?#T;(R z9Ilo$S{Tt!-ehkGAw?57@AVI~#S~3Y;jyE;V1XkcDL(u)Od<{wYoO!|F60T}ch_>o&}iP4H(ybLUW zIf4p{RcG=h$?^rf;fEelgzJ=L=Wlf{&kGSNUphP>=AP{sP2Nm z8IXz<;_@iL<4L`pu|@V+g}5>c3oWtwPAHTY3M!M-oHi8)JKiS9*hBv8>Ua#A-xfQ^ z!5vm80=Q!e!0$NB`9S?=u0o6Wp_%fKU`FkCJ_Ep1Bra$!?qWEm+wdx0J|>va``y_= z6Bq5+dD-s1Y@Y_ey?$61EB6X!6bb%Zm|~+QWQQRNyhvWYvaDU-J_sdeca^U=>EjUdFwXx2Huno=!vnEO8ayJj8_Eg7-;D~ zH{@vk+fehTW6l3{_ngwpAp{j6cNV*qj%- z;}{dRXpXBtUl{+gm2~0-80cYXTGMN26~j)z%hI%GUPD1JC&sZHzHllJ=cw6_#$PG=TXbT8k8%K~za;ze=1M7?3M7`j)YPejpwI z=VtUh!vZ>33sGkK9_=-sy#XTr462|`UCFv(v?Zm(V73Al9Lzh?^oWiD7%nq~#d~gM z-+5vDF_6zFgj%9t-nFKmIF$Z=Z~C9Fr-N!IfNJla3Mxt3xrlZ=(T9A8t%8Gr^Oy*@ z%=*xi#7BwJJ(plwKDqJi9TzdlUYv)}aqPX2yH5(0oBSII(LxBNAEfNhWZvUU{Fl5a z(m@Fkf0jLO6dYgjU&*|+9ebTV0|sT{-7&OBC2HN4Q_s6&WC6fX>dCJY0i~ij)wEnz zzxYS+B9KI&E6A1%M+@37&4bnuNs&i;d3Tyr9ZmkA*)Tl0wSC?{dgpy}Yu*@0)&{v1 zxG+ev^2uRpO4y>U_p5a;C(YMV($Q5(j#!^t{5R;tDTrf|BYU2sy`t8m1Cks$^xWce zzzmiQbY|*!n+w5Z$&q8v`TPu*aVyyA>p%tZ1Ff%1j$D6k2VRb10kGpU*t+|g!zNm@|QbOc5I-xlpU>um-GgAy=%r9OVE$PZwWK`ej4;e%?)v}bKGOiu=Y2&iX! 
zv%lf#Ew`g3VrYjS^tPDxrY!P_?-RbeGM?dQF%ZZpZ*x1;Q-&75VUmc<1}ENEZCOtm zrXS?);EK#PCoHBt>nX$ZTOfltr@H0`#wYu{+mmABW-)mzvgSn{(rH7B&j1Nufmy-; zs7PLRi=?gI=eeFY??}0(3{+@mB=Q83xc02q4AZrI0mH|;Cr}25>wQl2ylD!M5p4n6 z3*1UT)N|m64xo8)hhzHCSs4g$2iwGe4089WJ=*t|D_r$LSAA@)b(hX9?vTzHIvWy6 zQt>WqFsRQ1W9xm$8?-tyd1iaopA6H>`SB#zS;H$RmhY=P;;xa{o^{MHeLX*kjIuS9 zK|%33zVGf{Xycl;vg1rnYR`I`t(40el2Q>Azt{KOnuQ{fxzkxGxjpMW!}NLeB(4Im zgFu%q?QkRv?eLEzVn~&o3ZtH5qkV&=6a~d!@qIVyVH?R}FnBwSLed2_%Yv~~+y=~Xz$o&FzsS=OPRJG3L*4Dudxl9JY*^_5E!A{Nb!dY_}c zZypYSBm)fs`tn=ep-vwGVpz}&At8fqz;x_yqe_;$C5KXmwAQ%nBtkla-J|D={Z z9T3vKYx2<*p(ieecCQFK7ZCp8#qet@BL03c;)jbxyH~;)-jQ*>H+h43$t^n#1FcRp>xKg}Su`jU zMc9eMOa5|u$#C$p&ypd%e?%LEo+$#Dp3u{pVT!XdClSsdkWF##mmsT zCUx~mIxuuOtJO~Gy21h=z5(yKX)j+!`|?UK_rBf|6o=K2)Df?Q=Xzi7aHgo@p0uM_ zZO|*B+4?%2Lb26`yb^Y?04+3>TZfnZGZ}QGRG0NKnf?|Rhk(2rpuGFm$-7SQL}b9V zkbt~@w6$fL-6zuwJY|^)qMBx>`3(RDA755_*XGB%Eo@G}Q|9GEw8pR#U^6e*j!_)B06{HgCQH@G*`UdHVgzq}wp>ad zYC+qYd?%R&;7S7c*VU7|RtZfW?|Im~ym%}=(kqaZAuiL%o_Svf&HwK5eDmI1P-XBG zrX%i~3`KanthP z>2Y3=HA4m93A!WCWxw^NXM?9ths!uRxV&_|=|b>yOgE5305rqx*Apnf<#gAZmQR>I z8#2{KXN^?Tvn-&)iDRzm0t@H|z<}xS-o-~(Eje*zN%yKe^imalAIJvI-bKA)yQt_} z5;Cyk0t_7ohMWw&StB?`(|@Ozax6S)>we?-I4|{fK;q-_>2(mLU38@NeK2`|CV;Ul znxfE~mjuTSCW=TrEUFl}h`KiP=4XOqE7L?K&@BX5LvMad0c1K0*b{n_E2K4?fJ31- z1BJ)Nr**A4C;PkKumwZ@&N{n-&VWQ8Io5YE*}mK(gQ`B?@ByC!0Hm>99249u6dvQQ z0gDG;h-Ngl@Z?bU8>R{1MPS6JtA7qi+#;W@MbQAGP!iRBJ0n;$kFscPh2HFOqLHy^ zOb?$t*~6j%SAuA+1tfN|X!-#SOu??hG7Dr|pQ|2ynRDMX)%2*FVvL!cYv6}=jA?}3 zU25Lkw{zye;VZv`}X8*>x z`ZpVAL;J4-5?`R=gvKyEd<@d!Zz!7_ecK;faeUk)gEwXC>WfsAPE`$Z1DxLckYua= zBGp1neiYK*o%+r1KmQGIAbLvtUjMgO&d~yxCh1Sm<%)uTNgs_cj8`2~nOhLe5gje_ zuAD^xYxA}Rj{>UvDe=XEcUCHF6cuuJO1x3<4pmXeG*F?28hqN#G<_e2O30);?OEko zYRS0(L0f;aqh<0Soxnh7Lm?fS5`Wx@2KZ{x zw0mAu`D(%YQENf6tl4Xy&V5vG`Xjv#L%=z~anLFF;D91`uPN=E{x$$mSq8LSAFc0R zQ*mxzB{&(srw>2(^7N_B^$smP4SbVhQkY3)f+ zj)G~s*M^(i^6}d-vPL$tquLXKc8u(kjnE~scpw9~cr5C~Y{3a!L^Jua!syJc+NJvA z6_`9zf5E%Cc6h>LpUviR_U4`0JCIjTS|&M%_qm&zf`r{`rP{}EuvAz(M<#Bc{-yJB 
zFM*Z=IU?m&4jTe&kJ)HV7^7G8i3yy4U>Zsl;bd-=zN-f=V`M?gKa*Rv$d0)>%f(r{ z*G~Rp*CZefKu#)7IJ28xb<)^sTpW~Ypye2}=!c5%K|B|8tCrfIaN|+zUK{#H2VACU zWG}J*1_?C%-btWAW5b3h1by_01)Mu>0W38Kx!-E)l6pxYv*sZM|EJkOkt9km0J%U@?^7S1 z5&KTwTX0~ntj*MV;O>7OxI5_&1M=2_r{RM2>E~o^1M*YdQ0u9W&ph?FMNePzZv5Q4 z@tk+#DeuO=dpCZ4Mx4FFT!1ZPjUS&e>=>IqCrUBrngYhG5dHKyGR9E|{$i)!tD@nW z=%nn3d62Bg+oDXq&-CYQxeaN6-LQise9b6sTvNc{d#7=`4x0x?gac3*P}&Qc&I~r4 zlOL8fo&xmpgsqICR{kiW7u(BzK*bo60lKCSn)?UB?ld8LEX|(}D!oW7UDYcAG!6~? z05*nXq`=rS1$nbW(kI5+pII?a027fY*pmW#gr46UY8)79ya1LZepNJTiB7Uc{{%vC zihQ2aRNA6!AB6P}>~DlsejwhIU}O}PQP`0}s-0L;w6H`WF1i%^M3R-*t^k2;A@UE4 zZ^-tO^fm1vM2W#zSqS9F$vo>yV34f@us9Ncah;zb<9erMy??x6!0xjcQavE#Yak?O zIwhnn>iClM{((2|h-ugMgjz=N1??KBf8Svq=-k-~7zVVRkGy!|;}^RhX*(z1i?V!E z$_NU7@okeen%QsWAa8BI*Vu`2b@;_|gmiAx(BfmA+txwxj@Medpu#SQ zYvop$EYsji&z;3Cb7(6U9&2O237y+Q0m6QVE$eLNG{eMjs9>YOL0*R48rsE~K4x7Y zG%D#=>p#W_s+hnyjz4_Ih64fK_Q;@hNhdel&dX@zFSgZZ+W3p1Z5N+@>z9o2!W-+S z;TNv*7kBZ)OEE(YKjg*Z6SHDJrNal&o&OYBzX*GWeGvQd74qkfr#k~GR+YUjas zK2#gy0U1jj{H2ciB|tD-f;yvE6HW@C+Gzc!fDk%ze)-c|w=IFIw)$yEd0e+ooJwc} zhNyrKt~zr&xj^J(XlUbjPH25Iatz074ouMb!0+ARz%k+v!!INL03S2qCHw-1T|CjU z^{)kfPyAUfvA;&UKePoYB&&kl-y2bZVc{&RP4I-cITMBnWVmQc806tOFtDp0FAZCA z9h~`g-eQ|MlO3ta64AxU09J!y*sTBrfo3?I=b#T6fT7cpY3DtRD$y3IG=i{{!rQ^QW|Tk|rtM={$B~h0lv8LfX!R z90*C0#n}I2yIA0(JQiu?kYxh$8u*P!FnnL1nmBl7L%6lT&DhS%`_r*R_%_)&AEQ{}Fu9!v9OP>voQserW2PY-$T)0GZ-WOvc$qe9 z20R4P(hEL;=or}bgJTeX!3?NMz{`;s8OPQmBHlhPY@sXMH|2z+06gm9{+1O!e9ThF zhtSw_qXXQ)0rx%7JE8F23Ao+j$Sc_;qvDqRE69s0h@9l8r1d}ZNfzvB9S{cgOQW(E zX6@-&cz{zf!F=FM)3g|@{}t5tub5}zDTDc{(d9&Y1@>@O^EbyTUQvEGEoLM{yCz^{ z|3IrM>4@k~UdaICH+jP3Z^iOu0XJ?eEG`LtB9;(sThw64GyUReG>8bUT~JoGMU8w6 zao|hzqM_UsFEY|vxhn1(twmkm!*7>*k!XRCShRBATIEad=w9}d@W7}4hM(Xez5!V< z(UuZrotjYyyu##vsVTKLc5_UOYye!@mr%I{TUgh@k3ZBPgCXR0pIYA6q*~$g^Z|~& zlM@ikmlvt`_7s&8`49tps72~lixmnVNTuCPDjluX*u4UATEd6iIC;Ny;T-w+51dR@ z*s?Exnv#|=C`Ahv1X#3c-;IVm8R-?&OQrUDj=WGA94QjKs}%+bl*^@y`&I-AuzwL> zt|KuU2EBROKAR~^Dz4%9TcFStskLiQ+h8Yn_*f>dB&0sFbynN}a|Q|v#XI8o-DKjo 
z_WiTs9L(SGy7%@FYn0!Pd7mguKiiZgVxLs)6Q?ei0<1iulDB%zAH4wd>T=*m_!r>c z6sSQu6{K}4QzG86D(<$J2;zNoou&{gm)sDOg<=WecWWp9Y@3fw0)g98{`D_-&bC!f zwicTHSbf^2-PU)WXa`NAHV#l$aP6pqj9yULPXp0qUgyq3T4vdgm0OdO;QFJHa9x(d zaLoidT+;!4Y%R2<;H7mOF_e--H=0z}e6}6nC?ia5a`K9wIFcce(BF1H(e)%C?6V1} z={#}(xDW}Hi-g<=(DQu?(LhnQ7f;rx&Do<|5O6UMUM*nxc2a;{`cMnAZGL#p_2`o0 zBKx22`bz`O8TpOF$=2n`y~T|3;#jotEI%_PH~!8x&R(!=bCZ6_eZYNz` zfm?Xs7{pgmTZg<5Z9T5+3-kblpq=Bk8ntr)Yv*B5{#847-Vo)o^Nr-ijUGGy7BUgH zoxk5JIKVkQE$hJF8V8PBF^_7>uTSd@*uSCSXD&Sc$qa@3VX3rnZYvYK(QS)0j+a0& zc#UX_2A3_W!4{1!TP$?jVo%s@v>S2|ezM)Rcm-`yIit5p72v;!wM8k~Vqb55DRBg4 zcdHeGZ5c6j0x+Q*rP$u2iu7L&+VPYm+j>qGe9QJ>2vIBLXieh&JqT$5l;TjSSxFRv zQyHtEW#+6HbxM)i!IQzIV(DOr)e^BIjc3Xsb9E#yRFjSnKDRwzZZeDZDZEz&2b{aTB#4Q7Du5&2K)f#I1(c-kBr`s^M?mpVRc^|t zs?1q%lx*jtjzBU9I$|2HBi4Yn!F?R>l@%Y@`0?-PH(P>1(5N54Y~m{}lybG$E2|HW z!qKG<7!N=ns2+sFIqUEH1mK39I-r63N%0;~xIGtcN3GG<5|{k<*^lk>$p3pWG5KZv zcxNCsi+m%Ny(-$cF*@mTAkFf@TfcF>_=~AmVdp_>Z7JQFqGG<#T9{Gn-Yx$PHk5z% zQz-vxm;AvOJA&$4)MuJhHv^vqnKr68^^ELFR+xx7yGTAdyC#`bDYA6W?CY-QID&Gz z*IfYbSfPX$z<|446t=O4$Y#YImh!FZTCG|vq!K7R(q+8|?~0g`a~r%+$3LKR-Dw*k z={%DkIF$Wy;6i%L?E89zYU#OP-I_yW1z*dHB(3K}D>*0&FCG?W6n^A%g(e@yGxopo1W3k8Xis9L~qB0k+kC7LT-N$Q>tsOe$v9-2s<7`c} z6KxG7QTLG~>DI&|pb6sUR~3@t*u-IQ99rIn(#`u^!cdQN^UZ@+Z%dR<5MLh*w76nq zO()9bmv$*HG!RB4IK~Kn`~f288-Isd(H zkj{(sYa*;CPKq1FK*nOy3V%jSO{0KB>BdBjE=&SOchV~ij83g&K=iJl9B~&Nff5~o z)g3^oBWO|fH>u2%z!4BHZc>qY_!M{ngX&$XH?yeT-2PIV_eqngeNrJ9XNyW(D)#wv z?pI8fx={vyi&qJu0v=|lCnJvWa#cG|DFb$m1Y=LzrSy|v2 zwrP@KWFe#mbguaz#J#KbI4+lgK&>nl?{J8K4Aef>UzAu2mK~Pvdbai1;T%>MUpeNA zK*b8ns=*dZa*tBl?2*;mRyYRn7t*-l)Y~`+Q&evw;mF2}J{~OGxO_$JmZ~DRf4E3q zyy(%{yWk%lS)eXW%AS1Yji^q={>bLpkm+rTRlIz0AfB7CXaA)#>mv(HrAgMwjyIwl ziv25^%hm+k*gw#5Is&7|`vLny{9XdP8I<1sm~U=HvOPL#dI?x;rFq==*x?B3wTU22 zD^K1iAi<*kdXB;TGM4k8-0P>}2tXTVWC!pOhJnGAcD;OYI0W2EFt`=f%)#zi)T3u9 z32Gh(J>_kPNAta{<<8YlPfR+bua5VlA8I4xaiw0`> zqLp_ptCCOn&D;}|dfxJ5vts`<+?)WDUp>*r=8$uS|FWBd=N--Mn*gay(78?x6n{2K 
zn?DXd#g*;;JPuoZHs`#NjZ&#RhZy41DQ|}6Jy&S{f^!L0*Rr&bzf6;ZexZ&b9CXZK z^%Be?W;w(G_M>A6*#xIS{2Z!h95{EyBg{jbI_4Dw=d5qkppI#(s_AnGZe6=dUcBnj zHH<}6*NQ?}T|4U1wRfGm_6fY<1rFB?+lKFiEY5cq6_34WFSM({1)u$Lo%D>a6ZRxB&X;Gg( zOBtt6pQVh`r)_h~K%a)3`s9ElKEnbDih%(h2EgC{K`6~3VcXa;F?PXXdZ$F=$18{kMkw{RO@RhH>P1OCWb8#qdFg+3`^Pf#VAWe8;uyRKoj0$@IylZ)VlqpJoj%+D%BKG z7oih3fgCbRp%Cb

Ovypf}&GFhjjQd*0b5)vhU%yC5CmCSk%8Mj+vtaa*;6rb|fD zjqd<8Ab{KAiQQ}Gd0FtkZr1w}@f^ON3|!SQagG!a{%aTQ*tBi{0@jp$h_H=C=!FRD zxjPY|0TJGHA+&d02?0(9W3-q0$laVAqvOYvtA6h;6%2k*m6cE@0JpgS-DZ7O@_lY| z1-i|wIO;Z64TU@bzVugTtGZxzw%Uoa)lTay=J_<&Z1n~c0)>TQaXi1yXx~csPkAP;h35>TMy|;Z=Nx_+3A*?U~ebUjom4pgVE~-=ThXb~yUx;-HuUh0;zo}Ex{OTFh z*;<#~n>==}W;3=fv{t#>T3NeaSoFWK`xapw>iuoBWP=;iDB6A8MNs|5r)4&$-NSsc zcLZ=3S^$ExM=hp%tp=#vE9kRJVct0_cEWoE>hVuOy|O|9TngcDJZ)3%5&V{DFWE(e zD+ME9c-xlA+)mCmOb6SFgMD(`U>)9BV_@0kE1^4=W2m=KJ1)A zL|RkiJ4~br(h68)x!4c3W(ZOMyr1a=a$igan)V15l3-H+SZqj~(KBz7dr_-6k5<7@ zt2k|gcbRZZe~=|S`@*cakr>q^VpC!wGk1%_izIYl;}mAQiO__oo`C>Xxa=gfcx1a$ z1Qs)Q5mZ|!-T`LKgx$6Sj?tJEK;l#>Tuh&K#fv~H)W3)PmM~$MgJMqe5#%&Q^B-8D z`N(55d0;f}1x(A07S|8Y&o^>}H2ZmF6=uf!ocVMZU8mU}pjyX$k}FsVMl;yNOc1Ps z9C{GC7_a~9Sp64F1^w?01pU|fg8n#pXKo z0^DGB<~q)YFZG6Uc%t=i;4_{l~nyvxGtQh5oUkoGB1Y)vb)-uv{fA&{>${+;K5=_$APFcHZZX zsgMJ?B7u^ftsTBUrVi6Z8oId24ZnkPGY7^!&w}aQvj;etvx4(x7pd#Fgzl^PQz5hX`kj9W>y?c6v(THQ{%$fNO z)Pe3fG&Gx)Fr!fhHXM+H_4&XOFf(Z(T_6qR`ijAP(`CGxZiRE!(j(cu0SbL~8_YYl z>ikWliUq9`<|>2Xje#QaMKlD{Tnwf^_J@?+gI5j*F1_?usdB;H z1$=ouE(=5CZCyT9y&@<1a^g~xjl!R8v(pT``Pn;-H0lHqdU$Rlh;m!)6ME#QErC#|KOavID1v6hv_Zz(%&{2d+dD*ze z+`0-bj!z7rL?cI>hvls2hCE;YDWX1abp5$IqgZ zh6i(l!H^aX19p-!%>f2j&O0O$*>S@8b69XQ`hq#Lc*dg&&>@&vR9(VB{xUtF<}Tkh z5zYO<^x&w%_R#hZ4q;NT2znqs=qOtD2!DG++-1Au&4Y~}8JCN@GogVN=y1o}o0otP zPliCihu3Z9rO)Z@N_=BAPau+{aEmd z0rmu5;pAL{4$0@t&iB~zLO)z%X@TXscvxfUSL}b}xnea3);Gz{F#CIo=NIv8zej3F z!3f!R;O2h8ZOqwVJ62#Xy;~6!NuCmb*=NA2-YjbNuui>YdudWp{FAYerKm)+ox8Tu zQEcU!=hw(z0u{IZ9^;>Tov(%FB+j+yZ*c|r3f&x{h5-4%5W~k!Sli-5xCtxg!zQdb ze$gK|uxHv=I1`NS_0-=9pbIZ4Zn4(NA*PPoVkNl6DrdJ?8}=-qLG}JE))okdHSH31 z(yGKsYr%7R_gY`!>fITz*2n7w#gz#a1$$w~X!Y()T&;u2B&h7c)fT8=VDNV165M;t zthZ-;$3~Ki;=rSW&8|hdNzMrzU8ga80-E=biEc>Z+CGf|s5fi*u+Rt}%tvvdjolv< zNkut2ae1dnYSKz#rRMwRX4j#fOSfEevu+XLJZ|E@jks`bhU>iUx!E(%`E7L0&BBbZ zw|)nfYVLINRdcs_TP(1H@bUz=%$##HIv;WSYsdIGS`5w8JpS6VBk=!o{`L^|V#i)Q z^S7_v^S7bJlaD{JZqpNC!`f2x?%D1X4{E`>P2Q)l$&rxzzGvOWd>hwo 
z9BNp%S&?w=J?Fa3I;jAX!@VCIhpI!;WG(Dn4J3(Gpq+b{TuLC0mv|O4sON@+5_ zA+e*jd=Tzo^K6)m-;hwUi!mceCUX^j=+?^(E`be+i=gER-aw#pLn3P$9lMRjsxqic zbLdWAQcU{6Z~iuLBj-M+ALfs{U>j%g050aH7padg=>@9GwR|N<2K#I+<}A9@&E5^L z+J<>r_rvhR=4s%ciFOiU9>%qp&HG^t30>?g(8kUJ1;scE%zCWP4l`bMSv>2p$70>W7yoxQITi3Y^D*IMICst)AIQh>y*!?|_&>=WykCuDNF?JNN8E+FRTS8PM)p8ydD?8`s>Eld$gnadS^_ z9u$4@UYL9CU2+WPiFEGirL(~;1t;?U0ZcGtJ4|4P&VaTeVG&@s&wp$b(AyI}TQ_+l z%(Q;7ro>Jt58LR%IbA?Ih!aX!NFsLV&}fn7z^2%HF*n9Bd>Y;Qe9(met#li+;Q4w; z!p1Kvb>DPAG%TB>&_eD?Z^yZq;D;mqg%=q=+xvwVN$2$kw7@>bW3e!jVapQmJ31cM zM+D0RvMQwj@Aw&6ARfYGP@se5B%dwipuxXzBzPTHi94SkVUxukjVoE42DY=kk7<@m zE%)+vJv+FElbrp}XaSod*1(+=xN!z~pf5dn`|Id^?g^ovYhHJc?9Fk{>ymjJX1`J_ zectw?b50l#4i2O1S=lO_C=RL>bIsRhy!&6NFm?RDCXlg^#A<)UWv}u62IhH7aGv)N zu1DcKFVY9{NICU4XVKsEj`KG-K3^ZDNRUyWhWk^7x-Ch>a(as`SJv2*8Y5T~`}&^} zW?WAB!T^CuB+h5Gc0S`l;WmY zj5R5>CFhY=MD;?PB9)h%%bVSiFk>w*rHnVH^HU=EOey)oB>BoN!PiPFFIzK*t@FmL z^T{@~blVDM+seVN^TC5!ZY1B1S;9TR;yCsmmDzAk{as)m-nK+O z-IeV0dGtk|_*N%)wU_60OPW-C(`KscMVoNqe&Zd!eaf|-w!ih}u~v^ygnJetw}q`} zh2qQ~c#JhYoA7BGd!i|XY(?Kq>`}tbuinthwKUa&JDSEqUaE}Z+b)@byP2 zZrp+;7YE*`$C(d54R|jeG}|6(*@-i({WoDszEX}0(hBcCaO}q30KBb60JGF{{LKP# zO$lUUe8YRa*PbBuYIn?lU9<2C%~7oIXk>3cBI!F)3b%2d{p9K1D&OgZR^Z~sQt zvreA~Rg5tDuizX&#&Z0~vVhPP{B{_VQqGQA(=*ghYAgzw=} z7^YZVa0_IWNRBSh5DQu$gBO~!1-Q~$C2PT@)M32K!I&K=McKl7U#b-MTVL_WbS}vB zG{{uZ>IX8_XKo)(UcK~1e~)BSsbt5?)_$LCX+9(3h`!aD`7*PC;jM@{+mn2r_cR(W zd~^)F2nmi4${Ija*3oce^O!yv!qB)qb6C8=_yN0CBzfd|V!T{&@86F*V8{E=UTA%Kb7keLo^%wU6 zD27GXb+!+dsRU5|mt`kYC|)z@~UmkKKb;x$O+wf}O!0 zd^Q>C<_bjoBIX(HqTnQO{oRbiDG^pCu%&RHO!#ri7JN7l?eBjrla%EDNhUov2F~9v zlVjs$QUYe;mdOCzt>~7C9b{t4@W_O{AFY?l#JvUp>wn5RRwhF#xC_hc3fx5zLJE6G zYILhN+z8Ej#BRx5^oUZrsQgE;BWgvj=akw?-VEgShac2PMA@$3Wg*8}^O8oCSxQ z2**75L!~HyKm6bk?N7;)Lf;UEMi~HQZ#ag+AAVfd@tg)@IHtkhgmFjU@I4Yk$Z~*z zOx;I>i)Ikm&tw13q3h^^*E;LFjxM+v;Gi}57BqTv!S_Nr4*pz63Wt7A0u1j$aUBtk z^b7(l82(&G7yJgmr10lDy5M&KhVpbBU2q{V4#(v>B3uOcCjcCI;ySwE$pA*#`40v!42 zI=bLZPPpsnf**0hT}KxjP`LFM^_ar>EDYs~Fgzk$1aRa%>WS+};a&iM&u_(GLvRF} 
zN<*9S#4GmQq~rf#00gj7O%=YH)>F&#Zc`6z+*36b6&ouPiVI5<;JvbSdRn3wb;ZSS z7j|ueL0+Cpq3o+~;q|Aqq%TXTeZ1oF1bDeAytm(j88RdKf_PZi-B`UIUhPXC+QHX` z)3BeL3s@+gn{}RSz-uy~ox0&Sm#@mUD0qE~>c4`QQ?}hu9(*LRFoC zeOIyvJnkj2{{}ZK*m$b{O8S%fKVF5z*G=57G{zTuBlzE>(J9H!f1L9F`maznr>1C9 hl4sUct*?TYV9$hq-%_`ECj5f|)JH6>M{6PE{{WWWC<6ch literal 0 HcmV?d00001 diff --git a/clang/test/Cilk/addressof.cpp b/clang/test/Cilk/addressof.cpp new file mode 100644 index 00000000000000..747d9cef60b5d5 --- /dev/null +++ b/clang/test/Cilk/addressof.cpp @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 %s -x c++ -O1 -fopencilk -verify -fsyntax-only +// expected-no-diagnostics + +# 1 "/usr/obj/Cilk/16/lib/clang/16/include/cilk/opadd_reducer.h" 1 3 + + + + + +namespace cilk { + +template static void zero(void *v) { + *static_cast(v) = static_cast(0); +} + +template static void plus(void *l, void *r) { + *static_cast(l) += *static_cast(r); +} + +template using opadd_reducer = T _Hyperobject(zero, plus); + +} +# 4 "../reducer-tests/addressof-test.cpp" 2 + + + +template +scalar_t reduce_test() { + cilk::opadd_reducer res = 0; + return *&res; +} + + + +int foo(long var) { + long *t = __builtin_addressof(*&var); + return 0; +} diff --git a/clang/test/Cilk/cilk-exceptions.cpp b/clang/test/Cilk/cilk-exceptions.cpp new file mode 100644 index 00000000000000..a95e4deb54a767 --- /dev/null +++ b/clang/test/Cilk/cilk-exceptions.cpp @@ -0,0 +1,512 @@ +// Test case for code generation of Tapir for Cilk code that uses exceptions. 
+// +// RUN: %clang_cc1 -fcxx-exceptions -fexceptions -fopencilk -ftapir=none -triple x86_64-unknown-linux-gnu -std=c++11 -emit-llvm %s -o - | FileCheck %s + +void handle_exn(int e = -1); + +class Foo { +public: + Foo() {} + ~Foo() {} +}; + +int bar(Foo *f); +int quuz(int i) noexcept; +int baz(const Foo &f); +__attribute__((always_inline)) +int foo(Foo *f) { + try + { + bar(f); + } + catch (int e) + { + handle_exn(e); + } + return 0; +} + +//////////////////////////////////////////////////////////////////////////////// +/// Serial code snippets +//////////////////////////////////////////////////////////////////////////////// + +// CHECK-LABEL: @_Z15serial_noexcepti( +// CHECK-NOT: sync +void serial_noexcept(int n) { + quuz(n); + quuz(n); +} + +// CHECK-LABEL: @_Z13serial_excepti( +// CHECK-NOT: sync +void serial_except(int n) { + bar(new Foo()); + quuz(n); +} + +// CHECK-LABEL: @_Z15serial_tryblocki( +// CHECK-NOT: sync +void serial_tryblock(int n) { + try + { + quuz(n); + bar(new Foo()); + quuz(n); + bar(new Foo()); + } + catch (int e) + { + handle_exn(e); + } + catch (...) 
+ { + handle_exn(); + } +} + +//////////////////////////////////////////////////////////////////////////////// +/// _Cilk_for code snippets +//////////////////////////////////////////////////////////////////////////////// + +// CHECK-LABEL: @_Z20parallelfor_noexcepti( +// CHECK-NOT: detach within %{{.+}}, label %{{.+}}, label %{{.+}} unwind +// CHECK-NOT: landingpad +// CHECK-NOT: resume +void parallelfor_noexcept(int n) { + _Cilk_for (int i = 0; i < n; ++i) + quuz(i); +} + +// CHECK-LABEL: @_Z18parallelfor_excepti( +// CHECK: %[[SYNCREG:.+]] = call token @llvm.syncregion.start() +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED:.+]], label %[[CONTINUE:.+]] unwind label %[[DUNWIND:.+]] +// CHECK: call {{.*}}ptr @_Znwm(i64 noundef 1) +// CHECK: invoke void @_ZN3FooC1Ev( +// CHECK-NEXT: to label %[[INVOKECONT2:.+]] unwind label %[[TASKLPAD:.+]] +// CHECK: [[INVOKECONT2]]: +// CHECK: call {{.*}}i32 @_Z3barP3Foo( +// CHECK: reattach within %[[SYNCREG]] +// CHECK-DAG: sync within %[[SYNCREG]] +// CHECK: [[TASKLPAD]]: +// CHECK-NEXT: landingpad [[LPADTYPE:.+]] +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.detached.rethrow +// CHECK: (token %[[SYNCREG]], [[LPADTYPE]] {{.+}}) +// CHECK-NEXT: to label %[[DRUNREACH:.+]] unwind label %[[DUNWIND]] +// CHECK: [[DUNWIND]]: +// CHECK-NEXT: landingpad [[LPADTYPE]] +// CHECK-NEXT: cleanup +// CHECK: [[DRUNREACH]]: +// CHECK-NEXT: unreachable +void parallelfor_except(int n) { + _Cilk_for (int i = 0; i < n; ++i) + bar(new Foo()); +} + +// CHECK-LABEL: @_Z20parallelfor_tryblocki( +void parallelfor_tryblock(int n) { + // CHECK: %[[SYNCREG1:.+]] = call token @llvm.syncregion.start() + // CHECK: %[[SYNCREG2:.+]] = call token @llvm.syncregion.start() + try + { + // CHECK-NOT: detach within %[[SYNCREG1]], label %{{.+}}, label %{{.+}} unwind + _Cilk_for (int i = 0; i < n; ++i) + quuz(i); + // CHECK: invoke void @llvm.sync.unwind(token %[[SYNCREG1]]) + // CHECK-NEXT: to label %{{.+}} unwind label %[[CATCH:.+]] + // CHECK: 
[[CATCH]]: + // CHECK: landingpad [[LPADTYPE]] + // CHECK-NEXT: catch ptr @_ZTIi + // CHECK-NEXT: catch ptr null + + // CHECK: detach within %[[SYNCREG2]], label %[[DETACHED:.+]], label %{{.+}} unwind label %[[CATCH]] + // CHECK: [[DETACHED]]: + // CHECK: %[[OBJ:.+]] = invoke {{.*}}ptr @_Znwm(i64 noundef 1) + // CHECK-NEXT: to label %[[INVOKECONT1:.+]] unwind label %[[TASKLPAD:.+]] + // CHECK: [[INVOKECONT1]] + // CHECK: invoke void @_ZN3FooC1Ev(ptr + // CHECK-NEXT: to label %[[INVOKECONT2:.+]] unwind label %[[TASKLPAD2:.+]] + // CHECK: [[INVOKECONT2]] + // CHECK: invoke {{.*}}i32 @_Z3barP3Foo(ptr + // CHECK-NEXT: to label %[[INVOKECONT3:.+]] unwind label %[[TASKLPAD:.+]] + // CHECK: [[INVOKECONT3]] + // CHECK: reattach within %[[SYNCREG2]] + // CHECK-DAG: sync within %[[SYNCREG2]] + // CHECK: [[TASKLPAD]]: + // CHECK-NEXT: landingpad [[LPADTYPE:.+]] + // CHECK-NEXT: cleanup + // CHECK: br label %[[TASKRESUME:.+]] + // CHECK: [[TASKLPAD2]]: + // CHECK-NEXT: landingpad [[LPADTYPE:.+]] + // CHECK-NEXT: cleanup + // CHECK: call void @_ZdlPv(ptr noundef %[[OBJ]]) + // CHECK: br label %[[TASKRESUME]] + // CHECK: [[TASKRESUME]]: + // CHECK: invoke void @llvm.detached.rethrow + // CHECK: (token %[[SYNCREG2]], [[LPADTYPE]] {{.+}}) + // CHECK-NEXT: to label {{.+}} unwind label %[[CATCH]] + _Cilk_for (int i = 0; i < n; ++i) + bar(new Foo()); + } + catch (int e) + { + handle_exn(e); + } + catch (...) 
+ { + handle_exn(); + } +} + +// CHECK-LABEL: @_Z27parallelfor_tryblock_inlinei( +void parallelfor_tryblock_inline(int n) { + // CHECK: %[[SYNCREG:.+]] = call token @llvm.syncregion.start() + try + { + // CHECK: detach within %[[SYNCREG]], label %[[DETACHED:.+]], label %{{.+}} unwind label %[[DUNWIND:.+]] + // CHECK: [[DETACHED]]: + // CHECK: invoke {{.*}}ptr @_Znwm( + // CHECK: invoke void @_ZN3FooC1Ev( + // CHECK: invoke {{.*}}i32 @_Z3barP3Foo( + // CHECK-NEXT: to label %[[INVOKECONT1:.+]] unwind label %[[TASKLPAD:.+]] + // CHECK: [[TASKLPAD]]: + // CHECK-NEXT: landingpad [[LPADTYPE:.+]] + // CHECK-NEXT: cleanup + // CHECK-NEXT: catch ptr @_ZTIi + // CHECK: br i1 {{.+}}, label {{.+}}, label %[[CATCHRESUME:.+]] + // CHECK: [[CATCHRESUME]]: + // CHECK: invoke void @llvm.detached.rethrow + // CHECK: (token %[[SYNCREG]], [[LPADTYPE]] {{.+}}) + // CHECK-NEXT: to label {{.+}} unwind label %[[DUNWIND]] + // CHECK: [[DUNWIND]]: + // CHECK: landingpad [[LPADTYPE]] + // CHECK-NEXT: catch ptr @_ZTIi + // CHECK-NEXT: catch ptr null + _Cilk_for (int i = 0; i < n; ++i) + foo(new Foo()); + } + catch (int e) + { + handle_exn(e); + } + catch (...) 
+ { + handle_exn(); + } +} + +//////////////////////////////////////////////////////////////////////////////// +/// _Cilk_spawn code snippets +//////////////////////////////////////////////////////////////////////////////// + +// CHECK-LABEL: @_Z14spawn_noexcepti( +// CHECK-NOT: landingpad +// CHECK-NOT: detached.rethrow +void spawn_noexcept(int n) { + _Cilk_spawn quuz(n); + quuz(n); +} + +// CHECK-LABEL: @_Z15spawn_tf_excepti( +void spawn_tf_except(int n) { + // CHECK: %[[SYNCREG:.+]] = call token @llvm.syncregion.start() + // CHECK: %[[TASKFRAME:.+]] = call token @llvm.taskframe.create() + // CHECK: %[[EXN:.+]] = alloca ptr + // CHECK: %[[EHSELECTOR:.+]] = alloca i32 + // CHECK: invoke void @_ZN3FooC1Ev(ptr + // CHECK-NEXT: to label %[[INVOKECONT:.+]] unwind label %[[TASKLPAD:.+]] + // CHECK: [[INVOKECONT]] + // CHECK-NEXT: detach within %[[SYNCREG]], label %[[DETACHED:.+]], label %{{.+}} + // CHECK: [[DETACHED]]: + // CHECK-NEXT: call void @llvm.taskframe.use(token %[[TASKFRAME]]) + // CHECK-NEXT: call {{.*}}i32 @_Z3barP3Foo( + // CHECK-NEXT: reattach within %[[SYNCREG]] + // CHECK: [[TASKLPAD]]: + // CHECK-NEXT: landingpad [[LPADTYPE:.+]] + // CHECK-NEXT: cleanup + // CHECK: store ptr %{{.+}}, ptr %[[EXN]], + // CHECK: store i32 %{{.+}}, ptr %[[EHSELECTOR]], + // CHECK: invoke void @llvm.taskframe.resume + // CHECK: (token %[[TASKFRAME]], [[LPADTYPE]] {{.+}}) + // CHECK-NEXT: to label {{.+}} unwind label %[[TFUNWIND:.+]] + // CHECK: [[TFUNWIND]]: + // CHECK-NEXT: landingpad [[LPADTYPE]] + // CHECK-NEXT: cleanup + // CHECK-NOT: store ptr %{{.+}}, ptr %[[EXN]], + // CHECK-NOT: store i32 %{{.+}}, ptr %[[EHSELECTOR]], + // CHECK-NOT: load ptr, ptr %[[EXN]], + // CHECK-NOT: load i32, ptr %[[EHSELECTOR]], + // CHECK: resume [[LPADTYPE]] + _Cilk_spawn bar(new Foo()); + quuz(n); +} + +// CHECK-LABEL: @_Z21spawn_stmt_destructori( +void spawn_stmt_destructor(int n) { + // CHECK: %[[SYNCREG:.+]] = call token @llvm.syncregion.start() + // CHECK: %[[TASKFRAME:.+]] = call 
token @llvm.taskframe.create() + // CHECK: %[[REFTMP:.+]] = alloca %class.Foo + // CHECK: %[[EXNTF:.+]] = alloca ptr + // CHECK: %[[EHSELECTORTF:.+]] = alloca i32 + // CHECK: call void @_ZN3FooC1Ev(ptr {{.*}}nonnull {{.*}}dereferenceable(1) %[[REFTMP]]) + // CHECK: detach within %[[SYNCREG]], label %[[DETACHED:.+]], label %{{.+}} unwind label %[[DUNWIND:.+]] + // CHECK: [[DETACHED]]: + // CHECK-NEXT: %[[EXN:.+]] = alloca ptr + // CHECK-NEXT: %[[EHSELECTOR:.+]] = alloca i32 + // CHECK-NEXT: call void @llvm.taskframe.use(token %[[TASKFRAME]]) + // CHECK-NEXT: invoke {{.*}}i32 @_Z3bazRK3Foo( + // CHECK-NEXT: to label %[[INVOKECONT:.+]] unwind label %[[TASKLPAD:.+]] + // CHECK: [[INVOKECONT]]: + // CHECK-NEXT: call void @_ZN3FooD1Ev(ptr {{.*}}nonnull {{.*}}dereferenceable(1) %[[REFTMP]]) + // CHECK-NEXT: reattach within %[[SYNCREG]] + // CHECK: [[TASKLPAD]]: + // CHECK-NEXT: landingpad [[LPADTYPE:.+]] + // CHECK-NEXT: cleanup + // CHECK: store ptr %{{.+}}, ptr %[[EXN]], + // CHECK: store i32 %{{.+}}, ptr %[[EHSELECTOR]], + // CHECK: call void @_ZN3FooD1Ev(ptr {{.*}}nonnull {{.*}}dereferenceable(1) %[[REFTMP]]) + // CHECK: invoke void @llvm.detached.rethrow + // CHECK: (token %[[SYNCREG]], [[LPADTYPE]] {{.+}}) + // CHECK-NEXT: to label {{.+}} unwind label %[[DUNWIND]] + // CHECK: [[DUNWIND]]: + // CHECK-NEXT: landingpad [[LPADTYPE]] + // CHECK-NEXT: cleanup + // CHECK-NOT: store ptr %{{.+}}, ptr %[[EXN]], + // CHECK-NOT: store i32 %{{.+}}, ptr %[[EHSELECTOR]], + // CHECK-NOT: load ptr, ptr %[[EXN]], + // CHECK-NOT: load i32, ptr %[[EHSELECTOR]], + // CHECK: store ptr %{{.+}}, ptr %[[EXNTF]], + // CHECK: store i32 %{{.+}}, ptr %[[EHSELECTORTF]], + // CHECK: invoke void @llvm.taskframe.resume + // CHECK: (token %[[TASKFRAME]], [[LPADTYPE]] {{.+}}) + // CHECK-NEXT: to label {{.+}} unwind label %[[TFUNWIND:.+]] + // CHECK: [[TFUNWIND]]: + // CHECK-NEXT: landingpad [[LPADTYPE]] + // CHECK-NEXT: cleanup + // CHECK-NOT: store ptr %{{.+}}, ptr %[[EXN]], + // CHECK-NOT: store 
i32 %{{.+}}, ptr %[[EHSELECTOR]], + // CHECK-NOT: load ptr, ptr %[[EXN]], + // CHECK-NOT: load i32, ptr %[[EHSELECTOR]], + // CHECK-NOT: store ptr %{{.+}}, ptr %[[EXNTF]], + // CHECK-NOT: store i32 %{{.+}}, ptr %[[EHSELECTORTF]], + // CHECK-NOT: load ptr, ptr %[[EXNTF]], + // CHECK-NOT: load i32, ptr %[[EHSELECTORTF]], + // CHECK: resume [[LPADTYPE]] + _Cilk_spawn baz(Foo()); + quuz(n); +} + +// CHECK-LABEL: @_Z21spawn_decl_destructori( +void spawn_decl_destructor(int n) { + // CHECK: %[[SYNCREG:.+]] = call token @llvm.syncregion.start() + // CHECK: %[[TASKFRAME:.+]] = call token @llvm.taskframe.create() + // CHECK: %[[REFTMP:.+]] = alloca %class.Foo + // CHECK: %[[EXNTF:.+]] = alloca ptr + // CHECK: %[[EHSELECTORTF:.+]] = alloca i32 + // CHECK: call void @_ZN3FooC1Ev(ptr {{.*}}nonnull {{.*}}dereferenceable(1) %[[REFTMP]]) + // CHECK: detach within %[[SYNCREG]], label %[[DETACHED:.+]], label %{{.+}} unwind label %[[DUNWIND:.+]] + // CHECK: [[DETACHED]]: + // CHECK-NEXT: %[[EXN:.+]] = alloca ptr + // CHECK-NEXT: %[[EHSELECTOR:.+]] = alloca i32 + // CHECK-NEXT: call void @llvm.taskframe.use(token %[[TASKFRAME]]) + // CHECK: %[[CALL:.+]] = invoke {{.*}}i32 @_Z3bazRK3Foo( + // CHECK-NEXT: to label %[[INVOKECONT:.+]] unwind label %[[TASKLPAD:.+]] + // CHECK: [[INVOKECONT]]: + // CHECK-NEXT: call void @_ZN3FooD1Ev(ptr {{.*}}nonnull {{.*}}dereferenceable(1) %[[REFTMP]]) + // CHECK-NEXT: store i32 %[[CALL]] + // CHECK-NEXT: reattach within %[[SYNCREG]] + // CHECK: [[TASKLPAD]]: + // CHECK-NEXT: landingpad [[LPADTYPE:.+]] + // CHECK-NEXT: cleanup + // CHECK: store ptr %{{.+}}, ptr %[[EXN]], + // CHECK: store i32 %{{.+}}, ptr %[[EHSELECTOR]], + // CHECK: call void @_ZN3FooD1Ev(ptr {{.*}}nonnull {{.*}}dereferenceable(1) %[[REFTMP]]) + // CHECK: invoke void @llvm.detached.rethrow + // CHECK: (token %[[SYNCREG]], [[LPADTYPE]] {{.+}}) + // CHECK-NEXT: to label {{.+}} unwind label %[[DUNWIND]] + // CHECK: [[DUNWIND]]: + // CHECK-NEXT: landingpad [[LPADTYPE]] + // CHECK-NEXT: 
cleanup + // CHECK-NOT: store ptr %{{.+}}, ptr %[[EXN]], + // CHECK-NOT: store i32 %{{.+}}, ptr %[[EHSELECTOR]], + // CHECK-NOT: load ptr, ptr %[[EXN]], + // CHECK-NOT: load i32, ptr %[[EHSELECTOR]], + // CHECK: store ptr %{{.+}}, ptr %[[EXNTF]], + // CHECK: store i32 %{{.+}}, ptr %[[EHSELECTORTF]], + // CHECK: invoke void @llvm.taskframe.resume + // CHECK: (token %[[TASKFRAME]], [[LPADTYPE]] {{.+}}) + // CHECK-NEXT: to label {{.+}} unwind label %[[TFUNWIND:.+]] + // CHECK: [[TFUNWIND]]: + // CHECK-NEXT: landingpad [[LPADTYPE]] + // CHECK-NEXT: cleanup + // CHECK-NOT: store ptr %{{.+}}, ptr %[[EXN]], + // CHECK-NOT: store i32 %{{.+}}, ptr %[[EHSELECTOR]], + // CHECK-NOT: load ptr, ptr %[[EXN]], + // CHECK-NOT: load i32, ptr %[[EHSELECTOR]], + // CHECK-NOT: store ptr %{{.+}}, ptr %[[EXNTF]], + // CHECK-NOT: store i32 %{{.+}}, ptr %[[EHSELECTORTF]], + // CHECK-NOT: load ptr, ptr %[[EXNTF]], + // CHECK-NOT: load i32, ptr %[[EHSELECTORTF]], + // CHECK: resume [[LPADTYPE]] + int result = _Cilk_spawn baz(Foo()); + quuz(n); +} + +// Technically this code has a potential race between the spawned execution of +// baz and the destructor for f. I see two ways around this problem. 1) Leave +// it to the user to resolve these races. 2) Delegate the execution of +// destructors to the runtime system and ensure that the runtime system executes +// destructors only on when the leftmost child returns. I don't see a way the +// compiler can solve this on its own, particularly when spawns and syncs can +// happen dynamically. 
+ +// CHECK-LABEL: @_Z22spawn_block_destructori( +void spawn_block_destructor(int n) { + // CHECK: %[[SYNCREG:.+]] = call token @llvm.syncregion.start() + // CHECK: call void @_ZN3FooC1Ev(ptr {{.*}}nonnull {{.*}}dereferenceable(1) %[[REFTMP:.+]]) + // CHECK: %[[TASKFRAME:.+]] = call token @llvm.taskframe.create() + // CHECK-NEXT: %[[EXNTF:.+]] = alloca ptr + // CHECK-NEXT: %[[EHSELECTORTF:.+]] = alloca i32 + // CHECK: detach within %[[SYNCREG]], label %[[DETACHED:.+]], label %[[DETCONT:.+]] unwind label %[[DUNWIND:.+]] + // CHECK: [[DETACHED]]: + // CHECK-NEXT: %[[EXN:.+]] = alloca ptr + // CHECK-NEXT: %[[EHSELECTOR:.+]] = alloca i32 + // CHECK-NEXT: call void @llvm.taskframe.use(token %[[TASKFRAME]]) + // CHECK: %[[CALL:.+]] = invoke {{.*}}i32 @_Z3bazRK3Foo( + // CHECK-NEXT: to label %[[INVOKECONT:.+]] unwind label %[[TASKLPAD:.+]] + // CHECK: [[INVOKECONT]]: + // CHECK-NEXT: store i32 %[[CALL]] + // CHECK-NEXT: reattach within %[[SYNCREG]], label %[[DETCONT]] + // CHECK: [[DETCONT]]: + // CHECK: call {{.*}}i32 @_Z4quuzi( + // CHECK-NEXT: call void @_ZN3FooD1Ev( + // CHECK: [[TASKLPAD]]: + // CHECK-NEXT: landingpad [[LPADTYPE:.+]] + // CHECK-NEXT: cleanup + // CHECK: store ptr %{{.+}}, ptr %[[EXN]], + // CHECK: store i32 %{{.+}}, ptr %[[EHSELECTOR]], + // CHECK: invoke void @llvm.detached.rethrow + // CHECK: (token %[[SYNCREG]], [[LPADTYPE]] {{.+}}) + // CHECK-NEXT: to label {{.+}} unwind label %[[DUNWIND]] + // CHECK: [[DUNWIND]]: + // CHECK: landingpad [[LPADTYPE]] + // CHECK-NEXT: cleanup + // CHECK-NOT: store ptr %{{.+}}, ptr %[[EXN]], + // CHECK-NOT: store i32 %{{.+}}, ptr %[[EHSELECTOR]], + // CHECK-NOT: load ptr, ptr %[[EXN]], + // CHECK-NOT: load i32, ptr %[[EHSELECTOR]], + // CHECK: store ptr %{{.+}}, ptr %[[EXNTF]], + // CHECK: store i32 %{{.+}}, ptr %[[EHSELECTORTF]], + // CHECK: invoke void @llvm.taskframe.resume + // CHECK: (token %[[TASKFRAME]], [[LPADTYPE]] {{.+}}) + // CHECK-NEXT: to label {{.+}} unwind label %[[TFUNWIND:.+]] + // CHECK: 
[[TFUNWIND]]: + // CHECK-NEXT: landingpad [[LPADTYPE]] + // CHECK-NEXT: cleanup + // CHECK-NOT: store ptr %{{.+}}, ptr %[[EXN]], + // CHECK-NOT: store i32 %{{.+}}, ptr %[[EHSELECTOR]], + // CHECK-NOT: load ptr, ptr %[[EXN]], + // CHECK-NOT: load i32, ptr %[[EHSELECTOR]], + // CHECK-NOT: store ptr %{{.+}}, ptr %[[EXNTF]], + // CHECK-NOT: store i32 %{{.+}}, ptr %[[EHSELECTORTF]], + // CHECK-NOT: load ptr, ptr %[[EXNTF]], + // CHECK-NOT: load i32, ptr %[[EHSELECTORTF]], + // CHECK: resume [[LPADTYPE]] + { + auto f = Foo(); + int result = _Cilk_spawn baz(f); + quuz(n); + } +} + +// CHECK-LABEL: @_Z18spawn_throw_inlinei( +void spawn_throw_inline(int n) { + // CHECK: %[[SYNCREG:.+]] = call token @llvm.syncregion.start() + // CHECK: %[[TASKFRAME:.+]] = call token @llvm.taskframe.create() + // CHECK: call {{.*}}ptr @_Znwm( + // CHECK: invoke void @_ZN3FooC1Ev( + // CHECK: detach within %[[SYNCREG]], label %[[DETACHED:.+]], label %{{.+}} unwind label %[[DUNWIND:.+]] + // CHECK: [[DETACHED]]: + // CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME]]) + // CHECK: invoke {{.*}}i32 @_Z3barP3Foo( + // CHECK-NEXT: to label %[[INVOKECONT1:.+]] unwind label %[[TASKLPAD:.+]] + // CHECK: [[TASKLPAD]]: + // CHECK-NEXT: landingpad [[LPADTYPE:.+]] + // CHECK-NEXT: catch ptr @_ZTIi + // CHECK: br i1 {{.+}}, label {{.+}}, label %[[CATCHRESUME:.+]] + // CHECK: [[CATCHRESUME]]: + // CHECK: invoke void @llvm.detached.rethrow + // CHECK: (token %[[SYNCREG]], [[LPADTYPE]] {{.+}}) + // CHECK-NEXT: to label {{.+}} unwind label %[[DUNWIND]] + // CHECK: [[DUNWIND]]: + // CHECK-NEXT: landingpad [[LPADTYPE]] + // CHECK-NEXT: cleanup + // CHECK: invoke void @llvm.taskframe.resume + // CHECK: (token %[[TASKFRAME]], [[LPADTYPE]] {{.+}}) + // CHECK-NEXT: to label {{.+}} unwind label {{.+}} + _Cilk_spawn foo(new Foo()); + quuz(n); +} + +// CHECK-LABEL: @_Z14spawn_tryblocki( +void spawn_tryblock(int n) { + // CHECK: %[[SYNCREG:.+]] = call token @llvm.syncregion.start() + // CHECK: %[[TASKFRAME:.+]] = 
call token @llvm.taskframe.create() + try + { + // CHECK: detach within %[[SYNCREG]], label %[[DETACHED1:.+]], label %[[CONTINUE1:.+]] + // CHECK-NOT: unwind + // CHECK: [[DETACHED1]]: + // CHECK-NEXT: call void @llvm.taskframe.use(token %[[TASKFRAME]]) + // CHECK-NEXT: call {{.*}}i32 @_Z4quuzi( + // CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE1]] + _Cilk_spawn quuz(n); + // CHECK: %[[TASKFRAME2:.+]] = call token @llvm.taskframe.create() + // CHECK: detach within %[[SYNCREG]], label %[[DETACHED2:.+]], label %[[CONTINUE2:.+]] unwind label %[[DUNWIND:.+]] + // CHECK: [[DETACHED2]]: + // CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME2]]) + // CHECK: invoke {{.*}}i32 @_Z3barP3Foo( + // CHECK-NEXT: to label %[[INVOKECONT1:.+]] unwind label %[[TASKLPAD:.+]] + // CHECK: [[INVOKECONT1]]: + // CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE2]] + _Cilk_spawn bar(new Foo()); + // CHECK: %[[TASKFRAME3:.+]] = call token @llvm.taskframe.create() + // CHECK: detach within %[[SYNCREG]], label %[[DETACHED3:.+]], label %[[CONTINUE3:.+]] + // CHECK: [[DETACHED3]]: + // CHECK-NEXT: call void @llvm.taskframe.use(token %[[TASKFRAME3]]) + // CHECK-NEXT: call {{.*}}i32 @_Z4quuzi( + // CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE3]] + _Cilk_spawn quuz(n); + // CHECK: [[CONTINUE3]]: + // CHECK: invoke {{.*}}i32 @_Z3barP3Foo( + // CHECK-NEXT: to label %[[INVOKECONT2:.+]] unwind label %[[CONT3UNWIND:.+]] + bar(new Foo()); + // CHECK: [[INVOKECONT2]]: + // CHECK-NEXT: sync within %[[SYNCREG]] + _Cilk_sync; + } + // CHECK: [[DUNWIND]]: + // CHECK: landingpad [[LPADTYPE]] + // CHECK-NEXT: cleanup + // CHECK: [[TASKLPAD]]: + // CHECK-NEXT: landingpad [[LPADTYPE:.+]] + // CHECK-NEXT: cleanup + // CHECK: invoke void @llvm.detached.rethrow + // CHECK: (token %[[SYNCREG]], [[LPADTYPE]] {{.+}}) + // CHECK-NEXT: to label {{.+}} unwind label %[[DUNWIND]] + // CHECK: invoke void @llvm.taskframe.resume + // CHECK: (token %[[TASKFRAME2]], [[LPADTYPE]] 
{{.+}}) + // CHECK-NEXT: to label {{.+}} unwind label %[[CONT3UNWIND]] + // CHECK: [[CONT3UNWIND]]: + // CHECK: landingpad [[LPADTYPE]] + // CHECK-NEXT: catch ptr @_ZTIi + // CHECK-NEXT: catch ptr null + catch (int e) + { + handle_exn(e); + } + catch (...) + { + handle_exn(); + } +} diff --git a/clang/test/Cilk/cilkfor-bad-input.c b/clang/test/Cilk/cilkfor-bad-input.c new file mode 100644 index 00000000000000..321907ba67216e --- /dev/null +++ b/clang/test/Cilk/cilkfor-bad-input.c @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 %s -fopencilk -verify -fsyntax-only + +void f1() { + _Cilk_for(bad i = 0; i < 42; ++i); // expected-error{{use of undeclared identifier 'bad'}} expected-error{{use of undeclared identifier 'i'}} expected-error{{use of undeclared identifier 'i'}} expected-error{{expected control variable declaration in initializer in '_Cilk_for'}} +} + +void f2() { + _Cilk_for(int i = 0; ; ++i); // expected-error{{missing loop condition expression}} expected-error{{expected binary comparison operator in '_Cilk_for' loop condition}} +} + +void f3() { + _Cilk_for(int i = 0; i < 42; ); // expected-error{{missing loop increment expression}} expected-warning{{Cilk for loop has empty body}} +} + +void f4() { + int i; + _Cilk_for(; i < 42; ++i); // expected-error{{missing control variable declaration and initialization in '_Cilk_for'}} expected-error{{expected control variable declaration in initializer in '_Cilk_for'}} +} + +void f5(const long *begin) { + _Cilk_for (const long *p = begin; p != 666; ++p) + ; + // expected-warning@-2{{comparison between pointer and integer}} + // expected-warning@-3{{Cilk for loop has empty body}} +} diff --git a/clang/test/Cilk/cilkfor-bounds.cpp b/clang/test/Cilk/cilkfor-bounds.cpp new file mode 100644 index 00000000000000..2ece96abb69c92 --- /dev/null +++ b/clang/test/Cilk/cilkfor-bounds.cpp @@ -0,0 +1,987 @@ +// RUN: %clang_cc1 %s -std=c++11 -triple x86_64-unknown-linux-gnu -fopencilk -ftapir=none -verify -S -emit-llvm -o - | FileCheck %s 
+// expected-no-diagnostics + +typedef __SIZE_TYPE__ size_t; + +void bar(size_t i); + +void up(size_t start, size_t end) { + _Cilk_for (size_t i = start; i < end; ++i) + bar(i); +} + +// CHECK-LABEL: define {{.*}}void @_Z2upmm( + +// CHECK: %[[START:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[START]], ptr %[[INIT:.+]], align 8 +// CHECK-NEXT: %[[END:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[END]], ptr %[[LIMIT:.+]], align 8 +// CHECK-NEXT: %[[INITCMPINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[INITCMPLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[INITCMP:.+]] = icmp ult i64 %[[INITCMPINIT]], %[[INITCMPLIMIT]] +// CHECK-NEXT: br i1 %[[INITCMP]], label %[[PFORPH:.+]], label %[[PFOREND:.+]] + +// CHECK: [[PFORPH]]: +// CHECK-NEXT: store i64 0, ptr %[[BEGIN:.+]], align 8 +// CHECK-NEXT: %[[ENDLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[ENDINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[ENDSUB:.+]] = sub i64 %[[ENDLIMIT]], %[[ENDINIT]] +// CHECK-NEXT: %[[ENDSUB1:.+]] = sub i64 %[[ENDSUB]], 1 +// CHECK-NEXT: %[[ENDDIV:.+]] = udiv i64 %[[ENDSUB1]], 1 +// CHECK-NEXT: %[[ENDADD:.+]] = add i64 %[[ENDDIV]], 1 +// CHECK-NEXT: store i64 %[[ENDADD]], ptr %[[END:.+]], align 8 + +// CHECK: %[[INITITER:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[BEGINITER:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[ITERMUL:.+]] = mul i64 %[[BEGINITER]], 1 +// CHECK-NEXT: %[[ITERADD:.+]] = add i64 %[[INITITER]], %[[ITERMUL]] +// CHECK-NEXT: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[PFORINC:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: %[[ITERSLOT:.+]] = alloca i64, align 8 +// CHECK: store i64 %[[ITERADD]], ptr %[[ITERSLOT]] + +// CHECK: [[PFORINC]]: +// CHECK-NEXT: %[[INCBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[INC:.+]] = add i64 %[[INCBEGIN]], 1 +// CHECK-NEXT: store i64 %[[INC]], ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDEND:.+]] = load 
i64, ptr %[[END]] +// CHECK-NEXT: %[[COND:.+]] = icmp ult i64 %[[CONDBEGIN]], %[[CONDEND]] +// CHECK-NEXT: br i1 %[[COND]], label %{{.+}}, label %[[PFORCONDCLEANUP:.+]], !llvm.loop ![[LOOPMD:.+]] + +// CHECK: [[PFORCONDCLEANUP]]: +// CHECK-NEXT: sync within %[[SYNCREG]] + +void up_leq(size_t start, size_t end) { + _Cilk_for (size_t i = start; i <= end; ++i) + bar(i); +} + +// CHECK-LABEL: define {{.*}}void @_Z6up_leqmm( + +// CHECK: %[[START:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[START]], ptr %[[INIT:.+]], align 8 +// CHECK-NEXT: %[[END:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[END]], ptr %[[LIMIT:.+]], align 8 +// CHECK-NEXT: %[[INITCMPINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[INITCMPLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[INITCMP:.+]] = icmp ule i64 %[[INITCMPINIT]], %[[INITCMPLIMIT]] +// CHECK-NEXT: br i1 %[[INITCMP]], label %[[PFORPH:.+]], label %[[PFOREND:.+]] + +// CHECK: [[PFORPH]]: +// CHECK-NEXT: store i64 0, ptr %[[BEGIN:.+]], align 8 +// CHECK-NEXT: %[[ENDLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[ENDINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[ENDSUB:.+]] = sub i64 %[[ENDLIMIT]], %[[ENDINIT]] +// CHECK-NEXT: %[[ENDDIV:.+]] = udiv i64 %[[ENDSUB]], 1 +// CHECK-NEXT: store i64 %[[ENDDIV]], ptr %[[END:.+]], align 8 + +// CHECK: %[[INITITER:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[BEGINITER:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[ITERMUL:.+]] = mul i64 %[[BEGINITER]], 1 +// CHECK-NEXT: %[[ITERADD:.+]] = add i64 %[[INITITER]], %[[ITERMUL]] +// CHECK-NEXT: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[PFORINC:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: %[[ITERSLOT:.+]] = alloca i64, align 8 +// CHECK: store i64 %[[ITERADD]], ptr %[[ITERSLOT]] + +// CHECK: [[PFORINC]]: +// CHECK-NEXT: %[[INCBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[INC:.+]] = add i64 %[[INCBEGIN]], 1 +// CHECK-NEXT: store i64 %[[INC]], ptr %[[BEGIN]] +// CHECK-NEXT: 
%[[CONDBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDEND:.+]] = load i64, ptr %[[END]] +// CHECK-NEXT: %[[COND:.+]] = icmp ule i64 %[[CONDBEGIN]], %[[CONDEND]] +// CHECK-NEXT: br i1 %[[COND]], label %{{.+}}, label %[[PFORCONDCLEANUP:.+]], !llvm.loop ![[LOOPMD2:.+]] + +// CHECK: [[PFORCONDCLEANUP]]: +// CHECK-NEXT: sync within %[[SYNCREG]] + +void up_flip(size_t start, size_t end) { + _Cilk_for (size_t i = start; end > i; ++i) + bar(i); +} + +// CHECK-LABEL: define {{.*}}void @_Z7up_flipmm( + +// CHECK: %[[START:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[START]], ptr %[[INIT:.+]], align 8 +// CHECK-NEXT: %[[END:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[END]], ptr %[[LIMIT:.+]], align 8 +// CHECK-NEXT: %[[INITCMPLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[INITCMPINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[INITCMP:.+]] = icmp ugt i64 %[[INITCMPLIMIT]], %[[INITCMPINIT]] +// CHECK-NEXT: br i1 %[[INITCMP]], label %[[PFORPH:.+]], label %[[PFOREND:.+]] + +// CHECK: [[PFORPH]]: +// CHECK-NEXT: store i64 0, ptr %[[BEGIN:.+]], align 8 +// CHECK-NEXT: %[[ENDLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[ENDINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[ENDSUB:.+]] = sub i64 %[[ENDLIMIT]], %[[ENDINIT]] +// CHECK-NEXT: %[[ENDSUB1:.+]] = sub i64 %[[ENDSUB]], 1 +// CHECK-NEXT: %[[ENDDIV:.+]] = udiv i64 %[[ENDSUB1]], 1 +// CHECK-NEXT: %[[ENDADD:.+]] = add i64 %[[ENDDIV]], 1 +// CHECK-NEXT: store i64 %[[ENDADD]], ptr %[[END:.+]], align 8 + +// CHECK: %[[INITITER:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[BEGINITER:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[ITERMUL:.+]] = mul i64 %[[BEGINITER]], 1 +// CHECK-NEXT: %[[ITERADD:.+]] = add i64 %[[INITITER]], %[[ITERMUL]] +// CHECK-NEXT: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[PFORINC:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: %[[ITERSLOT:.+]] = alloca i64, align 8 +// CHECK: store i64 %[[ITERADD]], ptr %[[ITERSLOT]] + +// CHECK: 
[[PFORINC]]: +// CHECK-NEXT: %[[INCBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[INC:.+]] = add i64 %[[INCBEGIN]], 1 +// CHECK-NEXT: store i64 %[[INC]], ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDEND:.+]] = load i64, ptr %[[END]] +// CHECK-NEXT: %[[CONDBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[COND:.+]] = icmp ugt i64 %[[CONDEND]], %[[CONDBEGIN]] +// CHECK-NEXT: br i1 %[[COND]], label %{{.+}}, label %[[PFORCONDCLEANUP:.+]], !llvm.loop ![[LOOPMD3:.+]] + +// CHECK: [[PFORCONDCLEANUP]]: +// CHECK-NEXT: sync within %[[SYNCREG]] + +void up_flip_geq(size_t start, size_t end) { + _Cilk_for (size_t i = start; end >= i; ++i) + bar(i); +} + +// CHECK-LABEL: define {{.*}}void @_Z11up_flip_geqmm( + +// CHECK: %[[START:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[START]], ptr %[[INIT:.+]], align 8 +// CHECK-NEXT: %[[END:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[END]], ptr %[[LIMIT:.+]], align 8 +// CHECK-NEXT: %[[INITCMPLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[INITCMPINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[INITCMP:.+]] = icmp uge i64 %[[INITCMPLIMIT]], %[[INITCMPINIT]] +// CHECK-NEXT: br i1 %[[INITCMP]], label %[[PFORPH:.+]], label %[[PFOREND:.+]] + +// CHECK: [[PFORPH]]: +// CHECK-NEXT: store i64 0, ptr %[[BEGIN:.+]], align 8 +// CHECK-NEXT: %[[ENDLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[ENDINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[ENDSUB:.+]] = sub i64 %[[ENDLIMIT]], %[[ENDINIT]] +// CHECK-NEXT: %[[ENDDIV:.+]] = udiv i64 %[[ENDSUB]], 1 +// CHECK-NEXT: store i64 %[[ENDDIV]], ptr %[[END:.+]], align 8 + +// CHECK: %[[INITITER:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[BEGINITER:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[ITERMUL:.+]] = mul i64 %[[BEGINITER]], 1 +// CHECK-NEXT: %[[ITERADD:.+]] = add i64 %[[INITITER]], %[[ITERMUL]] +// CHECK-NEXT: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[PFORINC:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: 
%[[ITERSLOT:.+]] = alloca i64, align 8 +// CHECK: store i64 %[[ITERADD]], ptr %[[ITERSLOT]] + +// CHECK: [[PFORINC]]: +// CHECK-NEXT: %[[INCBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[INC:.+]] = add i64 %[[INCBEGIN]], 1 +// CHECK-NEXT: store i64 %[[INC]], ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDEND:.+]] = load i64, ptr %[[END]] +// CHECK-NEXT: %[[CONDBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[COND:.+]] = icmp uge i64 %[[CONDEND]], %[[CONDBEGIN]] +// CHECK-NEXT: br i1 %[[COND]], label %{{.+}}, label %[[PFORCONDCLEANUP:.+]], !llvm.loop ![[LOOPMD4:.+]] + +// CHECK: [[PFORCONDCLEANUP]]: +// CHECK-NEXT: sync within %[[SYNCREG]] + +void up_stride(size_t start, size_t end, size_t stride) { + _Cilk_for (size_t i = start; i < end; i += stride) + bar(i); +} + +// CHECK-LABEL: define {{.*}}void @_Z9up_stridemmm( + +// CHECK: %[[START:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[START]], ptr %[[INIT:.+]], align 8 +// CHECK-NEXT: %[[END:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[END]], ptr %[[LIMIT:.+]], align 8 +// CHECK-NEXT: %[[INITCMPINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[INITCMPLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[INITCMP:.+]] = icmp ult i64 %[[INITCMPINIT]], %[[INITCMPLIMIT]] +// CHECK-NEXT: br i1 %[[INITCMP]], label %[[PFORPH:.+]], label %[[PFOREND:.+]] + +// CHECK: [[PFORPH]]: +// CHECK-NEXT: store i64 0, ptr %[[BEGIN:.+]], align 8 +// CHECK-NEXT: %[[ENDLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[ENDINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[ENDSUB:.+]] = sub i64 %[[ENDLIMIT]], %[[ENDINIT]] +// CHECK-NEXT: %[[ENDSUB1:.+]] = sub i64 %[[ENDSUB]], 1 +// CHECK-NEXT: %[[STRIDE:.+]] = load i64, ptr %[[STRIDEADDR:.+]], align 8 +// CHECK-NEXT: %[[ENDDIV:.+]] = udiv i64 %[[ENDSUB1]], %[[STRIDE]] +// CHECK-NEXT: %[[ENDADD:.+]] = add i64 %[[ENDDIV]], 1 +// CHECK-NEXT: store i64 %[[ENDADD]], ptr %[[END:.+]], align 8 + +// CHECK: %[[INITITER:.+]] = load i64, ptr %[[INIT]] +// 
CHECK-NEXT: %[[BEGINITER:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[STRIDEITER:.+]] = load i64, ptr %[[STRIDEADDR]] +// CHECK-NEXT: %[[ITERMUL:.+]] = mul i64 %[[BEGINITER]], %[[STRIDEITER]] +// CHECK-NEXT: %[[ITERADD:.+]] = add i64 %[[INITITER]], %[[ITERMUL]] +// CHECK-NEXT: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[PFORINC:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: %[[ITERSLOT:.+]] = alloca i64, align 8 +// CHECK: store i64 %[[ITERADD]], ptr %[[ITERSLOT]] + +// CHECK: [[PFORINC]]: +// CHECK-NEXT: %[[INCBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[INC:.+]] = add i64 %[[INCBEGIN]], 1 +// CHECK-NEXT: store i64 %[[INC]], ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDEND:.+]] = load i64, ptr %[[END]] +// CHECK-NEXT: %[[COND:.+]] = icmp ult i64 %[[CONDBEGIN]], %[[CONDEND]] +// CHECK-NEXT: br i1 %[[COND]], label %{{.+}}, label %[[PFORCONDCLEANUP:.+]], !llvm.loop ![[LOOPMD5:.+]] + +// CHECK: [[PFORCONDCLEANUP]]: +// CHECK-NEXT: sync within %[[SYNCREG]] + +void up_stride_leq(size_t start, size_t end, size_t stride) { + _Cilk_for (size_t i = start; i <= end; i += stride) + bar(i); +} + +// CHECK-LABEL: define {{.*}}void @_Z13up_stride_leqmmm( + +// CHECK: %[[START:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[START]], ptr %[[INIT:.+]], align 8 +// CHECK-NEXT: %[[END:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[END]], ptr %[[LIMIT:.+]], align 8 +// CHECK-NEXT: %[[INITCMPINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[INITCMPLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[INITCMP:.+]] = icmp ule i64 %[[INITCMPINIT]], %[[INITCMPLIMIT]] +// CHECK-NEXT: br i1 %[[INITCMP]], label %[[PFORPH:.+]], label %[[PFOREND:.+]] + +// CHECK: [[PFORPH]]: +// CHECK-NEXT: store i64 0, ptr %[[BEGIN:.+]], align 8 +// CHECK-NEXT: %[[ENDLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[ENDINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[ENDSUB:.+]] = sub i64 
%[[ENDLIMIT]], %[[ENDINIT]] +// CHECK-NEXT: %[[STRIDE:.+]] = load i64, ptr %[[STRIDEADDR:.+]], align 8 +// CHECK-NEXT: %[[ENDDIV:.+]] = udiv i64 %[[ENDSUB]], %[[STRIDE]] +// CHECK-NEXT: store i64 %[[ENDDIV]], ptr %[[END:.+]], align 8 + +// CHECK: %[[INITITER:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[BEGINITER:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[STRIDEITER:.+]] = load i64, ptr %[[STRIDEADDR]] +// CHECK-NEXT: %[[ITERMUL:.+]] = mul i64 %[[BEGINITER]], %[[STRIDEITER]] +// CHECK-NEXT: %[[ITERADD:.+]] = add i64 %[[INITITER]], %[[ITERMUL]] +// CHECK-NEXT: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[PFORINC:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: %[[ITERSLOT:.+]] = alloca i64, align 8 +// CHECK: store i64 %[[ITERADD]], ptr %[[ITERSLOT]] + +// CHECK: [[PFORINC]]: +// CHECK-NEXT: %[[INCBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[INC:.+]] = add i64 %[[INCBEGIN]], 1 +// CHECK-NEXT: store i64 %[[INC]], ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDEND:.+]] = load i64, ptr %[[END]] +// CHECK-NEXT: %[[COND:.+]] = icmp ule i64 %[[CONDBEGIN]], %[[CONDEND]] +// CHECK-NEXT: br i1 %[[COND]], label %{{.+}}, label %[[PFORCONDCLEANUP:.+]], !llvm.loop ![[LOOPMD6:.+]] + +// CHECK: [[PFORCONDCLEANUP]]: +// CHECK-NEXT: sync within %[[SYNCREG]] + +void up_stride_flip(size_t start, size_t end, size_t stride) { + _Cilk_for (size_t i = start; end > i; i += stride) + bar(i); +} + +// CHECK-LABEL: define {{.*}}void @_Z14up_stride_flipmmm( + +// CHECK: %[[START:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[START]], ptr %[[INIT:.+]], align 8 +// CHECK-NEXT: %[[END:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[END]], ptr %[[LIMIT:.+]], align 8 +// CHECK-NEXT: %[[INITCMPLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[INITCMPINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[INITCMP:.+]] = icmp ugt i64 %[[INITCMPLIMIT]], %[[INITCMPINIT]] +// CHECK-NEXT: br i1 
%[[INITCMP]], label %[[PFORPH:.+]], label %[[PFOREND:.+]] + +// CHECK: [[PFORPH]]: +// CHECK-NEXT: store i64 0, ptr %[[BEGIN:.+]], align 8 +// CHECK-NEXT: %[[ENDLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[ENDINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[ENDSUB:.+]] = sub i64 %[[ENDLIMIT]], %[[ENDINIT]] +// CHECK-NEXT: %[[ENDSUB1:.+]] = sub i64 %[[ENDSUB]], 1 +// CHECK-NEXT: %[[STRIDE:.+]] = load i64, ptr %[[STRIDEADDR:.+]], align 8 +// CHECK-NEXT: %[[ENDDIV:.+]] = udiv i64 %[[ENDSUB1]], %[[STRIDE]] +// CHECK-NEXT: %[[ENDADD:.+]] = add i64 %[[ENDDIV]], 1 +// CHECK-NEXT: store i64 %[[ENDADD]], ptr %[[END:.+]], align 8 + +// CHECK: %[[INITITER:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[BEGINITER:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[STRIDEITER:.+]] = load i64, ptr %[[STRIDEADDR]] +// CHECK-NEXT: %[[ITERMUL:.+]] = mul i64 %[[BEGINITER]], %[[STRIDEITER]] +// CHECK-NEXT: %[[ITERADD:.+]] = add i64 %[[INITITER]], %[[ITERMUL]] +// CHECK-NEXT: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[PFORINC:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: %[[ITERSLOT:.+]] = alloca i64, align 8 +// CHECK: store i64 %[[ITERADD]], ptr %[[ITERSLOT]] + +// CHECK: [[PFORINC]]: +// CHECK-NEXT: %[[INCBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[INC:.+]] = add i64 %[[INCBEGIN]], 1 +// CHECK-NEXT: store i64 %[[INC]], ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDEND:.+]] = load i64, ptr %[[END]] +// CHECK-NEXT: %[[CONDBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[COND:.+]] = icmp ugt i64 %[[CONDEND]], %[[CONDBEGIN]] +// CHECK-NEXT: br i1 %[[COND]], label %{{.+}}, label %[[PFORCONDCLEANUP:.+]], !llvm.loop ![[LOOPMD7:.+]] + +// CHECK: [[PFORCONDCLEANUP]]: +// CHECK-NEXT: sync within %[[SYNCREG]] + +void up_stride_flip_geq(size_t start, size_t end, size_t stride) { + _Cilk_for (size_t i = start; end >= i; i += stride) + bar(i); +} + +// CHECK-LABEL: define {{.*}}void @_Z18up_stride_flip_geqmmm( + +// CHECK: %[[START:.+]] = load 
i64, ptr +// CHECK-NEXT: store i64 %[[START]], ptr %[[INIT:.+]], align 8 +// CHECK-NEXT: %[[END:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[END]], ptr %[[LIMIT:.+]], align 8 +// CHECK-NEXT: %[[INITCMPLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[INITCMPINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[INITCMP:.+]] = icmp uge i64 %[[INITCMPLIMIT]], %[[INITCMPINIT]] +// CHECK-NEXT: br i1 %[[INITCMP]], label %[[PFORPH:.+]], label %[[PFOREND:.+]] + +// CHECK: [[PFORPH]]: +// CHECK-NEXT: store i64 0, ptr %[[BEGIN:.+]], align 8 +// CHECK-NEXT: %[[ENDLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[ENDINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[ENDSUB:.+]] = sub i64 %[[ENDLIMIT]], %[[ENDINIT]] +// CHECK-NEXT: %[[STRIDE:.+]] = load i64, ptr %[[STRIDEADDR:.+]], align 8 +// CHECK-NEXT: %[[ENDDIV:.+]] = udiv i64 %[[ENDSUB]], %[[STRIDE]] +// CHECK-NEXT: store i64 %[[ENDDIV]], ptr %[[END:.+]], align 8 + +// CHECK: %[[INITITER:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[BEGINITER:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[STRIDEITER:.+]] = load i64, ptr %[[STRIDEADDR]] +// CHECK-NEXT: %[[ITERMUL:.+]] = mul i64 %[[BEGINITER]], %[[STRIDEITER]] +// CHECK-NEXT: %[[ITERADD:.+]] = add i64 %[[INITITER]], %[[ITERMUL]] +// CHECK-NEXT: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[PFORINC:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: %[[ITERSLOT:.+]] = alloca i64, align 8 +// CHECK: store i64 %[[ITERADD]], ptr %[[ITERSLOT]] + +// CHECK: [[PFORINC]]: +// CHECK-NEXT: %[[INCBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[INC:.+]] = add i64 %[[INCBEGIN]], 1 +// CHECK-NEXT: store i64 %[[INC]], ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDEND:.+]] = load i64, ptr %[[END]] +// CHECK-NEXT: %[[CONDBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[COND:.+]] = icmp uge i64 %[[CONDEND]], %[[CONDBEGIN]] +// CHECK-NEXT: br i1 %[[COND]], label %{{.+}}, label %[[PFORCONDCLEANUP:.+]], !llvm.loop ![[LOOPMD8:.+]] + +// CHECK: 
[[PFORCONDCLEANUP]]: +// CHECK-NEXT: sync within %[[SYNCREG]] + +void up_ne_stride(size_t start, size_t end, size_t stride) { + _Cilk_for (size_t i = start; i != end; i += stride) + bar(i); +} + +// CHECK-LABEL: define {{.*}}void @_Z12up_ne_stridemmm( + +// CHECK: %[[START:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[START]], ptr %[[INIT:.+]], align 8 +// CHECK-NEXT: %[[END:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[END]], ptr %[[LIMIT:.+]], align 8 +// CHECK-NEXT: %[[INITCMPINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[INITCMPLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[INITCMP:.+]] = icmp ne i64 %[[INITCMPINIT]], %[[INITCMPLIMIT]] +// CHECK-NEXT: br i1 %[[INITCMP]], label %[[PFORPH:.+]], label %[[PFOREND:.+]] + +// CHECK: [[PFORPH]]: +// CHECK-NEXT: store i64 0, ptr %[[BEGIN:.+]], align 8 +// CHECK-NEXT: %[[ENDLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[ENDINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[ENDSUB:.+]] = sub i64 %[[ENDLIMIT]], %[[ENDINIT]] +// CHECK-NEXT: %[[STRIDE:.+]] = load i64, ptr %[[STRIDEADDR:.+]], align 8 +// CHECK-NEXT: %[[ENDDIV:.+]] = udiv i64 %[[ENDSUB]], %[[STRIDE]] +// CHECK-NEXT: store i64 %[[ENDDIV]], ptr %[[END:.+]], align 8 + +// CHECK: %[[INITITER:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[BEGINITER:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[STRIDEITER:.+]] = load i64, ptr %[[STRIDEADDR]] +// CHECK-NEXT: %[[ITERMUL:.+]] = mul i64 %[[BEGINITER]], %[[STRIDEITER]] +// CHECK-NEXT: %[[ITERADD:.+]] = add i64 %[[INITITER]], %[[ITERMUL]] +// CHECK-NEXT: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[PFORINC:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: %[[ITERSLOT:.+]] = alloca i64, align 8 +// CHECK: store i64 %[[ITERADD]], ptr %[[ITERSLOT]] + +// CHECK: [[PFORINC]]: +// CHECK-NEXT: %[[INCBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[INC:.+]] = add i64 %[[INCBEGIN]], 1 +// CHECK-NEXT: store i64 %[[INC]], ptr %[[BEGIN]] +// CHECK-NEXT: 
%[[CONDEND:.+]] = load i64, ptr %[[END]] +// CHECK-NEXT: %[[CONDBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[COND:.+]] = icmp ne i64 %[[CONDEND]], %[[CONDBEGIN]] +// CHECK-NEXT: br i1 %[[COND]], label %{{.+}}, label %[[PFORCONDCLEANUP:.+]], !llvm.loop ![[LOOPMD8:.+]] + +// CHECK: [[PFORCONDCLEANUP]]: +// CHECK-NEXT: sync within %[[SYNCREG]] + +void up_ne_stride_flip(size_t start, size_t end, size_t stride) { + _Cilk_for (size_t i = start; end != i; i += stride) + bar(i); +} + +// CHECK-LABEL: define {{.*}}void @_Z17up_ne_stride_flipmmm( + +// CHECK: %[[START:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[START]], ptr %[[INIT:.+]], align 8 +// CHECK-NEXT: %[[END:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[END]], ptr %[[LIMIT:.+]], align 8 +// CHECK-NEXT: %[[INITCMPLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[INITCMPINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[INITCMP:.+]] = icmp ne i64 %[[INITCMPLIMIT]], %[[INITCMPINIT]] +// CHECK-NEXT: br i1 %[[INITCMP]], label %[[PFORPH:.+]], label %[[PFOREND:.+]] + +// CHECK: [[PFORPH]]: +// CHECK-NEXT: store i64 0, ptr %[[BEGIN:.+]], align 8 +// CHECK-NEXT: %[[ENDLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[ENDINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[ENDSUB:.+]] = sub i64 %[[ENDLIMIT]], %[[ENDINIT]] +// CHECK-NEXT: %[[STRIDE:.+]] = load i64, ptr %[[STRIDEADDR:.+]], align 8 +// CHECK-NEXT: %[[ENDDIV:.+]] = udiv i64 %[[ENDSUB]], %[[STRIDE]] +// CHECK-NEXT: store i64 %[[ENDDIV]], ptr %[[END:.+]], align 8 + +// CHECK: %[[INITITER:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[BEGINITER:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[STRIDEITER:.+]] = load i64, ptr %[[STRIDEADDR]] +// CHECK-NEXT: %[[ITERMUL:.+]] = mul i64 %[[BEGINITER]], %[[STRIDEITER]] +// CHECK-NEXT: %[[ITERADD:.+]] = add i64 %[[INITITER]], %[[ITERMUL]] +// CHECK-NEXT: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[PFORINC:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: 
%[[ITERSLOT:.+]] = alloca i64, align 8 +// CHECK: store i64 %[[ITERADD]], ptr %[[ITERSLOT]] + +// CHECK: [[PFORINC]]: +// CHECK-NEXT: %[[INCBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[INC:.+]] = add i64 %[[INCBEGIN]], 1 +// CHECK-NEXT: store i64 %[[INC]], ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDEND:.+]] = load i64, ptr %[[END]] +// CHECK-NEXT: %[[COND:.+]] = icmp ne i64 %[[CONDBEGIN]], %[[CONDEND]] +// CHECK-NEXT: br i1 %[[COND]], label %{{.+}}, label %[[PFORCONDCLEANUP:.+]], !llvm.loop ![[LOOPMD9:.+]] + +// CHECK: [[PFORCONDCLEANUP]]: +// CHECK-NEXT: sync within %[[SYNCREG]] + +void down(size_t start, size_t end) { + _Cilk_for (size_t i = end; i > start; --i) + bar(i); +} + +// CHECK-LABEL: define {{.*}}void @_Z4downmm( + +// CHECK: %[[START:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[START]], ptr %[[INIT:.+]], align 8 +// CHECK-NEXT: %[[END:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[END]], ptr %[[LIMIT:.+]], align 8 +// CHECK-NEXT: %[[INITCMPINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[INITCMPLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[INITCMP:.+]] = icmp ugt i64 %[[INITCMPINIT]], %[[INITCMPLIMIT]] +// CHECK-NEXT: br i1 %[[INITCMP]], label %[[PFORPH:.+]], label %[[PFOREND:.+]] + +// CHECK: [[PFORPH]]: +// CHECK-NEXT: store i64 0, ptr %[[BEGIN:.+]], align 8 +// CHECK-NEXT: %[[ENDINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[ENDLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[ENDSUB:.+]] = sub i64 %[[ENDINIT]], %[[ENDLIMIT]] +// CHECK-NEXT: %[[ENDSUB1:.+]] = sub i64 %[[ENDSUB]], 1 +// CHECK-NEXT: %[[ENDDIV:.+]] = udiv i64 %[[ENDSUB1]], 1 +// CHECK-NEXT: %[[ENDADD:.+]] = add i64 %[[ENDDIV]], 1 +// CHECK-NEXT: store i64 %[[ENDADD]], ptr %[[END:.+]], align 8 + +// CHECK: %[[INITITER:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[BEGINITER:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[ITERMUL:.+]] = mul i64 %[[BEGINITER]], 1 +// 
CHECK-NEXT: %[[ITERADD:.+]] = sub i64 %[[INITITER]], %[[ITERMUL]] +// CHECK-NEXT: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[PFORINC:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: %[[ITERSLOT:.+]] = alloca i64, align 8 +// CHECK: store i64 %[[ITERADD]], ptr %[[ITERSLOT]] + +// CHECK: [[PFORINC]]: +// CHECK-NEXT: %[[INCBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[INC:.+]] = add i64 %[[INCBEGIN]], 1 +// CHECK-NEXT: store i64 %[[INC]], ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDEND:.+]] = load i64, ptr %[[END]] +// CHECK-NEXT: %[[CONDBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[COND:.+]] = icmp ugt i64 %[[CONDEND]], %[[CONDBEGIN]] +// CHECK-NEXT: br i1 %[[COND]], label %{{.+}}, label %[[PFORCONDCLEANUP:.+]], !llvm.loop ![[LOOPMD10:.+]] + +// CHECK: [[PFORCONDCLEANUP]]: +// CHECK-NEXT: sync within %[[SYNCREG]] + +void down_geq(size_t start, size_t end) { + _Cilk_for (size_t i = end; i >= start; --i) + bar(i); +} + +// CHECK-LABEL: define {{.*}}void @_Z8down_geqmm( + +// CHECK: %[[START:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[START]], ptr %[[INIT:.+]], align 8 +// CHECK-NEXT: %[[END:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[END]], ptr %[[LIMIT:.+]], align 8 +// CHECK-NEXT: %[[INITCMPINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[INITCMPLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[INITCMP:.+]] = icmp uge i64 %[[INITCMPINIT]], %[[INITCMPLIMIT]] +// CHECK-NEXT: br i1 %[[INITCMP]], label %[[PFORPH:.+]], label %[[PFOREND:.+]] + +// CHECK: [[PFORPH]]: +// CHECK-NEXT: store i64 0, ptr %[[BEGIN:.+]], align 8 +// CHECK-NEXT: %[[ENDINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[ENDLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[ENDSUB:.+]] = sub i64 %[[ENDINIT]], %[[ENDLIMIT]] +// CHECK-NEXT: %[[ENDDIV:.+]] = udiv i64 %[[ENDSUB]], 1 +// CHECK-NEXT: store i64 %[[ENDDIV]], ptr %[[END:.+]], align 8 + +// CHECK: %[[INITITER:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[BEGINITER:.+]] = load 
i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[ITERMUL:.+]] = mul i64 %[[BEGINITER]], 1 +// CHECK-NEXT: %[[ITERADD:.+]] = sub i64 %[[INITITER]], %[[ITERMUL]] +// CHECK-NEXT: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[PFORINC:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: %[[ITERSLOT:.+]] = alloca i64, align 8 +// CHECK: store i64 %[[ITERADD]], ptr %[[ITERSLOT]] + +// CHECK: [[PFORINC]]: +// CHECK-NEXT: %[[INCBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[INC:.+]] = add i64 %[[INCBEGIN]], 1 +// CHECK-NEXT: store i64 %[[INC]], ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDEND:.+]] = load i64, ptr %[[END]] +// CHECK-NEXT: %[[CONDBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[COND:.+]] = icmp uge i64 %[[CONDEND]], %[[CONDBEGIN]] +// CHECK-NEXT: br i1 %[[COND]], label %{{.+}}, label %[[PFORCONDCLEANUP:.+]], !llvm.loop ![[LOOPMD11:.+]] + +// CHECK: [[PFORCONDCLEANUP]]: +// CHECK-NEXT: sync within %[[SYNCREG]] + +void down_flip(size_t start, size_t end) { + _Cilk_for (size_t i = end; start < i; --i) + bar(i); +} + +// CHECK-LABEL: define {{.*}}void @_Z9down_flipmm( + +// CHECK: %[[START:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[START]], ptr %[[INIT:.+]], align 8 +// CHECK-NEXT: %[[END:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[END]], ptr %[[LIMIT:.+]], align 8 +// CHECK-NEXT: %[[INITCMPLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[INITCMPINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[INITCMP:.+]] = icmp ult i64 %[[INITCMPLIMIT]], %[[INITCMPINIT]] +// CHECK-NEXT: br i1 %[[INITCMP]], label %[[PFORPH:.+]], label %[[PFOREND:.+]] + +// CHECK: [[PFORPH]]: +// CHECK-NEXT: store i64 0, ptr %[[BEGIN:.+]], align 8 +// CHECK-NEXT: %[[ENDINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[ENDLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[ENDSUB:.+]] = sub i64 %[[ENDINIT]], %[[ENDLIMIT]] +// CHECK-NEXT: %[[ENDSUB1:.+]] = sub i64 %[[ENDSUB]], 1 +// CHECK-NEXT: %[[ENDDIV:.+]] = udiv i64 %[[ENDSUB1]], 1 +// CHECK-NEXT: 
%[[ENDADD:.+]] = add i64 %[[ENDDIV]], 1 +// CHECK-NEXT: store i64 %[[ENDADD]], ptr %[[END:.+]], align 8 + +// CHECK: %[[INITITER:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[BEGINITER:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[ITERMUL:.+]] = mul i64 %[[BEGINITER]], 1 +// CHECK-NEXT: %[[ITERADD:.+]] = sub i64 %[[INITITER]], %[[ITERMUL]] +// CHECK-NEXT: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[PFORINC:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: %[[ITERSLOT:.+]] = alloca i64, align 8 +// CHECK: store i64 %[[ITERADD]], ptr %[[ITERSLOT]] + +// CHECK: [[PFORINC]]: +// CHECK-NEXT: %[[INCBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[INC:.+]] = add i64 %[[INCBEGIN]], 1 +// CHECK-NEXT: store i64 %[[INC]], ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDEND:.+]] = load i64, ptr %[[END]] +// CHECK-NEXT: %[[COND:.+]] = icmp ult i64 %[[CONDBEGIN]], %[[CONDEND]] +// CHECK-NEXT: br i1 %[[COND]], label %{{.+}}, label %[[PFORCONDCLEANUP:.+]], !llvm.loop ![[LOOPMD12:.+]] + +// CHECK: [[PFORCONDCLEANUP]]: +// CHECK-NEXT: sync within %[[SYNCREG]] + +void down_flip_leq(size_t start, size_t end) { + _Cilk_for (size_t i = end; start <= i; --i) + bar(i); +} + +// CHECK-LABEL: define {{.*}}void @_Z13down_flip_leqmm( + +// CHECK: %[[START:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[START]], ptr %[[INIT:.+]], align 8 +// CHECK-NEXT: %[[END:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[END]], ptr %[[LIMIT:.+]], align 8 +// CHECK-NEXT: %[[INITCMPLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[INITCMPINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[INITCMP:.+]] = icmp ule i64 %[[INITCMPLIMIT]], %[[INITCMPINIT]] +// CHECK-NEXT: br i1 %[[INITCMP]], label %[[PFORPH:.+]], label %[[PFOREND:.+]] + +// CHECK: [[PFORPH]]: +// CHECK-NEXT: store i64 0, ptr %[[BEGIN:.+]], align 8 +// CHECK-NEXT: %[[ENDINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[ENDLIMIT:.+]] = load i64, ptr 
%[[LIMIT]] +// CHECK-NEXT: %[[ENDSUB:.+]] = sub i64 %[[ENDINIT]], %[[ENDLIMIT]] +// CHECK-NEXT: %[[ENDDIV:.+]] = udiv i64 %[[ENDSUB]], 1 +// CHECK-NEXT: store i64 %[[ENDDIV]], ptr %[[END:.+]], align 8 + +// CHECK: %[[INITITER:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[BEGINITER:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[ITERMUL:.+]] = mul i64 %[[BEGINITER]], 1 +// CHECK-NEXT: %[[ITERADD:.+]] = sub i64 %[[INITITER]], %[[ITERMUL]] +// CHECK-NEXT: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[PFORINC:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: %[[ITERSLOT:.+]] = alloca i64, align 8 +// CHECK: store i64 %[[ITERADD]], ptr %[[ITERSLOT]] + +// CHECK: [[PFORINC]]: +// CHECK-NEXT: %[[INCBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[INC:.+]] = add i64 %[[INCBEGIN]], 1 +// CHECK-NEXT: store i64 %[[INC]], ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDEND:.+]] = load i64, ptr %[[END]] +// CHECK-NEXT: %[[COND:.+]] = icmp ule i64 %[[CONDBEGIN]], %[[CONDEND]] +// CHECK-NEXT: br i1 %[[COND]], label %{{.+}}, label %[[PFORCONDCLEANUP:.+]], !llvm.loop ![[LOOPMD13:.+]] + +// CHECK: [[PFORCONDCLEANUP]]: +// CHECK-NEXT: sync within %[[SYNCREG]] + +void down_stride(size_t start, size_t end, size_t stride) { + _Cilk_for (size_t i = end; i > start; i -= stride) + bar(i); +} + +// CHECK-LABEL: define {{.*}}void @_Z11down_stridemmm( + +// CHECK: %[[START:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[START]], ptr %[[INIT:.+]], align 8 +// CHECK-NEXT: %[[END:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[END]], ptr %[[LIMIT:.+]], align 8 +// CHECK-NEXT: %[[INITCMPINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[INITCMPLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[INITCMP:.+]] = icmp ugt i64 %[[INITCMPINIT]], %[[INITCMPLIMIT]] +// CHECK-NEXT: br i1 %[[INITCMP]], label %[[PFORPH:.+]], label %[[PFOREND:.+]] + +// CHECK: [[PFORPH]]: +// CHECK-NEXT: store i64 0, ptr %[[BEGIN:.+]], 
align 8 +// CHECK-NEXT: %[[ENDINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[ENDLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[ENDSUB:.+]] = sub i64 %[[ENDINIT]], %[[ENDLIMIT]] +// CHECK-NEXT: %[[ENDSUB1:.+]] = sub i64 %[[ENDSUB]], 1 +// CHECK-NEXT: %[[STRIDE:.+]] = load i64, ptr %[[STRIDEADDR:.+]], align 8 +// CHECK-NEXT: %[[ENDDIV:.+]] = udiv i64 %[[ENDSUB1]], %[[STRIDE]] +// CHECK-NEXT: %[[ENDADD:.+]] = add i64 %[[ENDDIV]], 1 +// CHECK-NEXT: store i64 %[[ENDADD]], ptr %[[END:.+]], align 8 + +// CHECK: %[[INITITER:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[BEGINITER:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[STRIDEITER:.+]] = load i64, ptr %[[STRIDEADDR]] +// CHECK-NEXT: %[[ITERMUL:.+]] = mul i64 %[[BEGINITER]], %[[STRIDEITER]] +// CHECK-NEXT: %[[ITERADD:.+]] = sub i64 %[[INITITER]], %[[ITERMUL]] +// CHECK-NEXT: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[PFORINC:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: %[[ITERSLOT:.+]] = alloca i64, align 8 +// CHECK: store i64 %[[ITERADD]], ptr %[[ITERSLOT]] + +// CHECK: [[PFORINC]]: +// CHECK-NEXT: %[[INCBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[INC:.+]] = add i64 %[[INCBEGIN]], 1 +// CHECK-NEXT: store i64 %[[INC]], ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDEND:.+]] = load i64, ptr %[[END]] +// CHECK-NEXT: %[[CONDBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[COND:.+]] = icmp ugt i64 %[[CONDEND]], %[[CONDBEGIN]] +// CHECK-NEXT: br i1 %[[COND]], label %{{.+}}, label %[[PFORCONDCLEANUP:.+]], !llvm.loop ![[LOOPMD14:.+]] + +// CHECK: [[PFORCONDCLEANUP]]: +// CHECK-NEXT: sync within %[[SYNCREG]] + +void down_stride_geq(size_t start, size_t end, size_t stride) { + _Cilk_for (size_t i = end; i >= start; i -= stride) + bar(i); +} + +// CHECK-LABEL: define {{.*}}void @_Z15down_stride_geqmmm( + +// CHECK: %[[START:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[START]], ptr %[[INIT:.+]], align 8 +// CHECK-NEXT: %[[END:.+]] = load i64, ptr +// CHECK-NEXT: store 
i64 %[[END]], ptr %[[LIMIT:.+]], align 8 +// CHECK-NEXT: %[[INITCMPINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[INITCMPLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[INITCMP:.+]] = icmp uge i64 %[[INITCMPINIT]], %[[INITCMPLIMIT]] +// CHECK-NEXT: br i1 %[[INITCMP]], label %[[PFORPH:.+]], label %[[PFOREND:.+]] + +// CHECK: [[PFORPH]]: +// CHECK-NEXT: store i64 0, ptr %[[BEGIN:.+]], align 8 +// CHECK-NEXT: %[[ENDINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[ENDLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[ENDSUB:.+]] = sub i64 %[[ENDINIT]], %[[ENDLIMIT]] +// CHECK-NEXT: %[[STRIDE:.+]] = load i64, ptr %[[STRIDEADDR:.+]], align 8 +// CHECK-NEXT: %[[ENDDIV:.+]] = udiv i64 %[[ENDSUB]], %[[STRIDE]] +// CHECK-NEXT: store i64 %[[ENDDIV]], ptr %[[END:.+]], align 8 + +// CHECK: %[[INITITER:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[BEGINITER:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[STRIDEITER:.+]] = load i64, ptr %[[STRIDEADDR]] +// CHECK-NEXT: %[[ITERMUL:.+]] = mul i64 %[[BEGINITER]], %[[STRIDEITER]] +// CHECK-NEXT: %[[ITERADD:.+]] = sub i64 %[[INITITER]], %[[ITERMUL]] +// CHECK-NEXT: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[PFORINC:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: %[[ITERSLOT:.+]] = alloca i64, align 8 +// CHECK: store i64 %[[ITERADD]], ptr %[[ITERSLOT]] + +// CHECK: [[PFORINC]]: +// CHECK-NEXT: %[[INCBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[INC:.+]] = add i64 %[[INCBEGIN]], 1 +// CHECK-NEXT: store i64 %[[INC]], ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDEND:.+]] = load i64, ptr %[[END]] +// CHECK-NEXT: %[[CONDBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[COND:.+]] = icmp uge i64 %[[CONDEND]], %[[CONDBEGIN]] +// CHECK-NEXT: br i1 %[[COND]], label %{{.+}}, label %[[PFORCONDCLEANUP:.+]], !llvm.loop ![[LOOPMD15:.+]] + +// CHECK: [[PFORCONDCLEANUP]]: +// CHECK-NEXT: sync within %[[SYNCREG]] + +void down_stride_flip(size_t start, size_t end, size_t stride) { + _Cilk_for 
(size_t i = end; start < i; i -= stride) + bar(i); +} + +// CHECK-LABEL: define {{.*}}void @_Z16down_stride_flipmmm( + +// CHECK: %[[START:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[START]], ptr %[[INIT:.+]], align 8 +// CHECK-NEXT: %[[END:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[END]], ptr %[[LIMIT:.+]], align 8 +// CHECK-NEXT: %[[INITCMPLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[INITCMPINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[INITCMP:.+]] = icmp ult i64 %[[INITCMPLIMIT]], %[[INITCMPINIT]] +// CHECK-NEXT: br i1 %[[INITCMP]], label %[[PFORPH:.+]], label %[[PFOREND:.+]] + +// CHECK: [[PFORPH]]: +// CHECK-NEXT: store i64 0, ptr %[[BEGIN:.+]], align 8 +// CHECK-NEXT: %[[ENDINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[ENDLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[ENDSUB:.+]] = sub i64 %[[ENDINIT]], %[[ENDLIMIT]] +// CHECK-NEXT: %[[ENDSUB1:.+]] = sub i64 %[[ENDSUB]], 1 +// CHECK-NEXT: %[[STRIDE:.+]] = load i64, ptr %[[STRIDEADDR:.+]], align 8 +// CHECK-NEXT: %[[ENDDIV:.+]] = udiv i64 %[[ENDSUB1]], %[[STRIDE]] +// CHECK-NEXT: %[[ENDADD:.+]] = add i64 %[[ENDDIV]], 1 +// CHECK-NEXT: store i64 %[[ENDADD]], ptr %[[END:.+]], align 8 + +// CHECK: %[[INITITER:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[BEGINITER:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[STRIDEITER:.+]] = load i64, ptr %[[STRIDEADDR]] +// CHECK-NEXT: %[[ITERMUL:.+]] = mul i64 %[[BEGINITER]], %[[STRIDEITER]] +// CHECK-NEXT: %[[ITERADD:.+]] = sub i64 %[[INITITER]], %[[ITERMUL]] +// CHECK-NEXT: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[PFORINC:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: %[[ITERSLOT:.+]] = alloca i64, align 8 +// CHECK: store i64 %[[ITERADD]], ptr %[[ITERSLOT]] + +// CHECK: [[PFORINC]]: +// CHECK-NEXT: %[[INCBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[INC:.+]] = add i64 %[[INCBEGIN]], 1 +// CHECK-NEXT: store i64 %[[INC]], ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDBEGIN:.+]] = load i64, 
ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDEND:.+]] = load i64, ptr %[[END]] +// CHECK-NEXT: %[[COND:.+]] = icmp ult i64 %[[CONDBEGIN]], %[[CONDEND]] +// CHECK-NEXT: br i1 %[[COND]], label %{{.+}}, label %[[PFORCONDCLEANUP:.+]], !llvm.loop ![[LOOPMD16:.+]] + +// CHECK: [[PFORCONDCLEANUP]]: +// CHECK-NEXT: sync within %[[SYNCREG]] + +void down_stride_flip_leq(size_t start, size_t end, size_t stride) { + _Cilk_for (size_t i = end; start <= i; i -= stride) + bar(i); +} + +// CHECK-LABEL: define {{.*}}void @_Z20down_stride_flip_leqmmm( + +// CHECK: %[[START:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[START]], ptr %[[INIT:.+]], align 8 +// CHECK-NEXT: %[[END:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[END]], ptr %[[LIMIT:.+]], align 8 +// CHECK-NEXT: %[[INITCMPLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[INITCMPINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[INITCMP:.+]] = icmp ule i64 %[[INITCMPLIMIT]], %[[INITCMPINIT]] +// CHECK-NEXT: br i1 %[[INITCMP]], label %[[PFORPH:.+]], label %[[PFOREND:.+]] + +// CHECK: [[PFORPH]]: +// CHECK-NEXT: store i64 0, ptr %[[BEGIN:.+]], align 8 +// CHECK-NEXT: %[[ENDINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[ENDLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[ENDSUB:.+]] = sub i64 %[[ENDINIT]], %[[ENDLIMIT]] +// CHECK-NEXT: %[[STRIDE:.+]] = load i64, ptr %[[STRIDEADDR:.+]], align 8 +// CHECK-NEXT: %[[ENDDIV:.+]] = udiv i64 %[[ENDSUB]], %[[STRIDE]] +// CHECK-NEXT: store i64 %[[ENDDIV]], ptr %[[END:.+]], align 8 + +// CHECK: %[[INITITER:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[BEGINITER:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[STRIDEITER:.+]] = load i64, ptr %[[STRIDEADDR]] +// CHECK-NEXT: %[[ITERMUL:.+]] = mul i64 %[[BEGINITER]], %[[STRIDEITER]] +// CHECK-NEXT: %[[ITERADD:.+]] = sub i64 %[[INITITER]], %[[ITERMUL]] +// CHECK-NEXT: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[PFORINC:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: %[[ITERSLOT:.+]] = alloca i64, 
align 8 +// CHECK: store i64 %[[ITERADD]], ptr %[[ITERSLOT]] + +// CHECK: [[PFORINC]]: +// CHECK-NEXT: %[[INCBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[INC:.+]] = add i64 %[[INCBEGIN]], 1 +// CHECK-NEXT: store i64 %[[INC]], ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDEND:.+]] = load i64, ptr %[[END]] +// CHECK-NEXT: %[[COND:.+]] = icmp ule i64 %[[CONDBEGIN]], %[[CONDEND]] +// CHECK-NEXT: br i1 %[[COND]], label %{{.+}}, label %[[PFORCONDCLEANUP:.+]], !llvm.loop ![[LOOPMD17:.+]] + +// CHECK: [[PFORCONDCLEANUP]]: +// CHECK-NEXT: sync within %[[SYNCREG]] + +void down_ne_stride(size_t start, size_t end, size_t stride) { + _Cilk_for (size_t i = end; i != start; i -= stride) + bar(i); +} + +// CHECK-LABEL: define {{.*}}void @_Z14down_ne_stridemmm( + +// CHECK: %[[START:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[START]], ptr %[[INIT:.+]], align 8 +// CHECK-NEXT: %[[END:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[END]], ptr %[[LIMIT:.+]], align 8 +// CHECK-NEXT: %[[INITCMPINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[INITCMPLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[INITCMP:.+]] = icmp ne i64 %[[INITCMPINIT]], %[[INITCMPLIMIT]] +// CHECK-NEXT: br i1 %[[INITCMP]], label %[[PFORPH:.+]], label %[[PFOREND:.+]] + +// CHECK: [[PFORPH]]: +// CHECK-NEXT: store i64 0, ptr %[[BEGIN:.+]], align 8 +// CHECK-NEXT: %[[ENDINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[ENDLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[ENDSUB:.+]] = sub i64 %[[ENDINIT]], %[[ENDLIMIT]] +// CHECK-NEXT: %[[STRIDE:.+]] = load i64, ptr %[[STRIDEADDR:.+]], align 8 +// CHECK-NEXT: %[[ENDDIV:.+]] = udiv i64 %[[ENDSUB]], %[[STRIDE]] +// CHECK-NEXT: store i64 %[[ENDDIV]], ptr %[[END:.+]], align 8 + +// CHECK: %[[INITITER:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[BEGINITER:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[STRIDEITER:.+]] = load i64, ptr %[[STRIDEADDR]] +// CHECK-NEXT: 
%[[ITERMUL:.+]] = mul i64 %[[BEGINITER]], %[[STRIDEITER]] +// CHECK-NEXT: %[[ITERADD:.+]] = sub i64 %[[INITITER]], %[[ITERMUL]] +// CHECK-NEXT: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[PFORINC:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: %[[ITERSLOT:.+]] = alloca i64, align 8 +// CHECK: store i64 %[[ITERADD]], ptr %[[ITERSLOT]] + +// CHECK: [[PFORINC]]: +// CHECK-NEXT: %[[INCBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[INC:.+]] = add i64 %[[INCBEGIN]], 1 +// CHECK-NEXT: store i64 %[[INC]], ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDEND:.+]] = load i64, ptr %[[END]] +// CHECK-NEXT: %[[CONDBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[COND:.+]] = icmp ne i64 %[[CONDEND]], %[[CONDBEGIN]] +// CHECK-NEXT: br i1 %[[COND]], label %{{.+}}, label %[[PFORCONDCLEANUP:.+]], !llvm.loop ![[LOOPMD18:.+]] + +// CHECK: [[PFORCONDCLEANUP]]: +// CHECK-NEXT: sync within %[[SYNCREG]] + +void down_ne_stride_flip(size_t start, size_t end, size_t stride) { + _Cilk_for (size_t i = end; start != i; i -= stride) + bar(i); +} + +// CHECK-LABEL: define {{.*}}void @_Z19down_ne_stride_flipmmm( + +// CHECK: %[[START:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[START]], ptr %[[INIT:.+]], align 8 +// CHECK-NEXT: %[[END:.+]] = load i64, ptr +// CHECK-NEXT: store i64 %[[END]], ptr %[[LIMIT:.+]], align 8 +// CHECK-NEXT: %[[INITCMPLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[INITCMPINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[INITCMP:.+]] = icmp ne i64 %[[INITCMPLIMIT]], %[[INITCMPINIT]] +// CHECK-NEXT: br i1 %[[INITCMP]], label %[[PFORPH:.+]], label %[[PFOREND:.+]] + +// CHECK: [[PFORPH]]: +// CHECK-NEXT: store i64 0, ptr %[[BEGIN:.+]], align 8 +// CHECK-NEXT: %[[ENDINIT:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[ENDLIMIT:.+]] = load i64, ptr %[[LIMIT]] +// CHECK-NEXT: %[[ENDSUB:.+]] = sub i64 %[[ENDINIT]], %[[ENDLIMIT]] +// CHECK-NEXT: %[[STRIDE:.+]] = load i64, ptr %[[STRIDEADDR:.+]], align 8 +// CHECK-NEXT: %[[ENDDIV:.+]] = 
udiv i64 %[[ENDSUB]], %[[STRIDE]] +// CHECK-NEXT: store i64 %[[ENDDIV]], ptr %[[END:.+]], align 8 + +// CHECK: %[[INITITER:.+]] = load i64, ptr %[[INIT]] +// CHECK-NEXT: %[[BEGINITER:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[STRIDEITER:.+]] = load i64, ptr %[[STRIDEADDR]] +// CHECK-NEXT: %[[ITERMUL:.+]] = mul i64 %[[BEGINITER]], %[[STRIDEITER]] +// CHECK-NEXT: %[[ITERADD:.+]] = sub i64 %[[INITITER]], %[[ITERMUL]] +// CHECK-NEXT: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[PFORINC:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: %[[ITERSLOT:.+]] = alloca i64, align 8 +// CHECK: store i64 %[[ITERADD]], ptr %[[ITERSLOT]] + +// CHECK: [[PFORINC]]: +// CHECK-NEXT: %[[INCBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[INC:.+]] = add i64 %[[INCBEGIN]], 1 +// CHECK-NEXT: store i64 %[[INC]], ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDBEGIN:.+]] = load i64, ptr %[[BEGIN]] +// CHECK-NEXT: %[[CONDEND:.+]] = load i64, ptr %[[END]] +// CHECK-NEXT: %[[COND:.+]] = icmp ne i64 %[[CONDBEGIN]], %[[CONDEND]] +// CHECK-NEXT: br i1 %[[COND]], label %{{.+}}, label %[[PFORCONDCLEANUP:.+]], !llvm.loop ![[LOOPMD19:.+]] + +// CHECK: [[PFORCONDCLEANUP]]: +// CHECK-NEXT: sync within %[[SYNCREG]] + +// CHECK: ![[LOOPMD]] = distinct !{![[LOOPMD]], ![[SPAWNSTRATEGY:.+]]} +// CHECK: ![[SPAWNSTRATEGY]] = !{!"tapir.loop.spawn.strategy", i32 1} +// CHECK: ![[LOOPMD2]] = distinct !{![[LOOPMD2]], ![[SPAWNSTRATEGY]]} +// CHECK: ![[LOOPMD3]] = distinct !{![[LOOPMD3]], ![[SPAWNSTRATEGY]]} +// CHECK: ![[LOOPMD4]] = distinct !{![[LOOPMD4]], ![[SPAWNSTRATEGY]]} +// CHECK: ![[LOOPMD5]] = distinct !{![[LOOPMD5]], ![[SPAWNSTRATEGY]]} +// CHECK: ![[LOOPMD6]] = distinct !{![[LOOPMD6]], ![[SPAWNSTRATEGY]]} +// CHECK: ![[LOOPMD7]] = distinct !{![[LOOPMD7]], ![[SPAWNSTRATEGY]]} +// CHECK: ![[LOOPMD8]] = distinct !{![[LOOPMD8]], ![[SPAWNSTRATEGY]]} +// CHECK: ![[LOOPMD9]] = distinct !{![[LOOPMD9]], ![[SPAWNSTRATEGY]]} +// CHECK: ![[LOOPMD10]] = distinct !{![[LOOPMD10]], ![[SPAWNSTRATEGY]]} 
+// CHECK: ![[LOOPMD11]] = distinct !{![[LOOPMD11]], ![[SPAWNSTRATEGY]]} +// CHECK: ![[LOOPMD12]] = distinct !{![[LOOPMD12]], ![[SPAWNSTRATEGY]]} +// CHECK: ![[LOOPMD13]] = distinct !{![[LOOPMD13]], ![[SPAWNSTRATEGY]]} +// CHECK: ![[LOOPMD14]] = distinct !{![[LOOPMD14]], ![[SPAWNSTRATEGY]]} +// CHECK: ![[LOOPMD15]] = distinct !{![[LOOPMD15]], ![[SPAWNSTRATEGY]]} +// CHECK: ![[LOOPMD16]] = distinct !{![[LOOPMD16]], ![[SPAWNSTRATEGY]]} +// CHECK: ![[LOOPMD17]] = distinct !{![[LOOPMD17]], ![[SPAWNSTRATEGY]]} +// CHECK: ![[LOOPMD18]] = distinct !{![[LOOPMD18]], ![[SPAWNSTRATEGY]]} +// CHECK: ![[LOOPMD19]] = distinct !{![[LOOPMD19]], ![[SPAWNSTRATEGY]]} diff --git a/clang/test/Cilk/cilkfor-continue.c b/clang/test/Cilk/cilkfor-continue.c new file mode 100644 index 00000000000000..b3b2ffa5d3ac59 --- /dev/null +++ b/clang/test/Cilk/cilkfor-continue.c @@ -0,0 +1,11167 @@ +// Verify that a continue statement in a cilk_for loop can only reach +// the reattach for the loop body, and that the CFG generated for such +// a loop is valid. +// +// RUN: %clang_cc1 %s -std=c99 -triple x86_64-unknown-linux-gnu -O1 -fopencilk -ftapir=none -verify -S -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics + +# 1 "" +# 1 "oski.c" +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 1 "oski.c" +# 1 "/usr/include/assert.h" 1 3 4 +/* Copyright (C) 1991-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* + * ISO C99 Standard: 7.2 Diagnostics + */ + +#ifdef _ASSERT_H + +# undef _ASSERT_H +# undef assert +# undef __ASSERT_VOID_CAST + +# ifdef __USE_GNU +# undef assert_perror +# endif +# 31 "/usr/include/assert.h" 3 4 + +#endif /* assert.h */ +# 33 "/usr/include/assert.h" 3 4 + +#define _ASSERT_H 1 +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 35 "/usr/include/assert.h" 3 4 +# 1 "/usr/include/features.h" 1 3 4 +/* Copyright (C) 1991-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _FEATURES_H +#define _FEATURES_H 1 + +/* These are defined by the user (or the compiler) + to specify the desired environment: + + __STRICT_ANSI__ ISO Standard C. + _ISOC99_SOURCE Extensions to ISO C89 from ISO C99. + _ISOC11_SOURCE Extensions to ISO C99 from ISO C11. + _POSIX_SOURCE IEEE Std 1003.1. + _POSIX_C_SOURCE If ==1, like _POSIX_SOURCE; if >=2 add IEEE Std 1003.2; + if >=199309L, add IEEE Std 1003.1b-1993; + if >=199506L, add IEEE Std 1003.1c-1995; + if >=200112L, all of IEEE 1003.1-2004 + if >=200809L, all of IEEE 1003.1-2008 + _XOPEN_SOURCE Includes POSIX and XPG things. 
Set to 500 if + Single Unix conformance is wanted, to 600 for the + sixth revision, to 700 for the seventh revision. + _XOPEN_SOURCE_EXTENDED XPG things and X/Open Unix extensions. + _LARGEFILE_SOURCE Some more functions for correct standard I/O. + _LARGEFILE64_SOURCE Additional functionality from LFS for large files. + _FILE_OFFSET_BITS=N Select default filesystem interface. + _BSD_SOURCE ISO C, POSIX, and 4.3BSD things. + _SVID_SOURCE ISO C, POSIX, and SVID things. + _ATFILE_SOURCE Additional *at interfaces. + _GNU_SOURCE All of the above, plus GNU extensions. + _DEFAULT_SOURCE The default set of features (taking precedence over + __STRICT_ANSI__). + _REENTRANT Select additionally reentrant object. + _THREAD_SAFE Same as _REENTRANT, often used by other systems. + _FORTIFY_SOURCE If set to numeric value > 0 additional security + measures are defined, according to level. + + The `-ansi' switch to the GNU C compiler, and standards conformance + options such as `-std=c99', define __STRICT_ANSI__. If none of + these are defined, or if _DEFAULT_SOURCE is defined, the default is + to have _SVID_SOURCE, _BSD_SOURCE, and _POSIX_SOURCE set to one and + _POSIX_C_SOURCE set to 200809L. If more than one of these are + defined, they accumulate. For example __STRICT_ANSI__, + _POSIX_SOURCE and _POSIX_C_SOURCE together give you ISO C, 1003.1, + and 1003.2, but nothing else. + + These are defined by this file and are used by the + header files to decide what to declare or define: + + __USE_ISOC11 Define ISO C11 things. + __USE_ISOC99 Define ISO C99 things. + __USE_ISOC95 Define ISO C90 AMD1 (C95) things. + __USE_POSIX Define IEEE Std 1003.1 things. + __USE_POSIX2 Define IEEE Std 1003.2 things. + __USE_POSIX199309 Define IEEE Std 1003.1, and .1b things. + __USE_POSIX199506 Define IEEE Std 1003.1, .1b, .1c and .1i things. + __USE_XOPEN Define XPG things. + __USE_XOPEN_EXTENDED Define X/Open Unix things. + __USE_UNIX98 Define Single Unix V2 things. 
+ __USE_XOPEN2K Define XPG6 things. + __USE_XOPEN2KXSI Define XPG6 XSI things. + __USE_XOPEN2K8 Define XPG7 things. + __USE_XOPEN2K8XSI Define XPG7 XSI things. + __USE_LARGEFILE Define correct standard I/O things. + __USE_LARGEFILE64 Define LFS things with separate names. + __USE_FILE_OFFSET64 Define 64bit interface as default. + __USE_BSD Define 4.3BSD things. + __USE_SVID Define SVID things. + __USE_MISC Define things common to BSD and System V Unix. + __USE_ATFILE Define *at interfaces and AT_* constants for them. + __USE_GNU Define GNU extensions. + __USE_REENTRANT Define reentrant/thread-safe *_r functions. + __USE_FORTIFY_LEVEL Additional security measures used, according to level. + + The macros `__GNU_LIBRARY__', `__GLIBC__', and `__GLIBC_MINOR__' are + defined by this file unconditionally. `__GNU_LIBRARY__' is provided + only for compatibility. All new code should use the other symbols + to test for features. + + All macros listed above as possibly being defined by this file are + explicitly undefined if they are not explicitly defined. + Feature-test macros that are not defined by the user or compiler + but are implied by the other feature-test macros defined (or by the + lack of any definitions) are defined by the file. */ + + +/* Undefine everything, so we get a clean slate. */ +#undef __USE_ISOC11 +#undef __USE_ISOC99 +#undef __USE_ISOC95 +#undef __USE_ISOCXX11 +#undef __USE_POSIX +#undef __USE_POSIX2 +#undef __USE_POSIX199309 +#undef __USE_POSIX199506 +#undef __USE_XOPEN +#undef __USE_XOPEN_EXTENDED +#undef __USE_UNIX98 +#undef __USE_XOPEN2K +#undef __USE_XOPEN2KXSI +#undef __USE_XOPEN2K8 +#undef __USE_XOPEN2K8XSI +#undef __USE_LARGEFILE +#undef __USE_LARGEFILE64 +#undef __USE_FILE_OFFSET64 +#undef __USE_BSD +#undef __USE_SVID +#undef __USE_MISC +#undef __USE_ATFILE +#undef __USE_GNU +#undef __USE_REENTRANT +#undef __USE_FORTIFY_LEVEL +#undef __KERNEL_STRICT_NAMES + +/* Suppress kernel-name space pollution unless user expressedly asks + for it. 
*/ +#ifndef _LOOSE_KERNEL_NAMES +# define __KERNEL_STRICT_NAMES +#endif +# 133 "/usr/include/features.h" 3 4 + +/* Convenience macros to test the versions of glibc and gcc. + Use them like this: + #if __GNUC_PREREQ (2,8) + ... code requiring gcc 2.8 or later ... + #endif + Note - they won't work for gcc1 or glibc1, since the _MINOR macros + were not defined then. */ +#if defined __GNUC__ && defined __GNUC_MINOR__ +# define __GNUC_PREREQ(maj, min) \ + ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) +#else +# 145 "/usr/include/features.h" 3 4 +# define __GNUC_PREREQ(maj, min) 0 +#endif +# 147 "/usr/include/features.h" 3 4 + + +/* If _GNU_SOURCE was defined by the user, turn on all the other features. */ +#ifdef _GNU_SOURCE +# undef _ISOC95_SOURCE +# define _ISOC95_SOURCE 1 +# undef _ISOC99_SOURCE +# define _ISOC99_SOURCE 1 +# undef _ISOC11_SOURCE +# define _ISOC11_SOURCE 1 +# undef _POSIX_SOURCE +# define _POSIX_SOURCE 1 +# undef _POSIX_C_SOURCE +# define _POSIX_C_SOURCE 200809L +# undef _XOPEN_SOURCE +# define _XOPEN_SOURCE 700 +# undef _XOPEN_SOURCE_EXTENDED +# define _XOPEN_SOURCE_EXTENDED 1 +# undef _LARGEFILE64_SOURCE +# define _LARGEFILE64_SOURCE 1 +# undef _DEFAULT_SOURCE +# define _DEFAULT_SOURCE 1 +# undef _BSD_SOURCE +# define _BSD_SOURCE 1 +# undef _SVID_SOURCE +# define _SVID_SOURCE 1 +# undef _ATFILE_SOURCE +# define _ATFILE_SOURCE 1 +#endif +# 176 "/usr/include/features.h" 3 4 + +/* If nothing (other than _GNU_SOURCE and _DEFAULT_SOURCE) is defined, + define _DEFAULT_SOURCE, _BSD_SOURCE and _SVID_SOURCE. 
*/ +#if (defined _DEFAULT_SOURCE \ + || (!defined __STRICT_ANSI__ \ + && !defined _ISOC99_SOURCE \ + && !defined _POSIX_SOURCE && !defined _POSIX_C_SOURCE \ + && !defined _XOPEN_SOURCE \ + && !defined _BSD_SOURCE && !defined _SVID_SOURCE)) +# undef _DEFAULT_SOURCE +# define _DEFAULT_SOURCE 1 +# undef _BSD_SOURCE +# define _BSD_SOURCE 1 +# undef _SVID_SOURCE +# define _SVID_SOURCE 1 +#endif +# 192 "/usr/include/features.h" 3 4 + +/* This is to enable the ISO C11 extension. */ +#if (defined _ISOC11_SOURCE \ + || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 201112L)) +# define __USE_ISOC11 1 +#endif +# 198 "/usr/include/features.h" 3 4 + +/* This is to enable the ISO C99 extension. */ +#if (defined _ISOC99_SOURCE || defined _ISOC11_SOURCE \ + || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)) +# define __USE_ISOC99 1 +#endif +# 204 "/usr/include/features.h" 3 4 + +/* This is to enable the ISO C90 Amendment 1:1995 extension. */ +#if (defined _ISOC99_SOURCE || defined _ISOC11_SOURCE \ + || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199409L)) +# define __USE_ISOC95 1 +#endif +# 210 "/usr/include/features.h" 3 4 + +/* This is to enable compatibility for ISO C++11. + + So far g++ does not provide a macro. Check the temporary macro for + now, too. */ +#if ((defined __cplusplus && __cplusplus >= 201103L) \ + || defined __GXX_EXPERIMENTAL_CXX0X__) +# define __USE_ISOCXX11 1 +#endif +# 219 "/usr/include/features.h" 3 4 + +/* If none of the ANSI/POSIX macros are defined, or if _DEFAULT_SOURCE + is defined, use POSIX.1-2008 (or another version depending on + _XOPEN_SOURCE). 
*/ +#ifdef _DEFAULT_SOURCE +# if !defined _POSIX_SOURCE && !defined _POSIX_C_SOURCE +# define __USE_POSIX_IMPLICITLY 1 +# endif +# 227 "/usr/include/features.h" 3 4 +# undef _POSIX_SOURCE +# define _POSIX_SOURCE 1 +# undef _POSIX_C_SOURCE +# define _POSIX_C_SOURCE 200809L +#endif +# 232 "/usr/include/features.h" 3 4 +#if ((!defined __STRICT_ANSI__ || (_XOPEN_SOURCE - 0) >= 500) && \ + !defined _POSIX_SOURCE && !defined _POSIX_C_SOURCE) +# define _POSIX_SOURCE 1 +# if defined _XOPEN_SOURCE && (_XOPEN_SOURCE - 0) < 500 +# define _POSIX_C_SOURCE 2 +# elif defined _XOPEN_SOURCE && (_XOPEN_SOURCE - 0) < 600 +# 238 "/usr/include/features.h" 3 4 +# define _POSIX_C_SOURCE 199506L +# elif defined _XOPEN_SOURCE && (_XOPEN_SOURCE - 0) < 700 +# 240 "/usr/include/features.h" 3 4 +# define _POSIX_C_SOURCE 200112L +# else +# 242 "/usr/include/features.h" 3 4 +# define _POSIX_C_SOURCE 200809L +# endif +# 244 "/usr/include/features.h" 3 4 +# define __USE_POSIX_IMPLICITLY 1 +#endif +# 246 "/usr/include/features.h" 3 4 + +#if defined _POSIX_SOURCE || _POSIX_C_SOURCE >= 1 || defined _XOPEN_SOURCE +# define __USE_POSIX 1 +#endif +# 250 "/usr/include/features.h" 3 4 + +#if defined _POSIX_C_SOURCE && _POSIX_C_SOURCE >= 2 || defined _XOPEN_SOURCE +# define __USE_POSIX2 1 +#endif +# 254 "/usr/include/features.h" 3 4 + +#if (_POSIX_C_SOURCE - 0) >= 199309L +# define __USE_POSIX199309 1 +#endif +# 258 "/usr/include/features.h" 3 4 + +#if (_POSIX_C_SOURCE - 0) >= 199506L +# define __USE_POSIX199506 1 +#endif +# 262 "/usr/include/features.h" 3 4 + +#if (_POSIX_C_SOURCE - 0) >= 200112L +# define __USE_XOPEN2K 1 +# undef __USE_ISOC95 +# define __USE_ISOC95 1 +# undef __USE_ISOC99 +# define __USE_ISOC99 1 +#endif +# 270 "/usr/include/features.h" 3 4 + +#if (_POSIX_C_SOURCE - 0) >= 200809L +# define __USE_XOPEN2K8 1 +# undef _ATFILE_SOURCE +# define _ATFILE_SOURCE 1 +#endif +# 276 "/usr/include/features.h" 3 4 + +#ifdef _XOPEN_SOURCE +# define __USE_XOPEN 1 +# if (_XOPEN_SOURCE - 0) >= 500 +# 
define __USE_XOPEN_EXTENDED 1 +# define __USE_UNIX98 1 +# undef _LARGEFILE_SOURCE +# define _LARGEFILE_SOURCE 1 +# if (_XOPEN_SOURCE - 0) >= 600 +# if (_XOPEN_SOURCE - 0) >= 700 +# define __USE_XOPEN2K8 1 +# define __USE_XOPEN2K8XSI 1 +# endif +# 289 "/usr/include/features.h" 3 4 +# define __USE_XOPEN2K 1 +# define __USE_XOPEN2KXSI 1 +# undef __USE_ISOC95 +# define __USE_ISOC95 1 +# undef __USE_ISOC99 +# define __USE_ISOC99 1 +# endif +# 296 "/usr/include/features.h" 3 4 +# else +# 297 "/usr/include/features.h" 3 4 +# ifdef _XOPEN_SOURCE_EXTENDED +# define __USE_XOPEN_EXTENDED 1 +# endif +# 300 "/usr/include/features.h" 3 4 +# endif +# 301 "/usr/include/features.h" 3 4 +#endif +# 302 "/usr/include/features.h" 3 4 + +#ifdef _LARGEFILE_SOURCE +# define __USE_LARGEFILE 1 +#endif +# 306 "/usr/include/features.h" 3 4 + +#ifdef _LARGEFILE64_SOURCE +# define __USE_LARGEFILE64 1 +#endif +# 310 "/usr/include/features.h" 3 4 + +#if defined _FILE_OFFSET_BITS && _FILE_OFFSET_BITS == 64 +# define __USE_FILE_OFFSET64 1 +#endif +# 314 "/usr/include/features.h" 3 4 + +#if defined _BSD_SOURCE || defined _SVID_SOURCE +# define __USE_MISC 1 +#endif +# 318 "/usr/include/features.h" 3 4 + +#ifdef _BSD_SOURCE +# define __USE_BSD 1 +#endif +# 322 "/usr/include/features.h" 3 4 + +#ifdef _SVID_SOURCE +# define __USE_SVID 1 +#endif +# 326 "/usr/include/features.h" 3 4 + +#ifdef _ATFILE_SOURCE +# define __USE_ATFILE 1 +#endif +# 330 "/usr/include/features.h" 3 4 + +#ifdef _GNU_SOURCE +# define __USE_GNU 1 +#endif +# 334 "/usr/include/features.h" 3 4 + +#if defined _REENTRANT || defined _THREAD_SAFE +# define __USE_REENTRANT 1 +#endif +# 338 "/usr/include/features.h" 3 4 + +#if defined _FORTIFY_SOURCE && _FORTIFY_SOURCE > 0 \ + && __GNUC_PREREQ (4, 1) && defined __OPTIMIZE__ && __OPTIMIZE__ > 0 +# if _FORTIFY_SOURCE > 1 +# define __USE_FORTIFY_LEVEL 2 +# else +# 344 "/usr/include/features.h" 3 4 +# define __USE_FORTIFY_LEVEL 1 +# endif +# 346 "/usr/include/features.h" 3 4 +#else +# 347 
"/usr/include/features.h" 3 4 +# define __USE_FORTIFY_LEVEL 0 +#endif +# 349 "/usr/include/features.h" 3 4 + +/* Get definitions of __STDC_* predefined macros, if the compiler has + not preincluded this header automatically. */ +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 352 "/usr/include/features.h" 3 4 +# 1 "/usr/include/stdc-predef.h" 1 3 4 +/* Copyright (C) 1991-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _STDC_PREDEF_H +#define _STDC_PREDEF_H 1 + +/* This header is separate from features.h so that the compiler can + include it implicitly at the start of every compilation. It must + not itself include or any other header that includes + because the implicit include comes before any feature + test macros that may be defined in a source file before it first + explicitly includes a system header. GCC knows the name of this + header in order to preinclude it. */ + +/* glibc's intent is to support the IEC 559 math functionality, real + and complex. If the GCC (4.9 and later) predefined macros + specifying compiler intent are available, use them to determine + whether the overall intent is to support these features; otherwise, + presume an older compiler has intent to support these features and + define these macros by default. 
*/ + +#ifdef __GCC_IEC_559 +# if __GCC_IEC_559 > 0 +# define __STDC_IEC_559__ 1 +# endif +# 40 "/usr/include/stdc-predef.h" 3 4 +#else +# 41 "/usr/include/stdc-predef.h" 3 4 +# define __STDC_IEC_559__ 1 +#endif +# 43 "/usr/include/stdc-predef.h" 3 4 + +#ifdef __GCC_IEC_559_COMPLEX +# if __GCC_IEC_559_COMPLEX > 0 +# define __STDC_IEC_559_COMPLEX__ 1 +# endif +# 48 "/usr/include/stdc-predef.h" 3 4 +#else +# 49 "/usr/include/stdc-predef.h" 3 4 +# define __STDC_IEC_559_COMPLEX__ 1 +#endif +# 51 "/usr/include/stdc-predef.h" 3 4 + +/* wchar_t uses ISO/IEC 10646 (2nd ed., published 2011-03-15) / + Unicode 6.0. */ +#define __STDC_ISO_10646__ 201103L + +/* We do not support C11 . */ +#define __STDC_NO_THREADS__ 1 + +#endif +# 60 "/usr/include/stdc-predef.h" 3 4 +# 353 "/usr/include/features.h" 2 3 4 + +/* This macro indicates that the installed library is the GNU C Library. + For historic reasons the value now is 6 and this will stay from now + on. The use of this variable is deprecated. Use __GLIBC__ and + __GLIBC_MINOR__ now (see below) when you want to test for a specific + GNU C library version and use the values in to get + the sonames of the shared libraries. */ +#undef __GNU_LIBRARY__ +#define __GNU_LIBRARY__ 6 + +/* Major and minor version number of the GNU C library package. Use + these macros to test for features in specific releases. */ +#define __GLIBC__ 2 +#define __GLIBC_MINOR__ 19 + +#define __GLIBC_PREREQ(maj, min) \ + ((__GLIBC__ << 16) + __GLIBC_MINOR__ >= ((maj) << 16) + (min)) + +/* This is here only because every header file already includes this one. */ +#ifndef __ASSEMBLER__ +# ifndef _SYS_CDEFS_H +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 374 "/usr/include/features.h" 3 4 +# 1 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 1 3 4 +/* Copyright (C) 1992-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _SYS_CDEFS_H +#define _SYS_CDEFS_H 1 + +/* We are almost always included from features.h. */ +#ifndef _FEATURES_H +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 23 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# 24 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +#endif +# 25 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +/* The GNU libc does not support any K&R compilers or the traditional mode + of ISO C compilers anymore. Check for some of the combinations not + anymore supported. */ +#if defined __GNUC__ && !defined __STDC__ +# error "You need a ISO C conforming compiler to use the glibc headers" +#endif +# 32 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +/* Some user header file might have defined this before. */ +#undef __P +#undef __PMT + +#ifdef __GNUC__ + +/* All functions, except those with callbacks or those that + synchronize memory, are leaf functions. */ +# if __GNUC_PREREQ (4, 6) && !defined _LIBC +# define __LEAF , __leaf__ +# define __LEAF_ATTR __attribute__ ((__leaf__)) +# else +# 45 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# define __LEAF +# define __LEAF_ATTR +# endif +# 48 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +/* GCC can always grok prototypes. 
For C++ programs we add throw() + to help it optimize the function calls. But this works only with + gcc 2.8.x and egcs. For gcc 3.2 and up we even mark C functions + as non-throwing using a function attribute since programs can use + the -fexceptions options for C code as well. */ +# if !defined __cplusplus && __GNUC_PREREQ (3, 3) +# define __THROW __attribute__ ((__nothrow__ __LEAF)) +# define __THROWNL __attribute__ ((__nothrow__)) +# define __NTH(fct) __attribute__ ((__nothrow__ __LEAF)) fct +# else +# 59 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# if defined __cplusplus && __GNUC_PREREQ (2,8) +# define __THROW throw () +# define __THROWNL throw () +# define __NTH(fct) __LEAF_ATTR fct throw () +# else +# 64 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# define __THROW +# define __THROWNL +# define __NTH(fct) fct +# endif +# 68 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# endif +# 69 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +#else /* Not GCC. */ +# 71 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +# define __inline /* No inline functions. */ + +# define __THROW +# define __THROWNL +# define __NTH(fct) fct + +#endif /* GCC. */ +# 79 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +/* These two macros are not used in glibc anymore. They are kept here + only because some other projects expect the macros to be defined. */ +#define __P(args) args +#define __PMT(args) args + +/* For these things, GCC behaves the ANSI way normally, + and the non-ANSI way under -traditional. */ + +#define __CONCAT(x,y) x ## y +#define __STRING(x) #x + +/* This is not a typedef so `const __ptr_t' does the right thing. */ +#define __ptr_t void * +#define __long_double_t long double + + +/* C++ needs to know that types and declarations are C, not C++. 
*/ +#ifdef __cplusplus +# define __BEGIN_DECLS extern "C" { +# define __END_DECLS } +#else +# 101 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# define __BEGIN_DECLS +# define __END_DECLS +#endif +# 104 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + + +/* The standard library needs the functions from the ISO C90 standard + in the std namespace. At the same time we want to be safe for + future changes and we include the ISO C99 code in the non-standard + namespace __c99. The C++ wrapper header take case of adding the + definitions to the global namespace. */ +#if defined __cplusplus && defined _GLIBCPP_USE_NAMESPACES +# define __BEGIN_NAMESPACE_STD namespace std { +# define __END_NAMESPACE_STD } +# define __USING_NAMESPACE_STD(name) using std::name; +# define __BEGIN_NAMESPACE_C99 namespace __c99 { +# define __END_NAMESPACE_C99 } +# define __USING_NAMESPACE_C99(name) using __c99::name; +#else +# 119 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +/* For compatibility we do not add the declarations into any + namespace. They will end up in the global namespace which is what + old code expects. */ +# define __BEGIN_NAMESPACE_STD +# define __END_NAMESPACE_STD +# define __USING_NAMESPACE_STD(name) +# define __BEGIN_NAMESPACE_C99 +# define __END_NAMESPACE_C99 +# define __USING_NAMESPACE_C99(name) +#endif +# 129 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + + +/* Fortify support. 
*/ +#define __bos(ptr) __builtin_object_size (ptr, __USE_FORTIFY_LEVEL > 1) +#define __bos0(ptr) __builtin_object_size (ptr, 0) +#define __fortify_function __extern_always_inline __attribute_artificial__ + +#if __GNUC_PREREQ (4,3) +# define __warndecl(name, msg) \ + extern void name (void) __attribute__((__warning__ (msg))) +# define __warnattr(msg) __attribute__((__warning__ (msg))) +# define __errordecl(name, msg) \ + extern void name (void) __attribute__((__error__ (msg))) +#else +# 143 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# define __warndecl(name, msg) extern void name (void) +# define __warnattr(msg) +# define __errordecl(name, msg) extern void name (void) +#endif +# 147 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +/* Support for flexible arrays. */ +#if __GNUC_PREREQ (2,97) +/* GCC 2.97 supports C99 flexible array members. */ +# define __flexarr [] +#else +# 153 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# ifdef __GNUC__ +# define __flexarr [0] +# else +# 156 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L +# define __flexarr [] +# else +# 159 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +/* Some other non-C99 compiler. Approximate with [1]. */ +# define __flexarr [1] +# endif +# 162 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# endif +# 163 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +#endif +# 164 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + + +/* __asm__ ("xyz") is used throughout the headers to rename functions + at the assembly language level. This is wrapped by the __REDIRECT + macro, in order to support compilers that can do this some other + way. When compilers don't support asm-names at all, we have to do + preprocessor tricks instead (which don't have exactly the right + semantics, but it's the best we can do). 
+ + Example: + int __REDIRECT(setpgrp, (__pid_t pid, __pid_t pgrp), setpgid); */ + +#if defined __GNUC__ && __GNUC__ >= 2 + +# define __REDIRECT(name, proto, alias) name proto __asm__ (__ASMNAME (#alias)) +# ifdef __cplusplus +# define __REDIRECT_NTH(name, proto, alias) \ + name proto __THROW __asm__ (__ASMNAME (#alias)) +# define __REDIRECT_NTHNL(name, proto, alias) \ + name proto __THROWNL __asm__ (__ASMNAME (#alias)) +# else +# 185 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# define __REDIRECT_NTH(name, proto, alias) \ + name proto __asm__ (__ASMNAME (#alias)) __THROW +# define __REDIRECT_NTHNL(name, proto, alias) \ + name proto __asm__ (__ASMNAME (#alias)) __THROWNL +# endif +# 190 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# define __ASMNAME(cname) __ASMNAME2 (__USER_LABEL_PREFIX__, cname) +# define __ASMNAME2(prefix, cname) __STRING (prefix) cname + +/* +#elif __SOME_OTHER_COMPILER__ + +# define __REDIRECT(name, proto, alias) name proto; \ + _Pragma("let " #name " = " #alias) +*/ +#endif +# 200 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +/* GCC has various useful declarations that can be made with the + `__attribute__' syntax. All of the ways we use this do fine if + they are omitted for compilers that don't understand it. */ +#if !defined __GNUC__ || __GNUC__ < 2 +# define __attribute__(xyz) /* Ignore */ +#endif +# 207 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +/* At some point during the gcc 2.96 development the `malloc' attribute + for functions was introduced. We don't want to use it unconditionally + (although this would be possible) since it generates warnings. */ +#if __GNUC_PREREQ (2,96) +# define __attribute_malloc__ __attribute__ ((__malloc__)) +#else +# 214 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# define __attribute_malloc__ /* Ignore */ +#endif +# 216 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +/* Tell the compiler which arguments to an allocation function + indicate the size of the allocation. 
*/ +#if __GNUC_PREREQ (4, 3) +# define __attribute_alloc_size__(params) \ + __attribute__ ((__alloc_size__ params)) +#else +# 223 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# define __attribute_alloc_size__(params) /* Ignore. */ +#endif +# 225 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +/* At some point during the gcc 2.96 development the `pure' attribute + for functions was introduced. We don't want to use it unconditionally + (although this would be possible) since it generates warnings. */ +#if __GNUC_PREREQ (2,96) +# define __attribute_pure__ __attribute__ ((__pure__)) +#else +# 232 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# define __attribute_pure__ /* Ignore */ +#endif +# 234 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +/* This declaration tells the compiler that the value is constant. */ +#if __GNUC_PREREQ (2,5) +# define __attribute_const__ __attribute__ ((__const__)) +#else +# 239 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# define __attribute_const__ /* Ignore */ +#endif +# 241 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +/* At some point during the gcc 3.1 development the `used' attribute + for functions was introduced. We don't want to use it unconditionally + (although this would be possible) since it generates warnings. */ +#if __GNUC_PREREQ (3,1) +# define __attribute_used__ __attribute__ ((__used__)) +# define __attribute_noinline__ __attribute__ ((__noinline__)) +#else +# 249 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# define __attribute_used__ __attribute__ ((__unused__)) +# define __attribute_noinline__ /* Ignore */ +#endif +# 252 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +/* gcc allows marking deprecated functions. 
*/ +#if __GNUC_PREREQ (3,2) +# define __attribute_deprecated__ __attribute__ ((__deprecated__)) +#else +# 257 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# define __attribute_deprecated__ /* Ignore */ +#endif +# 259 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +/* At some point during the gcc 2.8 development the `format_arg' attribute + for functions was introduced. We don't want to use it unconditionally + (although this would be possible) since it generates warnings. + If several `format_arg' attributes are given for the same function, in + gcc-3.0 and older, all but the last one are ignored. In newer gccs, + all designated arguments are considered. */ +#if __GNUC_PREREQ (2,8) +# define __attribute_format_arg__(x) __attribute__ ((__format_arg__ (x))) +#else +# 269 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# define __attribute_format_arg__(x) /* Ignore */ +#endif +# 271 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +/* At some point during the gcc 2.97 development the `strfmon' format + attribute for functions was introduced. We don't want to use it + unconditionally (although this would be possible) since it + generates warnings. */ +#if __GNUC_PREREQ (2,97) +# define __attribute_format_strfmon__(a,b) \ + __attribute__ ((__format__ (__strfmon__, a, b))) +#else +# 280 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# define __attribute_format_strfmon__(a,b) /* Ignore */ +#endif +# 282 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +/* The nonull function attribute allows to mark pointer parameters which + must not be NULL. */ +#if __GNUC_PREREQ (3,3) +# define __nonnull(params) __attribute__ ((__nonnull__ params)) +#else +# 288 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# define __nonnull(params) +#endif +# 290 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +/* If fortification mode, we warn about unused results of certain + function calls which can lead to problems. 
*/ +#if __GNUC_PREREQ (3,4) +# define __attribute_warn_unused_result__ \ + __attribute__ ((__warn_unused_result__)) +# if __USE_FORTIFY_LEVEL > 0 +# define __wur __attribute_warn_unused_result__ +# endif +# 299 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +#else +# 300 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# define __attribute_warn_unused_result__ /* empty */ +#endif +# 302 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +#ifndef __wur +# define __wur /* Ignore */ +#endif +# 305 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +/* Forces a function to be always inlined. */ +#if __GNUC_PREREQ (3,2) +# define __always_inline __inline __attribute__ ((__always_inline__)) +#else +# 310 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# define __always_inline __inline +#endif +# 312 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +/* Associate error messages with the source location of the call site rather + than with the source location inside the function. */ +#if __GNUC_PREREQ (4,3) +# define __attribute_artificial__ __attribute__ ((__artificial__)) +#else +# 318 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# define __attribute_artificial__ /* Ignore */ +#endif +# 320 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +#ifdef __GNUC__ +/* One of these will be defined if the __gnu_inline__ attribute is + available. In C++, __GNUC_GNU_INLINE__ will be defined even though + __inline does not use the GNU inlining rules. If neither macro is + defined, this version of GCC only supports GNU inline semantics. 
*/ +# if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__ +# define __extern_inline extern __inline __attribute__ ((__gnu_inline__)) +# define __extern_always_inline \ + extern __always_inline __attribute__ ((__gnu_inline__)) +# else +# 331 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# define __extern_inline extern __inline +# define __extern_always_inline extern __always_inline +# endif +# 334 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +#else /* Not GCC. */ +# 335 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# define __extern_inline /* Ignore */ +# define __extern_always_inline /* Ignore */ +#endif +# 338 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +/* GCC 4.3 and above allow passing all anonymous arguments of an + __extern_always_inline function to some other vararg function. */ +#if __GNUC_PREREQ (4,3) +# define __va_arg_pack() __builtin_va_arg_pack () +# define __va_arg_pack_len() __builtin_va_arg_pack_len () +#endif +# 345 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +/* It is possible to compile containing GCC extensions even if GCC is + run in pedantic mode if the uses are carefully marked using the + `__extension__' keyword. But this is not generally available before + version 2.8. */ +#if !__GNUC_PREREQ (2,8) +# define __extension__ /* Ignore */ +#endif +# 353 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +/* __restrict is known in EGCS 1.2 and above. */ +#if !__GNUC_PREREQ (2,92) +# define __restrict /* Ignore */ +#endif +# 358 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +/* ISO C99 also allows to declare arrays as non-overlapping. The syntax is + array_name[restrict] + GCC 3.1 supports this. */ +#if __GNUC_PREREQ (3,1) && !defined __GNUG__ +# define __restrict_arr __restrict +#else +# 365 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# ifdef __GNUC__ +# define __restrict_arr /* Not supported in old GCC. 
*/ +# else +# 368 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L +# define __restrict_arr restrict +# else +# 371 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +/* Some other non-C99 compiler. */ +# define __restrict_arr /* Not supported. */ +# endif +# 374 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# endif +# 375 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +#endif +# 376 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +#if __GNUC__ >= 3 +# define __glibc_unlikely(cond) __builtin_expect ((cond), 0) +# define __glibc_likely(cond) __builtin_expect ((cond), 1) +#else +# 381 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# define __glibc_unlikely(cond) (cond) +# define __glibc_likely(cond) (cond) +#endif +# 384 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 385 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# 1 "/usr/include/x86_64-linux-gnu/bits/wordsize.h" 1 3 4 +/* Determine the wordsize from the preprocessor defines. */ + +#if defined __x86_64__ && !defined __ILP32__ +# define __WORDSIZE 64 +#else +# 6 "/usr/include/x86_64-linux-gnu/bits/wordsize.h" 3 4 +# define __WORDSIZE 32 +#endif +# 8 "/usr/include/x86_64-linux-gnu/bits/wordsize.h" 3 4 + +#ifdef __x86_64__ +# define __WORDSIZE_TIME64_COMPAT32 1 +/* Both x86-64 and x32 use the 64-bit system call interface. 
*/ +# define __SYSCALL_WORDSIZE 64 +#endif +# 14 "/usr/include/x86_64-linux-gnu/bits/wordsize.h" 3 4 +# 386 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 2 3 4 + +#if defined __LONG_DOUBLE_MATH_OPTIONAL && defined __NO_LONG_DOUBLE_MATH +# define __LDBL_COMPAT 1 +# ifdef __REDIRECT +# define __LDBL_REDIR1(name, proto, alias) __REDIRECT (name, proto, alias) +# define __LDBL_REDIR(name, proto) \ + __LDBL_REDIR1 (name, proto, __nldbl_##name) +# define __LDBL_REDIR1_NTH(name, proto, alias) __REDIRECT_NTH (name, proto, alias) +# define __LDBL_REDIR_NTH(name, proto) \ + __LDBL_REDIR1_NTH (name, proto, __nldbl_##name) +# define __LDBL_REDIR1_DECL(name, alias) \ + extern __typeof (name) name __asm (__ASMNAME (#alias)); +# define __LDBL_REDIR_DECL(name) \ + extern __typeof (name) name __asm (__ASMNAME ("__nldbl_" #name)); +# define __REDIRECT_LDBL(name, proto, alias) \ + __LDBL_REDIR1 (name, proto, __nldbl_##alias) +# define __REDIRECT_NTH_LDBL(name, proto, alias) \ + __LDBL_REDIR1_NTH (name, proto, __nldbl_##alias) +# endif +# 405 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +#endif +# 406 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +#if !defined __LDBL_COMPAT || !defined __REDIRECT +# define __LDBL_REDIR1(name, proto, alias) name proto +# define __LDBL_REDIR(name, proto) name proto +# define __LDBL_REDIR1_NTH(name, proto, alias) name proto __THROW +# define __LDBL_REDIR_NTH(name, proto) name proto __THROW +# define __LDBL_REDIR_DECL(name) +# ifdef __REDIRECT +# define __REDIRECT_LDBL(name, proto, alias) __REDIRECT (name, proto, alias) +# define __REDIRECT_NTH_LDBL(name, proto, alias) \ + __REDIRECT_NTH (name, proto, alias) +# endif +# 417 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +#endif +# 418 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 + +#endif /* sys/cdefs.h */ +# 420 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4 +# 375 "/usr/include/features.h" 2 3 4 +# endif +# 376 "/usr/include/features.h" 3 4 + +/* If we don't have __REDIRECT, prototypes will be 
missing if + __USE_FILE_OFFSET64 but not __USE_LARGEFILE[64]. */ +# if defined __USE_FILE_OFFSET64 && !defined __REDIRECT +# define __USE_LARGEFILE 1 +# define __USE_LARGEFILE64 1 +# endif +# 383 "/usr/include/features.h" 3 4 + +#endif /* !ASSEMBLER */ +# 385 "/usr/include/features.h" 3 4 + +/* Decide whether we can define 'extern inline' functions in headers. */ +#if __GNUC_PREREQ (2, 7) && defined __OPTIMIZE__ \ + && !defined __OPTIMIZE_SIZE__ && !defined __NO_INLINE__ \ + && defined __extern_inline +# define __USE_EXTERN_INLINES 1 +#endif +# 392 "/usr/include/features.h" 3 4 + + +/* This is here only because every header file already includes this one. + Get the definitions of all the appropriate `__stub_FUNCTION' symbols. + contains `#define __stub_FUNCTION' when FUNCTION is a stub + that will always return failure (and set errno to ENOSYS). */ +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 398 "/usr/include/features.h" 3 4 +# 1 "/usr/include/x86_64-linux-gnu/gnu/stubs.h" 1 3 4 +/* This file is automatically generated. + This file selects the right generated file of `__stub_FUNCTION' macros + based on the architecture being compiled for. */ + + +#if !defined __x86_64__ +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 7 "/usr/include/x86_64-linux-gnu/gnu/stubs.h" 3 4 +# 8 "/usr/include/x86_64-linux-gnu/gnu/stubs.h" 3 4 +#endif +# 9 "/usr/include/x86_64-linux-gnu/gnu/stubs.h" 3 4 +#if defined __x86_64__ && defined __LP64__ +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 10 "/usr/include/x86_64-linux-gnu/gnu/stubs.h" 3 4 +# 1 "/usr/include/x86_64-linux-gnu/gnu/stubs-64.h" 1 3 4 +/* This file is automatically generated. + It defines a symbol `__stub_FUNCTION' for each function + in the C library which is a stub, meaning it will fail + every time called, usually setting errno to ENOSYS. 
*/ + +#ifdef _LIBC +# error Applications may not define the macro _LIBC +#endif +# 9 "/usr/include/x86_64-linux-gnu/gnu/stubs-64.h" 3 4 + +#define __stub_bdflush +#define __stub_chflags +#define __stub_fattach +#define __stub_fchflags +#define __stub_fdetach +#define __stub_getmsg +#define __stub_gtty +#define __stub_lchmod +#define __stub_putmsg +#define __stub_revoke +#define __stub_setlogin +#define __stub_sigreturn +#define __stub_sstk +#define __stub_stty +# 11 "/usr/include/x86_64-linux-gnu/gnu/stubs.h" 2 3 4 +#endif +# 12 "/usr/include/x86_64-linux-gnu/gnu/stubs.h" 3 4 +#if defined __x86_64__ && defined __ILP32__ +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 13 "/usr/include/x86_64-linux-gnu/gnu/stubs.h" 3 4 +# 14 "/usr/include/x86_64-linux-gnu/gnu/stubs.h" 3 4 +#endif +# 15 "/usr/include/x86_64-linux-gnu/gnu/stubs.h" 3 4 +# 399 "/usr/include/features.h" 2 3 4 + + +#endif /* features.h */ +# 402 "/usr/include/features.h" 3 4 +# 36 "/usr/include/assert.h" 2 3 4 + +#if defined __cplusplus && __GNUC_PREREQ (2,95) +# define __ASSERT_VOID_CAST static_cast +#else +# 40 "/usr/include/assert.h" 3 4 +# define __ASSERT_VOID_CAST (void) +#endif +# 42 "/usr/include/assert.h" 3 4 + +/* void assert (int expression); + + If NDEBUG is defined, do nothing. + If not, and EXPRESSION is zero, print an error message and abort. */ + +#ifdef NDEBUG + +# define assert(expr) (__ASSERT_VOID_CAST (0)) + +/* void assert_perror (int errnum); + + If NDEBUG is defined, do nothing. If not, and ERRNUM is not zero, print an + error message with the error text for ERRNUM and abort. + (This is a GNU extension.) */ + +# ifdef __USE_GNU +# define assert_perror(errnum) (__ASSERT_VOID_CAST (0)) +# endif +# 61 "/usr/include/assert.h" 3 4 + +#else /* Not NDEBUG. */ +# 63 "/usr/include/assert.h" 3 4 + +#ifndef _ASSERT_H_DECLS +#define _ASSERT_H_DECLS +__BEGIN_DECLS + +/* This prints an "Assertion failed" message and aborts. 
*/ +extern void __assert_fail (const char *__assertion, const char *__file, + unsigned int __line, const char *__function) + __THROW __attribute__ ((__noreturn__)); + +/* Likewise, but prints the error text for ERRNUM. */ +extern void __assert_perror_fail (int __errnum, const char *__file, + unsigned int __line, const char *__function) + __THROW __attribute__ ((__noreturn__)); + + +/* The following is not at all used here but needed for standard + compliance. */ +extern void __assert (const char *__assertion, const char *__file, int __line) + __THROW __attribute__ ((__noreturn__)); + + +__END_DECLS +#endif /* Not _ASSERT_H_DECLS */ +# 87 "/usr/include/assert.h" 3 4 + +# define assert(expr) \ + ((expr) \ + ? __ASSERT_VOID_CAST (0) \ + : __assert_fail (__STRING(expr), __FILE__, __LINE__, __ASSERT_FUNCTION)) + +# ifdef __USE_GNU +# define assert_perror(errnum) \ + (!(errnum) \ + ? __ASSERT_VOID_CAST (0) \ + : __assert_perror_fail ((errnum), __FILE__, __LINE__, __ASSERT_FUNCTION)) +# endif +# 99 "/usr/include/assert.h" 3 4 + +/* Version 2.4 and later of GCC define a magical variable `__PRETTY_FUNCTION__' + which contains the name of the function currently being defined. + This is broken in G++ before version 2.6. + C9x has a similar variable called __func__, but prefer the GCC one since + it demangles C++ function names. */ +# if defined __cplusplus ? __GNUC_PREREQ (2, 6) : __GNUC_PREREQ (2, 4) +# define __ASSERT_FUNCTION __PRETTY_FUNCTION__ +# else +# 108 "/usr/include/assert.h" 3 4 +# if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L +# define __ASSERT_FUNCTION __func__ +# else +# 111 "/usr/include/assert.h" 3 4 +# define __ASSERT_FUNCTION ((const char *) 0) +# endif +# 113 "/usr/include/assert.h" 3 4 +# endif +# 114 "/usr/include/assert.h" 3 4 + +#endif /* NDEBUG. */ +# 116 "/usr/include/assert.h" 3 4 + + +#if defined __USE_ISOC11 && !defined __cplusplus +/* Static assertion. Requires support in the compiler. 
*/ +# undef static_assert +# define static_assert _Static_assert +#endif +# 123 "/usr/include/assert.h" 3 4 +# 2 "oski.c" 2 +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 2 "oski.c" +# 1 "/usr/include/stdio.h" 1 3 4 +/* Define ISO C stdio on top of C++ iostreams. + Copyright (C) 1991-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . 
*/ + +/* + * ISO C99 Standard: 7.19 Input/output + */ + +#ifndef _STDIO_H + +#if !defined __need_FILE && !defined __need___FILE +# define _STDIO_H 1 +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 27 "/usr/include/stdio.h" 3 4 +# 28 "/usr/include/stdio.h" 3 4 + +__BEGIN_DECLS + +# define __need_size_t +# define __need_NULL +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 33 "/usr/include/stdio.h" 3 4 +# 1 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 1 3 4 +/*===---- stddef.h - Basic type definitions --------------------------------=== + * + * Copyright (c) 2008 Eli Friedman + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + *===-----------------------------------------------------------------------=== + */ + +#if !defined(__STDDEF_H) || defined(__need_ptrdiff_t) || \ + defined(__need_size_t) || defined(__need_wchar_t) || \ + defined(__need_NULL) || defined(__need_wint_t) + +#if !defined(__need_ptrdiff_t) && !defined(__need_size_t) && \ + !defined(__need_wchar_t) && !defined(__need_NULL) && \ + !defined(__need_wint_t) +/* Always define miscellaneous pieces when modules are available. */ +#if !__has_feature(modules) +#define __STDDEF_H +#endif +# 37 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#define __need_ptrdiff_t +#define __need_size_t +#define __need_wchar_t +#define __need_NULL +#define __need_STDDEF_H_misc +/* __need_wint_t is intentionally not defined here. */ +#endif +# 44 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#if defined(__need_ptrdiff_t) +#if !defined(_PTRDIFF_T) || __has_feature(modules) +/* Always define ptrdiff_t when modules are available. */ +#if !__has_feature(modules) +#define _PTRDIFF_T +#endif +# 51 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +typedef __PTRDIFF_TYPE__ ptrdiff_t; +#endif +# 53 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#undef __need_ptrdiff_t +#endif /* defined(__need_ptrdiff_t) */ +# 55 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#if defined(__need_size_t) +#if !defined(_SIZE_T) || __has_feature(modules) +/* Always define size_t when modules are available. 
*/ +#if !__has_feature(modules) +#define _SIZE_T +#endif +# 62 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +typedef __SIZE_TYPE__ size_t; +#endif +# 64 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#undef __need_size_t +#endif /*defined(__need_size_t) */ +# 66 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#if defined(__need_STDDEF_H_misc) +/* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is + * enabled. */ +#if (defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 && \ + !defined(_RSIZE_T)) || __has_feature(modules) +/* Always define rsize_t when modules are available. */ +#if !__has_feature(modules) +#define _RSIZE_T +#endif +# 76 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +typedef __SIZE_TYPE__ rsize_t; +#endif +# 78 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#endif /* defined(__need_STDDEF_H_misc) */ +# 79 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#if defined(__need_wchar_t) +#ifndef __cplusplus +/* Always define wchar_t when modules are available. 
*/ +#if !defined(_WCHAR_T) || __has_feature(modules) +#if !__has_feature(modules) +#define _WCHAR_T +#if defined(_MSC_EXTENSIONS) +#define _WCHAR_T_DEFINED +#endif +# 89 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#endif +# 90 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +typedef __WCHAR_TYPE__ wchar_t; +#endif +# 92 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#endif +# 93 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#undef __need_wchar_t +#endif /* defined(__need_wchar_t) */ +# 95 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#if defined(__need_NULL) +#undef NULL +#ifdef __cplusplus +# if !defined(__MINGW32__) && !defined(_MSC_VER) +# define NULL __null +# else +# 102 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +# define NULL 0 +# endif +# 104 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#else +# 105 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +# define NULL ((void*)0) +#endif +# 107 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#ifdef __cplusplus +#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED) +namespace std { typedef decltype(nullptr) nullptr_t; } +using ::std::nullptr_t; +#endif +# 112 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#endif +# 113 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#undef __need_NULL +#endif /* defined(__need_NULL) */ +# 115 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#if defined(__need_STDDEF_H_misc) +#if __STDC_VERSION__ >= 201112L || __cplusplus >= 201103L +#if 0 /* expanded by -frewrite-includes */ +#include "__stddef_max_align_t.h" +#endif /* expanded by -frewrite-includes */ +# 118 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 
+# 119 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#endif +# 120 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#define offsetof(t, d) __builtin_offsetof(t, d) +#undef __need_STDDEF_H_misc +#endif /* defined(__need_STDDEF_H_misc) */ +# 123 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +/* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use +__WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */ +#if defined(__need_wint_t) +/* Always define wint_t when modules are available. */ +#if !defined(_WINT_T) || __has_feature(modules) +#if !__has_feature(modules) +#define _WINT_T +#endif +# 132 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +typedef __WINT_TYPE__ wint_t; +#endif +# 134 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#undef __need_wint_t +#endif /* __need_wint_t */ +# 136 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#endif +# 138 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +# 34 "/usr/include/stdio.h" 2 3 4 + +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 35 "/usr/include/stdio.h" 3 4 +# 1 "/usr/include/x86_64-linux-gnu/bits/types.h" 1 3 4 +/* bits/types.h -- definitions of __*_t types underlying *_t types. + Copyright (C) 2002-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* + * Never include this file directly; use instead. + */ + +#ifndef _BITS_TYPES_H +#define _BITS_TYPES_H 1 + +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 26 "/usr/include/x86_64-linux-gnu/bits/types.h" 3 4 +# 27 "/usr/include/x86_64-linux-gnu/bits/types.h" 3 4 +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 27 "/usr/include/x86_64-linux-gnu/bits/types.h" 3 4 +# 1 "/usr/include/x86_64-linux-gnu/bits/wordsize.h" 1 3 4 +/* Determine the wordsize from the preprocessor defines. */ + +#if defined __x86_64__ && !defined __ILP32__ +# define __WORDSIZE 64 +#else +# 6 "/usr/include/x86_64-linux-gnu/bits/wordsize.h" 3 4 +# define __WORDSIZE 32 +#endif +# 8 "/usr/include/x86_64-linux-gnu/bits/wordsize.h" 3 4 + +#ifdef __x86_64__ +# define __WORDSIZE_TIME64_COMPAT32 1 +/* Both x86-64 and x32 use the 64-bit system call interface. */ +# define __SYSCALL_WORDSIZE 64 +#endif +# 14 "/usr/include/x86_64-linux-gnu/bits/wordsize.h" 3 4 +# 28 "/usr/include/x86_64-linux-gnu/bits/types.h" 2 3 4 + +/* Convenience types. */ +typedef unsigned char __u_char; +typedef unsigned short int __u_short; +typedef unsigned int __u_int; +typedef unsigned long int __u_long; + +/* Fixed-size types, underlying types depend on word size and compiler. 
*/ +typedef signed char __int8_t; +typedef unsigned char __uint8_t; +typedef signed short int __int16_t; +typedef unsigned short int __uint16_t; +typedef signed int __int32_t; +typedef unsigned int __uint32_t; +#if __WORDSIZE == 64 +typedef signed long int __int64_t; +typedef unsigned long int __uint64_t; +#else +# 46 "/usr/include/x86_64-linux-gnu/bits/types.h" 3 4 +__extension__ typedef signed long long int __int64_t; +__extension__ typedef unsigned long long int __uint64_t; +#endif +# 49 "/usr/include/x86_64-linux-gnu/bits/types.h" 3 4 + +/* quad_t is also 64 bits. */ +#if __WORDSIZE == 64 +typedef long int __quad_t; +typedef unsigned long int __u_quad_t; +#else +# 55 "/usr/include/x86_64-linux-gnu/bits/types.h" 3 4 +__extension__ typedef long long int __quad_t; +__extension__ typedef unsigned long long int __u_quad_t; +#endif +# 58 "/usr/include/x86_64-linux-gnu/bits/types.h" 3 4 + + +/* The machine-dependent file defines __*_T_TYPE + macros for each of the OS types we define below. The definitions + of those macros must use the following macros for underlying types. + We define __S_TYPE and __U_TYPE for the signed and unsigned + variants of each of the following integer types on this machine. + + 16 -- "natural" 16-bit type (always short) + 32 -- "natural" 32-bit type (always int) + 64 -- "natural" 64-bit type (long or long long) + LONG32 -- 32-bit type, traditionally long + QUAD -- 64-bit type, always long long + WORD -- natural type of __WORDSIZE bits (int or long) + LONGWORD -- type of __WORDSIZE bits, traditionally long + + We distinguish WORD/LONGWORD, 32/LONG32, and 64/QUAD so that the + conventional uses of `long' or `long long' type modifiers match the + types we define, even when a less-adorned type would be the same size. + This matters for (somewhat) portably writing printf/scanf formats for + these types, where using the appropriate l or ll format modifiers can + make the typedefs and the formats match up across all GNU platforms. 
If + we used `long' when it's 64 bits where `long long' is expected, then the + compiler would warn about the formats not matching the argument types, + and the programmer changing them to shut up the compiler would break the + program's portability. + + Here we assume what is presently the case in all the GCC configurations + we support: long long is always 64 bits, long is always word/address size, + and int is always 32 bits. */ + +#define __S16_TYPE short int +#define __U16_TYPE unsigned short int +#define __S32_TYPE int +#define __U32_TYPE unsigned int +#define __SLONGWORD_TYPE long int +#define __ULONGWORD_TYPE unsigned long int +#if __WORDSIZE == 32 +# define __SQUAD_TYPE __quad_t +# define __UQUAD_TYPE __u_quad_t +# define __SWORD_TYPE int +# define __UWORD_TYPE unsigned int +# define __SLONG32_TYPE long int +# define __ULONG32_TYPE unsigned long int +# define __S64_TYPE __quad_t +# define __U64_TYPE __u_quad_t +/* We want __extension__ before typedef's that use nonstandard base types + such as `long long' in C89 mode. */ +# define __STD_TYPE __extension__ typedef +#elif __WORDSIZE == 64 +# 108 "/usr/include/x86_64-linux-gnu/bits/types.h" 3 4 +# define __SQUAD_TYPE long int +# define __UQUAD_TYPE unsigned long int +# define __SWORD_TYPE long int +# define __UWORD_TYPE unsigned long int +# define __SLONG32_TYPE int +# define __ULONG32_TYPE unsigned int +# define __S64_TYPE long int +# define __U64_TYPE unsigned long int +/* No need to mark the typedef with __extension__. */ +# define __STD_TYPE typedef +#else +# 119 "/usr/include/x86_64-linux-gnu/bits/types.h" 3 4 +# error +#endif +# 121 "/usr/include/x86_64-linux-gnu/bits/types.h" 3 4 +#if 0 /* expanded by -frewrite-includes */ +#include /* Defines __*_T_TYPE macros. */ +#endif /* expanded by -frewrite-includes */ +# 121 "/usr/include/x86_64-linux-gnu/bits/types.h" 3 4 +# 1 "/usr/include/x86_64-linux-gnu/bits/typesizes.h" 1 3 4 +/* bits/typesizes.h -- underlying types for *_t. Linux/x86-64 version. 
+ Copyright (C) 2012-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _BITS_TYPES_H +# error "Never include directly; use instead." +#endif +# 22 "/usr/include/x86_64-linux-gnu/bits/typesizes.h" 3 4 + +#ifndef _BITS_TYPESIZES_H +#define _BITS_TYPESIZES_H 1 + +/* See for the meaning of these macros. This file exists so + that need not vary across different GNU platforms. */ + +/* X32 kernel interface is 64-bit. 
*/ +#if defined __x86_64__ && defined __ILP32__ +# define __SYSCALL_SLONG_TYPE __SQUAD_TYPE +# define __SYSCALL_ULONG_TYPE __UQUAD_TYPE +#else +# 34 "/usr/include/x86_64-linux-gnu/bits/typesizes.h" 3 4 +# define __SYSCALL_SLONG_TYPE __SLONGWORD_TYPE +# define __SYSCALL_ULONG_TYPE __ULONGWORD_TYPE +#endif +# 37 "/usr/include/x86_64-linux-gnu/bits/typesizes.h" 3 4 + +#define __DEV_T_TYPE __UQUAD_TYPE +#define __UID_T_TYPE __U32_TYPE +#define __GID_T_TYPE __U32_TYPE +#define __INO_T_TYPE __SYSCALL_ULONG_TYPE +#define __INO64_T_TYPE __UQUAD_TYPE +#define __MODE_T_TYPE __U32_TYPE +#ifdef __x86_64__ +# define __NLINK_T_TYPE __SYSCALL_ULONG_TYPE +# define __FSWORD_T_TYPE __SYSCALL_SLONG_TYPE +#else +# 48 "/usr/include/x86_64-linux-gnu/bits/typesizes.h" 3 4 +# define __NLINK_T_TYPE __UWORD_TYPE +# define __FSWORD_T_TYPE __SWORD_TYPE +#endif +# 51 "/usr/include/x86_64-linux-gnu/bits/typesizes.h" 3 4 +#define __OFF_T_TYPE __SYSCALL_SLONG_TYPE +#define __OFF64_T_TYPE __SQUAD_TYPE +#define __PID_T_TYPE __S32_TYPE +#define __RLIM_T_TYPE __SYSCALL_ULONG_TYPE +#define __RLIM64_T_TYPE __UQUAD_TYPE +#define __BLKCNT_T_TYPE __SYSCALL_SLONG_TYPE +#define __BLKCNT64_T_TYPE __SQUAD_TYPE +#define __FSBLKCNT_T_TYPE __SYSCALL_ULONG_TYPE +#define __FSBLKCNT64_T_TYPE __UQUAD_TYPE +#define __FSFILCNT_T_TYPE __SYSCALL_ULONG_TYPE +#define __FSFILCNT64_T_TYPE __UQUAD_TYPE +#define __ID_T_TYPE __U32_TYPE +#define __CLOCK_T_TYPE __SYSCALL_SLONG_TYPE +#define __TIME_T_TYPE __SYSCALL_SLONG_TYPE +#define __USECONDS_T_TYPE __U32_TYPE +#define __SUSECONDS_T_TYPE __SYSCALL_SLONG_TYPE +#define __DADDR_T_TYPE __S32_TYPE +#define __KEY_T_TYPE __S32_TYPE +#define __CLOCKID_T_TYPE __S32_TYPE +#define __TIMER_T_TYPE void * +#define __BLKSIZE_T_TYPE __SYSCALL_SLONG_TYPE +#define __FSID_T_TYPE struct { int __val[2]; } +#define __SSIZE_T_TYPE __SWORD_TYPE + +#ifdef __x86_64__ +/* Tell the libc code that off_t and off64_t are actually the same type + for all ABI purposes, even if possibly expressed as different 
base types + for C type-checking purposes. */ +# define __OFF_T_MATCHES_OFF64_T 1 + +/* Same for ino_t and ino64_t. */ +# define __INO_T_MATCHES_INO64_T 1 +#endif +# 84 "/usr/include/x86_64-linux-gnu/bits/typesizes.h" 3 4 + +/* Number of descriptors that can fit in an `fd_set'. */ +#define __FD_SETSIZE 1024 + + +#endif /* bits/typesizes.h */ +# 90 "/usr/include/x86_64-linux-gnu/bits/typesizes.h" 3 4 +# 122 "/usr/include/x86_64-linux-gnu/bits/types.h" 2 3 4 + + +__STD_TYPE __DEV_T_TYPE __dev_t; /* Type of device numbers. */ +__STD_TYPE __UID_T_TYPE __uid_t; /* Type of user identifications. */ +__STD_TYPE __GID_T_TYPE __gid_t; /* Type of group identifications. */ +__STD_TYPE __INO_T_TYPE __ino_t; /* Type of file serial numbers. */ +__STD_TYPE __INO64_T_TYPE __ino64_t; /* Type of file serial numbers (LFS).*/ +__STD_TYPE __MODE_T_TYPE __mode_t; /* Type of file attribute bitmasks. */ +__STD_TYPE __NLINK_T_TYPE __nlink_t; /* Type of file link counts. */ +__STD_TYPE __OFF_T_TYPE __off_t; /* Type of file sizes and offsets. */ +__STD_TYPE __OFF64_T_TYPE __off64_t; /* Type of file sizes and offsets (LFS). */ +__STD_TYPE __PID_T_TYPE __pid_t; /* Type of process identifications. */ +__STD_TYPE __FSID_T_TYPE __fsid_t; /* Type of file system IDs. */ +__STD_TYPE __CLOCK_T_TYPE __clock_t; /* Type of CPU usage counts. */ +__STD_TYPE __RLIM_T_TYPE __rlim_t; /* Type for resource measurement. */ +__STD_TYPE __RLIM64_T_TYPE __rlim64_t; /* Type for resource measurement (LFS). */ +__STD_TYPE __ID_T_TYPE __id_t; /* General type for IDs. */ +__STD_TYPE __TIME_T_TYPE __time_t; /* Seconds since the Epoch. */ +__STD_TYPE __USECONDS_T_TYPE __useconds_t; /* Count of microseconds. */ +__STD_TYPE __SUSECONDS_T_TYPE __suseconds_t; /* Signed count of microseconds. */ + +__STD_TYPE __DADDR_T_TYPE __daddr_t; /* The type of a disk address. */ +__STD_TYPE __KEY_T_TYPE __key_t; /* Type of an IPC key. */ + +/* Clock ID used in clock and timer functions. 
*/ +__STD_TYPE __CLOCKID_T_TYPE __clockid_t; + +/* Timer ID returned by `timer_create'. */ +__STD_TYPE __TIMER_T_TYPE __timer_t; + +/* Type to represent block size. */ +__STD_TYPE __BLKSIZE_T_TYPE __blksize_t; + +/* Types from the Large File Support interface. */ + +/* Type to count number of disk blocks. */ +__STD_TYPE __BLKCNT_T_TYPE __blkcnt_t; +__STD_TYPE __BLKCNT64_T_TYPE __blkcnt64_t; + +/* Type to count file system blocks. */ +__STD_TYPE __FSBLKCNT_T_TYPE __fsblkcnt_t; +__STD_TYPE __FSBLKCNT64_T_TYPE __fsblkcnt64_t; + +/* Type to count file system nodes. */ +__STD_TYPE __FSFILCNT_T_TYPE __fsfilcnt_t; +__STD_TYPE __FSFILCNT64_T_TYPE __fsfilcnt64_t; + +/* Type of miscellaneous file system fields. */ +__STD_TYPE __FSWORD_T_TYPE __fsword_t; + +__STD_TYPE __SSIZE_T_TYPE __ssize_t; /* Type of a byte count, or error. */ + +/* Signed long type used in system calls. */ +__STD_TYPE __SYSCALL_SLONG_TYPE __syscall_slong_t; +/* Unsigned long type used in system calls. */ +__STD_TYPE __SYSCALL_ULONG_TYPE __syscall_ulong_t; + +/* These few don't really vary by system, they always correspond + to one of the other defined types. */ +typedef __off64_t __loff_t; /* Type of file sizes and offsets (LFS). */ +typedef __quad_t *__qaddr_t; +typedef char *__caddr_t; + +/* Duplicates info from stdint.h but this is used in unistd.h. */ +__STD_TYPE __SWORD_TYPE __intptr_t; + +/* Duplicate info from sys/socket.h. */ +__STD_TYPE __U32_TYPE __socklen_t; + + +#undef __STD_TYPE + +#endif /* bits/types.h */ +# 195 "/usr/include/x86_64-linux-gnu/bits/types.h" 3 4 +# 36 "/usr/include/stdio.h" 2 3 4 +# define __need_FILE +# define __need___FILE +#endif /* Don't need FILE. */ +# 39 "/usr/include/stdio.h" 3 4 + + +#if !defined __FILE_defined && defined __need_FILE + +/* Define outside of namespace so the C++ is happy. */ +struct _IO_FILE; + +__BEGIN_NAMESPACE_STD +/* The opaque type of streams. This is the definition used elsewhere. 
*/ +typedef struct _IO_FILE FILE; +__END_NAMESPACE_STD +#if defined __USE_LARGEFILE64 || defined __USE_SVID || defined __USE_POSIX \ + || defined __USE_BSD || defined __USE_ISOC99 || defined __USE_XOPEN \ + || defined __USE_POSIX2 +__USING_NAMESPACE_STD(FILE) +#endif +# 55 "/usr/include/stdio.h" 3 4 + +# define __FILE_defined 1 +#endif /* FILE not defined. */ +# 58 "/usr/include/stdio.h" 3 4 +#undef __need_FILE + + +#if !defined ____FILE_defined && defined __need___FILE + +/* The opaque type of streams. This is the definition used elsewhere. */ +typedef struct _IO_FILE __FILE; + +# define ____FILE_defined 1 +#endif /* __FILE not defined. */ +# 68 "/usr/include/stdio.h" 3 4 +#undef __need___FILE + + +#ifdef _STDIO_H +#define _STDIO_USES_IOSTREAM + +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 74 "/usr/include/stdio.h" 3 4 +# 1 "/usr/include/libio.h" 1 3 4 +/* Copyright (C) 1991-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Written by Per Bothner . + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . + + As a special exception, if you link the code in this file with + files compiled with a GNU compiler to produce an executable, + that does not cause the resulting executable to be covered by + the GNU Lesser General Public License. 
This exception does not + however invalidate any other reasons why the executable file + might be covered by the GNU Lesser General Public License. + This exception applies to code released by its copyright holders + in files containing the exception. */ + +#ifndef _IO_STDIO_H +#define _IO_STDIO_H + +#if 0 /* expanded by -frewrite-includes */ +#include <_G_config.h> +#endif /* expanded by -frewrite-includes */ +# 31 "/usr/include/libio.h" 3 4 +# 1 "/usr/include/_G_config.h" 1 3 4 +/* This file is needed by libio to define various configuration parameters. + These are always the same in the GNU C library. */ + +#ifndef _G_config_h +#define _G_config_h 1 + +/* Define types for libio in terms of the standard internal type names. */ + +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 9 "/usr/include/_G_config.h" 3 4 +# 10 "/usr/include/_G_config.h" 3 4 +#define __need_size_t +#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T +# define __need_wchar_t +#endif +# 14 "/usr/include/_G_config.h" 3 4 +#define __need_NULL +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 15 "/usr/include/_G_config.h" 3 4 +# 1 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 1 3 4 +/*===---- stddef.h - Basic type definitions --------------------------------=== + * + * Copyright (c) 2008 Eli Friedman + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions 
of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#if !defined(__STDDEF_H) || defined(__need_ptrdiff_t) || \ + defined(__need_size_t) || defined(__need_wchar_t) || \ + defined(__need_NULL) || defined(__need_wint_t) + +#if !defined(__need_ptrdiff_t) && !defined(__need_size_t) && \ + !defined(__need_wchar_t) && !defined(__need_NULL) && \ + !defined(__need_wint_t) +/* Always define miscellaneous pieces when modules are available. */ +#if !__has_feature(modules) +#define __STDDEF_H +#endif +# 37 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#define __need_ptrdiff_t +#define __need_size_t +#define __need_wchar_t +#define __need_NULL +#define __need_STDDEF_H_misc +/* __need_wint_t is intentionally not defined here. */ +#endif +# 44 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#if defined(__need_ptrdiff_t) +#if !defined(_PTRDIFF_T) || __has_feature(modules) +/* Always define ptrdiff_t when modules are available. 
*/ +#if !__has_feature(modules) +#define _PTRDIFF_T +#endif +# 51 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +typedef __PTRDIFF_TYPE__ ptrdiff_t; +#endif +# 53 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#undef __need_ptrdiff_t +#endif /* defined(__need_ptrdiff_t) */ +# 55 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#if defined(__need_size_t) +#if !defined(_SIZE_T) || __has_feature(modules) +/* Always define size_t when modules are available. */ +#if !__has_feature(modules) +#define _SIZE_T +#endif +# 62 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +typedef __SIZE_TYPE__ size_t; +#endif +# 64 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#undef __need_size_t +#endif /*defined(__need_size_t) */ +# 66 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#if defined(__need_STDDEF_H_misc) +/* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is + * enabled. */ +#if (defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 && \ + !defined(_RSIZE_T)) || __has_feature(modules) +/* Always define rsize_t when modules are available. */ +#if !__has_feature(modules) +#define _RSIZE_T +#endif +# 76 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +typedef __SIZE_TYPE__ rsize_t; +#endif +# 78 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#endif /* defined(__need_STDDEF_H_misc) */ +# 79 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#if defined(__need_wchar_t) +#ifndef __cplusplus +/* Always define wchar_t when modules are available. 
*/ +#if !defined(_WCHAR_T) || __has_feature(modules) +#if !__has_feature(modules) +#define _WCHAR_T +#if defined(_MSC_EXTENSIONS) +#define _WCHAR_T_DEFINED +#endif +# 89 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#endif +# 90 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +typedef __WCHAR_TYPE__ wchar_t; +#endif +# 92 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#endif +# 93 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#undef __need_wchar_t +#endif /* defined(__need_wchar_t) */ +# 95 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#if defined(__need_NULL) +#undef NULL +#ifdef __cplusplus +# if !defined(__MINGW32__) && !defined(_MSC_VER) +# define NULL __null +# else +# 102 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +# define NULL 0 +# endif +# 104 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#else +# 105 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +# define NULL ((void*)0) +#endif +# 107 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#ifdef __cplusplus +#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED) +namespace std { typedef decltype(nullptr) nullptr_t; } +using ::std::nullptr_t; +#endif +# 112 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#endif +# 113 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#undef __need_NULL +#endif /* defined(__need_NULL) */ +# 115 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#if defined(__need_STDDEF_H_misc) +#if __STDC_VERSION__ >= 201112L || __cplusplus >= 201103L +#if 0 /* expanded by -frewrite-includes */ +#include "__stddef_max_align_t.h" +#endif /* expanded by -frewrite-includes */ +# 118 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 
+# 119 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#endif +# 120 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#define offsetof(t, d) __builtin_offsetof(t, d) +#undef __need_STDDEF_H_misc +#endif /* defined(__need_STDDEF_H_misc) */ +# 123 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +/* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use +__WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */ +#if defined(__need_wint_t) +/* Always define wint_t when modules are available. */ +#if !defined(_WINT_T) || __has_feature(modules) +#if !__has_feature(modules) +#define _WINT_T +#endif +# 132 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +typedef __WINT_TYPE__ wint_t; +#endif +# 134 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#undef __need_wint_t +#endif /* __need_wint_t */ +# 136 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#endif +# 138 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +# 16 "/usr/include/_G_config.h" 2 3 4 +#define __need_mbstate_t +#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T +# define __need_wint_t +#endif +# 20 "/usr/include/_G_config.h" 3 4 +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 20 "/usr/include/_G_config.h" 3 4 +# 1 "/usr/include/wchar.h" 1 3 4 +/* Copyright (C) 1995-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* + * ISO C99 Standard: 7.24 + * Extended multibyte and wide character utilities + */ + +#ifndef _WCHAR_H + +#if !defined __need_mbstate_t && !defined __need_wint_t +# define _WCHAR_H 1 +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 27 "/usr/include/wchar.h" 3 4 +# 28 "/usr/include/wchar.h" 3 4 +#endif +# 29 "/usr/include/wchar.h" 3 4 + +#ifdef _WCHAR_H +/* Get FILE definition. */ +# define __need___FILE +# if defined __USE_UNIX98 || defined __USE_XOPEN2K +# define __need_FILE +# endif +# 36 "/usr/include/wchar.h" 3 4 +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 36 "/usr/include/wchar.h" 3 4 +# 37 "/usr/include/wchar.h" 3 4 +/* Get va_list definition. */ +# define __need___va_list +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 39 "/usr/include/wchar.h" 3 4 +# 40 "/usr/include/wchar.h" 3 4 + +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 41 "/usr/include/wchar.h" 3 4 +# 42 "/usr/include/wchar.h" 3 4 + +/* Get size_t, wchar_t, wint_t and NULL from . 
*/ +# define __need_size_t +# define __need_wchar_t +# define __need_NULL +#endif +# 48 "/usr/include/wchar.h" 3 4 +#if defined _WCHAR_H || defined __need_wint_t || !defined __WINT_TYPE__ +# undef __need_wint_t +# define __need_wint_t +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 51 "/usr/include/wchar.h" 3 4 +# 52 "/usr/include/wchar.h" 3 4 + +/* We try to get wint_t from , but not all GCC versions define it + there. So define it ourselves if it remains undefined. */ +# ifndef _WINT_T +/* Integral type unchanged by default argument promotions that can + hold any value corresponding to members of the extended character + set, as well as at least one value that does not correspond to any + member of the extended character set. */ +# define _WINT_T +typedef unsigned int wint_t; +# else +# 63 "/usr/include/wchar.h" 3 4 +/* Work around problems with the file which doesn't put + wint_t in the std namespace. */ +# if defined __cplusplus && defined _GLIBCPP_USE_NAMESPACES \ + && defined __WINT_TYPE__ +__BEGIN_NAMESPACE_STD +typedef __WINT_TYPE__ wint_t; +__END_NAMESPACE_STD +# endif +# 71 "/usr/include/wchar.h" 3 4 +# endif +# 72 "/usr/include/wchar.h" 3 4 + +/* Tell the caller that we provide correct C++ prototypes. */ +# if defined __cplusplus && __GNUC_PREREQ (4, 4) +# define __CORRECT_ISO_CPP_WCHAR_H_PROTO +# endif +# 77 "/usr/include/wchar.h" 3 4 +#endif +# 78 "/usr/include/wchar.h" 3 4 + +#if (defined _WCHAR_H || defined __need_mbstate_t) && !defined ____mbstate_t_defined +# define ____mbstate_t_defined 1 +/* Conversion state information. */ +typedef struct +{ + int __count; + union + { +# ifdef __WINT_TYPE__ + __WINT_TYPE__ __wch; +# else +# 90 "/usr/include/wchar.h" 3 4 + wint_t __wch; +# endif +# 92 "/usr/include/wchar.h" 3 4 + char __wchb[4]; + } __value; /* Value so far. 
*/ +} __mbstate_t; +#endif +# 96 "/usr/include/wchar.h" 3 4 +#undef __need_mbstate_t + + +/* The rest of the file is only used if used if __need_mbstate_t is not + defined. */ +#ifdef _WCHAR_H + +# ifndef __mbstate_t_defined +__BEGIN_NAMESPACE_C99 +/* Public type. */ +typedef __mbstate_t mbstate_t; +__END_NAMESPACE_C99 +# define __mbstate_t_defined 1 +# endif +# 110 "/usr/include/wchar.h" 3 4 + +#ifdef __USE_GNU +__USING_NAMESPACE_C99(mbstate_t) +#endif +# 114 "/usr/include/wchar.h" 3 4 + +#ifndef WCHAR_MIN +/* These constants might also be defined in . */ +# define WCHAR_MIN __WCHAR_MIN +# define WCHAR_MAX __WCHAR_MAX +#endif +# 120 "/usr/include/wchar.h" 3 4 + +#ifndef WEOF +# define WEOF (0xffffffffu) +#endif +# 124 "/usr/include/wchar.h" 3 4 + +/* For XPG4 compliance we have to define the stuff from here + as well. */ +#if defined __USE_XOPEN && !defined __USE_UNIX98 +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 128 "/usr/include/wchar.h" 3 4 +# 129 "/usr/include/wchar.h" 3 4 +#endif +# 130 "/usr/include/wchar.h" 3 4 + + +__BEGIN_DECLS + +__BEGIN_NAMESPACE_STD +/* This incomplete type is defined in but needed here because + of `wcsftime'. */ +struct tm; +__END_NAMESPACE_STD +/* XXX We have to clean this up at some point. Since tm is in the std + namespace but wcsftime is in __c99 the type wouldn't be found + without inserting it in the global namespace. */ +__USING_NAMESPACE_STD(tm) + + +__BEGIN_NAMESPACE_STD +/* Copy SRC to DEST. */ +extern wchar_t *wcscpy (wchar_t *__restrict __dest, + const wchar_t *__restrict __src) __THROW; +/* Copy no more than N wide-characters of SRC to DEST. */ +extern wchar_t *wcsncpy (wchar_t *__restrict __dest, + const wchar_t *__restrict __src, size_t __n) + __THROW; + +/* Append SRC onto DEST. */ +extern wchar_t *wcscat (wchar_t *__restrict __dest, + const wchar_t *__restrict __src) __THROW; +/* Append no more than N wide-characters of SRC onto DEST. 
*/ +extern wchar_t *wcsncat (wchar_t *__restrict __dest, + const wchar_t *__restrict __src, size_t __n) + __THROW; + +/* Compare S1 and S2. */ +extern int wcscmp (const wchar_t *__s1, const wchar_t *__s2) + __THROW __attribute_pure__; +/* Compare N wide-characters of S1 and S2. */ +extern int wcsncmp (const wchar_t *__s1, const wchar_t *__s2, size_t __n) + __THROW __attribute_pure__; +__END_NAMESPACE_STD + +#ifdef __USE_XOPEN2K8 +/* Compare S1 and S2, ignoring case. */ +extern int wcscasecmp (const wchar_t *__s1, const wchar_t *__s2) __THROW; + +/* Compare no more than N chars of S1 and S2, ignoring case. */ +extern int wcsncasecmp (const wchar_t *__s1, const wchar_t *__s2, + size_t __n) __THROW; + +/* Similar to the two functions above but take the information from + the provided locale and not the global locale. */ +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 180 "/usr/include/wchar.h" 3 4 +# 181 "/usr/include/wchar.h" 3 4 + +extern int wcscasecmp_l (const wchar_t *__s1, const wchar_t *__s2, + __locale_t __loc) __THROW; + +extern int wcsncasecmp_l (const wchar_t *__s1, const wchar_t *__s2, + size_t __n, __locale_t __loc) __THROW; +#endif +# 188 "/usr/include/wchar.h" 3 4 + +__BEGIN_NAMESPACE_STD +/* Compare S1 and S2, both interpreted as appropriate to the + LC_COLLATE category of the current locale. */ +extern int wcscoll (const wchar_t *__s1, const wchar_t *__s2) __THROW; +/* Transform S2 into array pointed to by S1 such that if wcscmp is + applied to two transformed strings the result is the as applying + `wcscoll' to the original strings. */ +extern size_t wcsxfrm (wchar_t *__restrict __s1, + const wchar_t *__restrict __s2, size_t __n) __THROW; +__END_NAMESPACE_STD + +#ifdef __USE_XOPEN2K8 +/* Similar to the two functions above but take the information from + the provided locale and not the global locale. 
*/ + +/* Compare S1 and S2, both interpreted as appropriate to the + LC_COLLATE category of the given locale. */ +extern int wcscoll_l (const wchar_t *__s1, const wchar_t *__s2, + __locale_t __loc) __THROW; + +/* Transform S2 into array pointed to by S1 such that if wcscmp is + applied to two transformed strings the result is the as applying + `wcscoll' to the original strings. */ +extern size_t wcsxfrm_l (wchar_t *__s1, const wchar_t *__s2, + size_t __n, __locale_t __loc) __THROW; + +/* Duplicate S, returning an identical malloc'd string. */ +extern wchar_t *wcsdup (const wchar_t *__s) __THROW __attribute_malloc__; +#endif +# 218 "/usr/include/wchar.h" 3 4 + +__BEGIN_NAMESPACE_STD +/* Find the first occurrence of WC in WCS. */ +#ifdef __CORRECT_ISO_CPP_WCHAR_H_PROTO +extern "C++" wchar_t *wcschr (wchar_t *__wcs, wchar_t __wc) + __THROW __asm ("wcschr") __attribute_pure__; +extern "C++" const wchar_t *wcschr (const wchar_t *__wcs, wchar_t __wc) + __THROW __asm ("wcschr") __attribute_pure__; +#else +# 227 "/usr/include/wchar.h" 3 4 +extern wchar_t *wcschr (const wchar_t *__wcs, wchar_t __wc) + __THROW __attribute_pure__; +#endif +# 230 "/usr/include/wchar.h" 3 4 +/* Find the last occurrence of WC in WCS. */ +#ifdef __CORRECT_ISO_CPP_WCHAR_H_PROTO +extern "C++" wchar_t *wcsrchr (wchar_t *__wcs, wchar_t __wc) + __THROW __asm ("wcsrchr") __attribute_pure__; +extern "C++" const wchar_t *wcsrchr (const wchar_t *__wcs, wchar_t __wc) + __THROW __asm ("wcsrchr") __attribute_pure__; +#else +# 237 "/usr/include/wchar.h" 3 4 +extern wchar_t *wcsrchr (const wchar_t *__wcs, wchar_t __wc) + __THROW __attribute_pure__; +#endif +# 240 "/usr/include/wchar.h" 3 4 +__END_NAMESPACE_STD + +#ifdef __USE_GNU +/* This function is similar to `wcschr'. But it returns a pointer to + the closing NUL wide character in case C is not found in S. 
*/ +extern wchar_t *wcschrnul (const wchar_t *__s, wchar_t __wc) + __THROW __attribute_pure__; +#endif +# 248 "/usr/include/wchar.h" 3 4 + +__BEGIN_NAMESPACE_STD +/* Return the length of the initial segmet of WCS which + consists entirely of wide characters not in REJECT. */ +extern size_t wcscspn (const wchar_t *__wcs, const wchar_t *__reject) + __THROW __attribute_pure__; +/* Return the length of the initial segmet of WCS which + consists entirely of wide characters in ACCEPT. */ +extern size_t wcsspn (const wchar_t *__wcs, const wchar_t *__accept) + __THROW __attribute_pure__; +/* Find the first occurrence in WCS of any character in ACCEPT. */ +#ifdef __CORRECT_ISO_CPP_WCHAR_H_PROTO +extern "C++" wchar_t *wcspbrk (wchar_t *__wcs, const wchar_t *__accept) + __THROW __asm ("wcspbrk") __attribute_pure__; +extern "C++" const wchar_t *wcspbrk (const wchar_t *__wcs, + const wchar_t *__accept) + __THROW __asm ("wcspbrk") __attribute_pure__; +#else +# 266 "/usr/include/wchar.h" 3 4 +extern wchar_t *wcspbrk (const wchar_t *__wcs, const wchar_t *__accept) + __THROW __attribute_pure__; +#endif +# 269 "/usr/include/wchar.h" 3 4 +/* Find the first occurrence of NEEDLE in HAYSTACK. */ +#ifdef __CORRECT_ISO_CPP_WCHAR_H_PROTO +extern "C++" wchar_t *wcsstr (wchar_t *__haystack, const wchar_t *__needle) + __THROW __asm ("wcsstr") __attribute_pure__; +extern "C++" const wchar_t *wcsstr (const wchar_t *__haystack, + const wchar_t *__needle) + __THROW __asm ("wcsstr") __attribute_pure__; +#else +# 277 "/usr/include/wchar.h" 3 4 +extern wchar_t *wcsstr (const wchar_t *__haystack, const wchar_t *__needle) + __THROW __attribute_pure__; +#endif +# 280 "/usr/include/wchar.h" 3 4 + +/* Divide WCS into tokens separated by characters in DELIM. */ +extern wchar_t *wcstok (wchar_t *__restrict __s, + const wchar_t *__restrict __delim, + wchar_t **__restrict __ptr) __THROW; + +/* Return the number of wide characters in S. 
*/ +extern size_t wcslen (const wchar_t *__s) __THROW __attribute_pure__; +__END_NAMESPACE_STD + +#ifdef __USE_XOPEN +/* Another name for `wcsstr' from XPG4. */ +# ifdef __CORRECT_ISO_CPP_WCHAR_H_PROTO +extern "C++" wchar_t *wcswcs (wchar_t *__haystack, const wchar_t *__needle) + __THROW __asm ("wcswcs") __attribute_pure__; +extern "C++" const wchar_t *wcswcs (const wchar_t *__haystack, + const wchar_t *__needle) + __THROW __asm ("wcswcs") __attribute_pure__; +# else +# 299 "/usr/include/wchar.h" 3 4 +extern wchar_t *wcswcs (const wchar_t *__haystack, const wchar_t *__needle) + __THROW __attribute_pure__; +# endif +# 302 "/usr/include/wchar.h" 3 4 +#endif +# 303 "/usr/include/wchar.h" 3 4 + +#ifdef __USE_XOPEN2K8 +/* Return the number of wide characters in S, but at most MAXLEN. */ +extern size_t wcsnlen (const wchar_t *__s, size_t __maxlen) + __THROW __attribute_pure__; +#endif +# 309 "/usr/include/wchar.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* Search N wide characters of S for C. */ +#ifdef __CORRECT_ISO_CPP_WCHAR_H_PROTO +extern "C++" wchar_t *wmemchr (wchar_t *__s, wchar_t __c, size_t __n) + __THROW __asm ("wmemchr") __attribute_pure__; +extern "C++" const wchar_t *wmemchr (const wchar_t *__s, wchar_t __c, + size_t __n) + __THROW __asm ("wmemchr") __attribute_pure__; +#else +# 320 "/usr/include/wchar.h" 3 4 +extern wchar_t *wmemchr (const wchar_t *__s, wchar_t __c, size_t __n) + __THROW __attribute_pure__; +#endif +# 323 "/usr/include/wchar.h" 3 4 + +/* Compare N wide characters of S1 and S2. */ +extern int wmemcmp (const wchar_t *__s1, const wchar_t *__s2, size_t __n) + __THROW __attribute_pure__; + +/* Copy N wide characters of SRC to DEST. */ +extern wchar_t *wmemcpy (wchar_t *__restrict __s1, + const wchar_t *__restrict __s2, size_t __n) __THROW; + +/* Copy N wide characters of SRC to DEST, guaranteeing + correct behavior for overlapping strings. 
*/ +extern wchar_t *wmemmove (wchar_t *__s1, const wchar_t *__s2, size_t __n) + __THROW; + +/* Set N wide characters of S to C. */ +extern wchar_t *wmemset (wchar_t *__s, wchar_t __c, size_t __n) __THROW; +__END_NAMESPACE_STD + +#ifdef __USE_GNU +/* Copy N wide characters of SRC to DEST and return pointer to following + wide character. */ +extern wchar_t *wmempcpy (wchar_t *__restrict __s1, + const wchar_t *__restrict __s2, size_t __n) + __THROW; +#endif +# 348 "/usr/include/wchar.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* Determine whether C constitutes a valid (one-byte) multibyte + character. */ +extern wint_t btowc (int __c) __THROW; + +/* Determine whether C corresponds to a member of the extended + character set whose multibyte representation is a single byte. */ +extern int wctob (wint_t __c) __THROW; + +/* Determine whether PS points to an object representing the initial + state. */ +extern int mbsinit (const mbstate_t *__ps) __THROW __attribute_pure__; + +/* Write wide character representation of multibyte character pointed + to by S to PWC. */ +extern size_t mbrtowc (wchar_t *__restrict __pwc, + const char *__restrict __s, size_t __n, + mbstate_t *__restrict __p) __THROW; + +/* Write multibyte representation of wide character WC to S. */ +extern size_t wcrtomb (char *__restrict __s, wchar_t __wc, + mbstate_t *__restrict __ps) __THROW; + +/* Return number of bytes in multibyte character pointed to by S. */ +extern size_t __mbrlen (const char *__restrict __s, size_t __n, + mbstate_t *__restrict __ps) __THROW; +extern size_t mbrlen (const char *__restrict __s, size_t __n, + mbstate_t *__restrict __ps) __THROW; +__END_NAMESPACE_STD + +#ifdef __USE_EXTERN_INLINES +/* Define inline function as optimization. */ + +/* We can use the BTOWC and WCTOB optimizations since we know that all + locales must use ASCII encoding for the values in the ASCII range + and because the wchar_t encoding is always ISO 10646. 
*/ +extern wint_t __btowc_alias (int __c) __asm ("btowc"); +__extern_inline wint_t +__NTH (btowc (int __c)) +{ return (__builtin_constant_p (__c) && __c >= '\0' && __c <= '\x7f' + ? (wint_t) __c : __btowc_alias (__c)); } + +extern int __wctob_alias (wint_t __c) __asm ("wctob"); +__extern_inline int +__NTH (wctob (wint_t __wc)) +{ return (__builtin_constant_p (__wc) && __wc >= L'\0' && __wc <= L'\x7f' + ? (int) __wc : __wctob_alias (__wc)); } + +__extern_inline size_t +__NTH (mbrlen (const char *__restrict __s, size_t __n, + mbstate_t *__restrict __ps)) +{ return (__ps != NULL + ? mbrtowc (NULL, __s, __n, __ps) : __mbrlen (__s, __n, NULL)); } +#endif +# 404 "/usr/include/wchar.h" 3 4 + +__BEGIN_NAMESPACE_STD +/* Write wide character representation of multibyte character string + SRC to DST. */ +extern size_t mbsrtowcs (wchar_t *__restrict __dst, + const char **__restrict __src, size_t __len, + mbstate_t *__restrict __ps) __THROW; + +/* Write multibyte character representation of wide character string + SRC to DST. */ +extern size_t wcsrtombs (char *__restrict __dst, + const wchar_t **__restrict __src, size_t __len, + mbstate_t *__restrict __ps) __THROW; +__END_NAMESPACE_STD + + +#ifdef __USE_XOPEN2K8 +/* Write wide character representation of at most NMC bytes of the + multibyte character string SRC to DST. */ +extern size_t mbsnrtowcs (wchar_t *__restrict __dst, + const char **__restrict __src, size_t __nmc, + size_t __len, mbstate_t *__restrict __ps) __THROW; + +/* Write multibyte character representation of at most NWC characters + from the wide character string SRC to DST. */ +extern size_t wcsnrtombs (char *__restrict __dst, + const wchar_t **__restrict __src, + size_t __nwc, size_t __len, + mbstate_t *__restrict __ps) __THROW; +#endif /* use POSIX 2008 */ +# 434 "/usr/include/wchar.h" 3 4 + + +/* The following functions are extensions found in X/Open CAE. */ +#ifdef __USE_XOPEN +/* Determine number of column positions required for C. 
*/ +extern int wcwidth (wchar_t __c) __THROW; + +/* Determine number of column positions required for first N wide + characters (or fewer if S ends before this) in S. */ +extern int wcswidth (const wchar_t *__s, size_t __n) __THROW; +#endif /* Use X/Open. */ +# 445 "/usr/include/wchar.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* Convert initial portion of the wide string NPTR to `double' + representation. */ +extern double wcstod (const wchar_t *__restrict __nptr, + wchar_t **__restrict __endptr) __THROW; +__END_NAMESPACE_STD + +#ifdef __USE_ISOC99 +__BEGIN_NAMESPACE_C99 +/* Likewise for `float' and `long double' sizes of floating-point numbers. */ +extern float wcstof (const wchar_t *__restrict __nptr, + wchar_t **__restrict __endptr) __THROW; +extern long double wcstold (const wchar_t *__restrict __nptr, + wchar_t **__restrict __endptr) __THROW; +__END_NAMESPACE_C99 +#endif /* C99 */ +# 463 "/usr/include/wchar.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* Convert initial portion of wide string NPTR to `long int' + representation. */ +extern long int wcstol (const wchar_t *__restrict __nptr, + wchar_t **__restrict __endptr, int __base) __THROW; + +/* Convert initial portion of wide string NPTR to `unsigned long int' + representation. */ +extern unsigned long int wcstoul (const wchar_t *__restrict __nptr, + wchar_t **__restrict __endptr, int __base) + __THROW; +__END_NAMESPACE_STD + +#ifdef __USE_ISOC99 +__BEGIN_NAMESPACE_C99 +/* Convert initial portion of wide string NPTR to `long long int' + representation. */ +__extension__ +extern long long int wcstoll (const wchar_t *__restrict __nptr, + wchar_t **__restrict __endptr, int __base) + __THROW; + +/* Convert initial portion of wide string NPTR to `unsigned long long int' + representation. */ +__extension__ +extern unsigned long long int wcstoull (const wchar_t *__restrict __nptr, + wchar_t **__restrict __endptr, + int __base) __THROW; +__END_NAMESPACE_C99 +#endif /* ISO C99. 
*/ +# 495 "/usr/include/wchar.h" 3 4 + +#ifdef __USE_GNU +/* Convert initial portion of wide string NPTR to `long long int' + representation. */ +__extension__ +extern long long int wcstoq (const wchar_t *__restrict __nptr, + wchar_t **__restrict __endptr, int __base) + __THROW; + +/* Convert initial portion of wide string NPTR to `unsigned long long int' + representation. */ +__extension__ +extern unsigned long long int wcstouq (const wchar_t *__restrict __nptr, + wchar_t **__restrict __endptr, + int __base) __THROW; +#endif /* Use GNU. */ +# 511 "/usr/include/wchar.h" 3 4 + +#ifdef __USE_GNU +/* The concept of one static locale per category is not very well + thought out. Many applications will need to process its data using + information from several different locales. Another application is + the implementation of the internationalization handling in the + upcoming ISO C++ standard library. To support this another set of + the functions using locale data exist which have an additional + argument. + + Attention: all these functions are *not* standardized in any form. + This is a proof-of-concept implementation. */ + +/* Structure for reentrant locale using functions. This is an + (almost) opaque type for the user level programs. */ +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 526 "/usr/include/wchar.h" 3 4 +# 527 "/usr/include/wchar.h" 3 4 + +/* Special versions of the functions above which take the locale to + use as an additional parameter. 
*/ +extern long int wcstol_l (const wchar_t *__restrict __nptr, + wchar_t **__restrict __endptr, int __base, + __locale_t __loc) __THROW; + +extern unsigned long int wcstoul_l (const wchar_t *__restrict __nptr, + wchar_t **__restrict __endptr, + int __base, __locale_t __loc) __THROW; + +__extension__ +extern long long int wcstoll_l (const wchar_t *__restrict __nptr, + wchar_t **__restrict __endptr, + int __base, __locale_t __loc) __THROW; + +__extension__ +extern unsigned long long int wcstoull_l (const wchar_t *__restrict __nptr, + wchar_t **__restrict __endptr, + int __base, __locale_t __loc) + __THROW; + +extern double wcstod_l (const wchar_t *__restrict __nptr, + wchar_t **__restrict __endptr, __locale_t __loc) + __THROW; + +extern float wcstof_l (const wchar_t *__restrict __nptr, + wchar_t **__restrict __endptr, __locale_t __loc) + __THROW; + +extern long double wcstold_l (const wchar_t *__restrict __nptr, + wchar_t **__restrict __endptr, + __locale_t __loc) __THROW; +#endif /* use GNU */ +# 561 "/usr/include/wchar.h" 3 4 + + +#ifdef __USE_XOPEN2K8 +/* Copy SRC to DEST, returning the address of the terminating L'\0' in + DEST. */ +extern wchar_t *wcpcpy (wchar_t *__restrict __dest, + const wchar_t *__restrict __src) __THROW; + +/* Copy no more than N characters of SRC to DEST, returning the address of + the last character written into DEST. */ +extern wchar_t *wcpncpy (wchar_t *__restrict __dest, + const wchar_t *__restrict __src, size_t __n) + __THROW; + + +/* Wide character I/O functions. */ + +/* Like OPEN_MEMSTREAM, but the stream is wide oriented and produces + a wide character string. */ +extern __FILE *open_wmemstream (wchar_t **__bufloc, size_t *__sizeloc) __THROW; +#endif +# 582 "/usr/include/wchar.h" 3 4 + +#if defined __USE_ISOC95 || defined __USE_UNIX98 +__BEGIN_NAMESPACE_STD + +/* Select orientation for stream. */ +extern int fwide (__FILE *__fp, int __mode) __THROW; + + +/* Write formatted output to STREAM. 
+ + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int fwprintf (__FILE *__restrict __stream, + const wchar_t *__restrict __format, ...) + /* __attribute__ ((__format__ (__wprintf__, 2, 3))) */; +/* Write formatted output to stdout. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int wprintf (const wchar_t *__restrict __format, ...) + /* __attribute__ ((__format__ (__wprintf__, 1, 2))) */; +/* Write formatted output of at most N characters to S. */ +extern int swprintf (wchar_t *__restrict __s, size_t __n, + const wchar_t *__restrict __format, ...) + __THROW /* __attribute__ ((__format__ (__wprintf__, 3, 4))) */; + +/* Write formatted output to S from argument list ARG. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int vfwprintf (__FILE *__restrict __s, + const wchar_t *__restrict __format, + __gnuc_va_list __arg) + /* __attribute__ ((__format__ (__wprintf__, 2, 0))) */; +/* Write formatted output to stdout from argument list ARG. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int vwprintf (const wchar_t *__restrict __format, + __gnuc_va_list __arg) + /* __attribute__ ((__format__ (__wprintf__, 1, 0))) */; +/* Write formatted output of at most N character to S from argument + list ARG. */ +extern int vswprintf (wchar_t *__restrict __s, size_t __n, + const wchar_t *__restrict __format, + __gnuc_va_list __arg) + __THROW /* __attribute__ ((__format__ (__wprintf__, 3, 0))) */; + + +/* Read formatted input from STREAM. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int fwscanf (__FILE *__restrict __stream, + const wchar_t *__restrict __format, ...) + /* __attribute__ ((__format__ (__wscanf__, 2, 3))) */; +/* Read formatted input from stdin. 
+ + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int wscanf (const wchar_t *__restrict __format, ...) + /* __attribute__ ((__format__ (__wscanf__, 1, 2))) */; +/* Read formatted input from S. */ +extern int swscanf (const wchar_t *__restrict __s, + const wchar_t *__restrict __format, ...) + __THROW /* __attribute__ ((__format__ (__wscanf__, 2, 3))) */; + +# if defined __USE_ISOC99 && !defined __USE_GNU \ + && (!defined __LDBL_COMPAT || !defined __REDIRECT) \ + && (defined __STRICT_ANSI__ || defined __USE_XOPEN2K) +# ifdef __REDIRECT +/* For strict ISO C99 or POSIX compliance disallow %as, %aS and %a[ + GNU extension which conflicts with valid %a followed by letter + s, S or [. */ +extern int __REDIRECT (fwscanf, (__FILE *__restrict __stream, + const wchar_t *__restrict __format, ...), + __isoc99_fwscanf) + /* __attribute__ ((__format__ (__wscanf__, 2, 3))) */; +extern int __REDIRECT (wscanf, (const wchar_t *__restrict __format, ...), + __isoc99_wscanf) + /* __attribute__ ((__format__ (__wscanf__, 1, 2))) */; +extern int __REDIRECT_NTH (swscanf, (const wchar_t *__restrict __s, + const wchar_t *__restrict __format, + ...), __isoc99_swscanf) + /* __attribute__ ((__format__ (__wscanf__, 2, 3))) */; +# else +# 668 "/usr/include/wchar.h" 3 4 +extern int __isoc99_fwscanf (__FILE *__restrict __stream, + const wchar_t *__restrict __format, ...); +extern int __isoc99_wscanf (const wchar_t *__restrict __format, ...); +extern int __isoc99_swscanf (const wchar_t *__restrict __s, + const wchar_t *__restrict __format, ...) + __THROW; +# define fwscanf __isoc99_fwscanf +# define wscanf __isoc99_wscanf +# define swscanf __isoc99_swscanf +# endif +# 678 "/usr/include/wchar.h" 3 4 +# endif +# 679 "/usr/include/wchar.h" 3 4 + +__END_NAMESPACE_STD +#endif /* Use ISO C95, C99 and Unix98. */ +# 682 "/usr/include/wchar.h" 3 4 + +#ifdef __USE_ISOC99 +__BEGIN_NAMESPACE_C99 +/* Read formatted input from S into argument list ARG. 
+ + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int vfwscanf (__FILE *__restrict __s, + const wchar_t *__restrict __format, + __gnuc_va_list __arg) + /* __attribute__ ((__format__ (__wscanf__, 2, 0))) */; +/* Read formatted input from stdin into argument list ARG. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int vwscanf (const wchar_t *__restrict __format, + __gnuc_va_list __arg) + /* __attribute__ ((__format__ (__wscanf__, 1, 0))) */; +/* Read formatted input from S into argument list ARG. */ +extern int vswscanf (const wchar_t *__restrict __s, + const wchar_t *__restrict __format, + __gnuc_va_list __arg) + __THROW /* __attribute__ ((__format__ (__wscanf__, 2, 0))) */; + +# if !defined __USE_GNU \ + && (!defined __LDBL_COMPAT || !defined __REDIRECT) \ + && (defined __STRICT_ANSI__ || defined __USE_XOPEN2K) +# ifdef __REDIRECT +extern int __REDIRECT (vfwscanf, (__FILE *__restrict __s, + const wchar_t *__restrict __format, + __gnuc_va_list __arg), __isoc99_vfwscanf) + /* __attribute__ ((__format__ (__wscanf__, 2, 0))) */; +extern int __REDIRECT (vwscanf, (const wchar_t *__restrict __format, + __gnuc_va_list __arg), __isoc99_vwscanf) + /* __attribute__ ((__format__ (__wscanf__, 1, 0))) */; +extern int __REDIRECT_NTH (vswscanf, (const wchar_t *__restrict __s, + const wchar_t *__restrict __format, + __gnuc_va_list __arg), __isoc99_vswscanf) + /* __attribute__ ((__format__ (__wscanf__, 2, 0))) */; +# else +# 722 "/usr/include/wchar.h" 3 4 +extern int __isoc99_vfwscanf (__FILE *__restrict __s, + const wchar_t *__restrict __format, + __gnuc_va_list __arg); +extern int __isoc99_vwscanf (const wchar_t *__restrict __format, + __gnuc_va_list __arg); +extern int __isoc99_vswscanf (const wchar_t *__restrict __s, + const wchar_t *__restrict __format, + __gnuc_va_list __arg) __THROW; +# define vfwscanf __isoc99_vfwscanf +# define vwscanf __isoc99_vwscanf +# define 
vswscanf __isoc99_vswscanf +# endif +# 734 "/usr/include/wchar.h" 3 4 +# endif +# 735 "/usr/include/wchar.h" 3 4 + +__END_NAMESPACE_C99 +#endif /* Use ISO C99. */ +# 738 "/usr/include/wchar.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* Read a character from STREAM. + + These functions are possible cancellation points and therefore not + marked with __THROW. */ +extern wint_t fgetwc (__FILE *__stream); +extern wint_t getwc (__FILE *__stream); + +/* Read a character from stdin. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern wint_t getwchar (void); + + +/* Write a character to STREAM. + + These functions are possible cancellation points and therefore not + marked with __THROW. */ +extern wint_t fputwc (wchar_t __wc, __FILE *__stream); +extern wint_t putwc (wchar_t __wc, __FILE *__stream); + +/* Write a character to stdout. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern wint_t putwchar (wchar_t __wc); + + +/* Get a newline-terminated wide character string of finite length + from STREAM. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern wchar_t *fgetws (wchar_t *__restrict __ws, int __n, + __FILE *__restrict __stream); + +/* Write a string to STREAM. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int fputws (const wchar_t *__restrict __ws, + __FILE *__restrict __stream); + + +/* Push a character back onto the input buffer of STREAM. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern wint_t ungetwc (wint_t __wc, __FILE *__stream); +__END_NAMESPACE_STD + + +#ifdef __USE_GNU +/* These are defined to be equivalent to the `char' functions defined + in POSIX.1:1996. + + These functions are not part of POSIX and therefore no official + cancellation point. 
But due to similarity with an POSIX interface + or due to the implementation they are cancellation points and + therefore not marked with __THROW. */ +extern wint_t getwc_unlocked (__FILE *__stream); +extern wint_t getwchar_unlocked (void); + +/* This is the wide character version of a GNU extension. + + This function is not part of POSIX and therefore no official + cancellation point. But due to similarity with an POSIX interface + or due to the implementation it is a cancellation point and + therefore not marked with __THROW. */ +extern wint_t fgetwc_unlocked (__FILE *__stream); + +/* Faster version when locking is not necessary. + + This function is not part of POSIX and therefore no official + cancellation point. But due to similarity with an POSIX interface + or due to the implementation it is a cancellation point and + therefore not marked with __THROW. */ +extern wint_t fputwc_unlocked (wchar_t __wc, __FILE *__stream); + +/* These are defined to be equivalent to the `char' functions defined + in POSIX.1:1996. + + These functions are not part of POSIX and therefore no official + cancellation point. But due to similarity with an POSIX interface + or due to the implementation they are cancellation points and + therefore not marked with __THROW. */ +extern wint_t putwc_unlocked (wchar_t __wc, __FILE *__stream); +extern wint_t putwchar_unlocked (wchar_t __wc); + + +/* This function does the same as `fgetws' but does not lock the stream. + + This function is not part of POSIX and therefore no official + cancellation point. But due to similarity with an POSIX interface + or due to the implementation it is a cancellation point and + therefore not marked with __THROW. */ +extern wchar_t *fgetws_unlocked (wchar_t *__restrict __ws, int __n, + __FILE *__restrict __stream); + +/* This function does the same as `fputws' but does not lock the stream. + + This function is not part of POSIX and therefore no official + cancellation point. 
But due to similarity with an POSIX interface + or due to the implementation it is a cancellation point and + therefore not marked with __THROW. */ +extern int fputws_unlocked (const wchar_t *__restrict __ws, + __FILE *__restrict __stream); +#endif +# 849 "/usr/include/wchar.h" 3 4 + + +__BEGIN_NAMESPACE_C99 +/* Format TP into S according to FORMAT. + Write no more than MAXSIZE wide characters and return the number + of wide characters written, or 0 if it would exceed MAXSIZE. */ +extern size_t wcsftime (wchar_t *__restrict __s, size_t __maxsize, + const wchar_t *__restrict __format, + const struct tm *__restrict __tp) __THROW; +__END_NAMESPACE_C99 + +# ifdef __USE_GNU +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 861 "/usr/include/wchar.h" 3 4 +# 862 "/usr/include/wchar.h" 3 4 + +/* Similar to `wcsftime' but takes the information from + the provided locale and not the global locale. */ +extern size_t wcsftime_l (wchar_t *__restrict __s, size_t __maxsize, + const wchar_t *__restrict __format, + const struct tm *__restrict __tp, + __locale_t __loc) __THROW; +# endif +# 870 "/usr/include/wchar.h" 3 4 + +/* The X/Open standard demands that most of the functions defined in + the header must also appear here. This is probably + because some X/Open members wrote their implementation before the + ISO C standard was published and introduced the better solution. + We have to provide these definitions for compliance reasons but we + do this nonsense only if really necessary. */ +#if defined __USE_UNIX98 && !defined __USE_GNU +# define __need_iswxxx +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 879 "/usr/include/wchar.h" 3 4 +# 880 "/usr/include/wchar.h" 3 4 +#endif +# 881 "/usr/include/wchar.h" 3 4 + +/* Define some macros helping to catch buffer overflows. 
*/ +#if __USE_FORTIFY_LEVEL > 0 && defined __fortify_function +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 884 "/usr/include/wchar.h" 3 4 +# 885 "/usr/include/wchar.h" 3 4 +#endif +# 886 "/usr/include/wchar.h" 3 4 + +#ifdef __LDBL_COMPAT +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 888 "/usr/include/wchar.h" 3 4 +# 889 "/usr/include/wchar.h" 3 4 +#endif +# 890 "/usr/include/wchar.h" 3 4 + +__END_DECLS + +#endif /* _WCHAR_H defined */ +# 894 "/usr/include/wchar.h" 3 4 + +#endif /* wchar.h */ +# 896 "/usr/include/wchar.h" 3 4 + +/* Undefine all __need_* constants in case we are included to get those + constants but the whole file was already read. */ +#undef __need_mbstate_t +#undef __need_wint_t +# 21 "/usr/include/_G_config.h" 2 3 4 +typedef struct +{ + __off_t __pos; + __mbstate_t __state; +} _G_fpos_t; +typedef struct +{ + __off64_t __pos; + __mbstate_t __state; +} _G_fpos64_t; +#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 32 "/usr/include/_G_config.h" 3 4 +# 33 "/usr/include/_G_config.h" 3 4 +typedef union +{ + struct __gconv_info __cd; + struct + { + struct __gconv_info __cd; + struct __gconv_step_data __data; + } __combined; +} _G_iconv_t; +#endif +# 43 "/usr/include/_G_config.h" 3 4 + + +/* These library features are always available in the GNU C library. */ +#define _G_va_list __gnuc_va_list + +#define _G_HAVE_MMAP 1 +#define _G_HAVE_MREMAP 1 + +#define _G_IO_IO_FILE_VERSION 0x20001 + +/* This is defined by if `st_blksize' exists. 
*/ +#define _G_HAVE_ST_BLKSIZE defined (_STATBUF_ST_BLKSIZE) + +#define _G_BUFSIZ 8192 + +#endif /* _G_config.h */ +# 59 "/usr/include/_G_config.h" 3 4 +# 32 "/usr/include/libio.h" 2 3 4 +/* ALL of these should be defined in _G_config.h */ +#define _IO_fpos_t _G_fpos_t +#define _IO_fpos64_t _G_fpos64_t +#define _IO_size_t size_t +#define _IO_ssize_t __ssize_t +#define _IO_off_t __off_t +#define _IO_off64_t __off64_t +#define _IO_pid_t __pid_t +#define _IO_uid_t __uid_t +#define _IO_iconv_t _G_iconv_t +#define _IO_HAVE_ST_BLKSIZE _G_HAVE_ST_BLKSIZE +#define _IO_BUFSIZ _G_BUFSIZ +#define _IO_va_list _G_va_list +#define _IO_wint_t wint_t + +/* This define avoids name pollution if we're using GNU stdarg.h */ +#define __need___va_list +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 49 "/usr/include/libio.h" 3 4 +# 1 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stdarg.h" 1 3 4 +/*===---- stdarg.h - Variable argument handling ----------------------------=== + * + * Copyright (c) 2008 Eli Friedman + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __STDARG_H +#define __STDARG_H + +#ifndef _VA_LIST +typedef __builtin_va_list va_list; +#define _VA_LIST +#endif +# 33 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stdarg.h" 3 4 +#define va_start(ap, param) __builtin_va_start(ap, param) +#define va_end(ap) __builtin_va_end(ap) +#define va_arg(ap, type) __builtin_va_arg(ap, type) + +/* GCC always defines __va_copy, but does not define va_copy unless in c99 mode + * or -ansi is not specified, since it was not part of C90. + */ +#define __va_copy(d,s) __builtin_va_copy(d,s) + +#if __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L || !defined(__STRICT_ANSI__) +#define va_copy(dest, src) __builtin_va_copy(dest, src) +#endif +# 45 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stdarg.h" 3 4 + +#ifndef __GNUC_VA_LIST +#define __GNUC_VA_LIST 1 +typedef __builtin_va_list __gnuc_va_list; +#endif +# 50 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stdarg.h" 3 4 + +#endif /* __STDARG_H */ +# 52 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stdarg.h" 3 4 +# 50 "/usr/include/libio.h" 2 3 4 +#ifdef __GNUC_VA_LIST +# undef _IO_va_list +# define _IO_va_list __gnuc_va_list +#endif /* __GNUC_VA_LIST */ +# 54 "/usr/include/libio.h" 3 4 + +#ifndef __P +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 56 "/usr/include/libio.h" 3 4 +# 57 "/usr/include/libio.h" 3 4 +#endif /*!__P*/ +# 58 "/usr/include/libio.h" 3 4 + +#define _IO_UNIFIED_JUMPTABLES 1 + +#ifndef EOF +# define EOF (-1) +#endif +# 64 "/usr/include/libio.h" 3 4 +#ifndef NULL +# if defined __GNUG__ 
&& \ + (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 8)) +# define NULL (__null) +# else +# 69 "/usr/include/libio.h" 3 4 +# if !defined(__cplusplus) +# define NULL ((void*)0) +# else +# 72 "/usr/include/libio.h" 3 4 +# define NULL (0) +# endif +# 74 "/usr/include/libio.h" 3 4 +# endif +# 75 "/usr/include/libio.h" 3 4 +#endif +# 76 "/usr/include/libio.h" 3 4 + +#define _IOS_INPUT 1 +#define _IOS_OUTPUT 2 +#define _IOS_ATEND 4 +#define _IOS_APPEND 8 +#define _IOS_TRUNC 16 +#define _IOS_NOCREATE 32 +#define _IOS_NOREPLACE 64 +#define _IOS_BIN 128 + +/* Magic numbers and bits for the _flags field. + The magic numbers use the high-order bits of _flags; + the remaining bits are available for variable flags. + Note: The magic numbers must all be negative if stdio + emulation is desired. */ + +#define _IO_MAGIC 0xFBAD0000 /* Magic number */ +#define _OLD_STDIO_MAGIC 0xFABC0000 /* Emulate old stdio. */ +#define _IO_MAGIC_MASK 0xFFFF0000 +#define _IO_USER_BUF 1 /* User owns buffer; don't delete it on close. */ +#define _IO_UNBUFFERED 2 +#define _IO_NO_READS 4 /* Reading not allowed */ +#define _IO_NO_WRITES 8 /* Writing not allowd */ +#define _IO_EOF_SEEN 0x10 +#define _IO_ERR_SEEN 0x20 +#define _IO_DELETE_DONT_CLOSE 0x40 /* Don't call close(_fileno) on cleanup. */ +#define _IO_LINKED 0x80 /* Set if linked (using _chain) to streambuf::_list_all.*/ +#define _IO_IN_BACKUP 0x100 +#define _IO_LINE_BUF 0x200 +#define _IO_TIED_PUT_GET 0x400 /* Set if put and get pointer logicly tied. 
*/ +#define _IO_CURRENTLY_PUTTING 0x800 +#define _IO_IS_APPENDING 0x1000 +#define _IO_IS_FILEBUF 0x2000 +#define _IO_BAD_SEEN 0x4000 +#define _IO_USER_LOCK 0x8000 + +#define _IO_FLAGS2_MMAP 1 +#define _IO_FLAGS2_NOTCANCEL 2 +#ifdef _LIBC +# define _IO_FLAGS2_FORTIFY 4 +#endif +# 117 "/usr/include/libio.h" 3 4 +#define _IO_FLAGS2_USER_WBUF 8 +#ifdef _LIBC +# define _IO_FLAGS2_SCANF_STD 16 +# define _IO_FLAGS2_NOCLOSE 32 +# define _IO_FLAGS2_CLOEXEC 64 +#endif +# 123 "/usr/include/libio.h" 3 4 + +/* These are "formatting flags" matching the iostream fmtflags enum values. */ +#define _IO_SKIPWS 01 +#define _IO_LEFT 02 +#define _IO_RIGHT 04 +#define _IO_INTERNAL 010 +#define _IO_DEC 020 +#define _IO_OCT 040 +#define _IO_HEX 0100 +#define _IO_SHOWBASE 0200 +#define _IO_SHOWPOINT 0400 +#define _IO_UPPERCASE 01000 +#define _IO_SHOWPOS 02000 +#define _IO_SCIENTIFIC 04000 +#define _IO_FIXED 010000 +#define _IO_UNITBUF 020000 +#define _IO_STDIO 040000 +#define _IO_DONT_CLOSE 0100000 +#define _IO_BOOLALPHA 0200000 + + +struct _IO_jump_t; struct _IO_FILE; + +/* Handle lock. */ +#ifdef _IO_MTSAFE_IO +# if defined __GLIBC__ && __GLIBC__ >= 2 +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 149 "/usr/include/libio.h" 3 4 +# 150 "/usr/include/libio.h" 3 4 +# else +# 151 "/usr/include/libio.h" 3 4 +/*# include */ +# endif +# 153 "/usr/include/libio.h" 3 4 +#else +# 154 "/usr/include/libio.h" 3 4 +typedef void _IO_lock_t; +#endif +# 156 "/usr/include/libio.h" 3 4 + + +/* A streammarker remembers a position in a buffer. */ + +struct _IO_marker { + struct _IO_marker *_next; + struct _IO_FILE *_sbuf; + /* If _pos >= 0 + it points to _buf->Gbase()+_pos. FIXME comment */ + /* if _pos < 0, it points to _buf->eBptr()+_pos. 
FIXME comment */ + int _pos; +#if 0 + void set_streampos(streampos sp) { _spos = sp; } + void set_offset(int offset) { _pos = offset; _spos = (streampos)(-2); } + public: + streammarker(streambuf *sb); + ~streammarker(); + int saving() { return _spos == -2; } + int delta(streammarker&); + int delta(); +#endif +# 177 "/usr/include/libio.h" 3 4 +}; + +/* This is the structure from the libstdc++ codecvt class. */ +enum __codecvt_result +{ + __codecvt_ok, + __codecvt_partial, + __codecvt_error, + __codecvt_noconv +}; + +#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T +/* The order of the elements in the following struct must match the order + of the virtual functions in the libstdc++ codecvt class. */ +struct _IO_codecvt +{ + void (*__codecvt_destr) (struct _IO_codecvt *); + enum __codecvt_result (*__codecvt_do_out) (struct _IO_codecvt *, + __mbstate_t *, + const wchar_t *, + const wchar_t *, + const wchar_t **, char *, + char *, char **); + enum __codecvt_result (*__codecvt_do_unshift) (struct _IO_codecvt *, + __mbstate_t *, char *, + char *, char **); + enum __codecvt_result (*__codecvt_do_in) (struct _IO_codecvt *, + __mbstate_t *, + const char *, const char *, + const char **, wchar_t *, + wchar_t *, wchar_t **); + int (*__codecvt_do_encoding) (struct _IO_codecvt *); + int (*__codecvt_do_always_noconv) (struct _IO_codecvt *); + int (*__codecvt_do_length) (struct _IO_codecvt *, __mbstate_t *, + const char *, const char *, _IO_size_t); + int (*__codecvt_do_max_length) (struct _IO_codecvt *); + + _IO_iconv_t __cd_in; + _IO_iconv_t __cd_out; +}; + +/* Extra data for wide character streams. */ +struct _IO_wide_data +{ + wchar_t *_IO_read_ptr; /* Current read pointer */ + wchar_t *_IO_read_end; /* End of get area. */ + wchar_t *_IO_read_base; /* Start of putback+get area. */ + wchar_t *_IO_write_base; /* Start of put area. */ + wchar_t *_IO_write_ptr; /* Current put pointer. */ + wchar_t *_IO_write_end; /* End of put area. 
*/ + wchar_t *_IO_buf_base; /* Start of reserve area. */ + wchar_t *_IO_buf_end; /* End of reserve area. */ + /* The following fields are used to support backing up and undo. */ + wchar_t *_IO_save_base; /* Pointer to start of non-current get area. */ + wchar_t *_IO_backup_base; /* Pointer to first valid character of + backup area */ + wchar_t *_IO_save_end; /* Pointer to end of non-current get area. */ + + __mbstate_t _IO_state; + __mbstate_t _IO_last_state; + struct _IO_codecvt _codecvt; + + wchar_t _shortbuf[1]; + + const struct _IO_jump_t *_wide_vtable; +}; +#endif +# 244 "/usr/include/libio.h" 3 4 + +struct _IO_FILE { + int _flags; /* High-order word is _IO_MAGIC; rest is flags. */ +#define _IO_file_flags _flags + + /* The following pointers correspond to the C++ streambuf protocol. */ + /* Note: Tk uses the _IO_read_ptr and _IO_read_end fields directly. */ + char* _IO_read_ptr; /* Current read pointer */ + char* _IO_read_end; /* End of get area. */ + char* _IO_read_base; /* Start of putback+get area. */ + char* _IO_write_base; /* Start of put area. */ + char* _IO_write_ptr; /* Current put pointer. */ + char* _IO_write_end; /* End of put area. */ + char* _IO_buf_base; /* Start of reserve area. */ + char* _IO_buf_end; /* End of reserve area. */ + /* The following fields are used to support backing up and undo. */ + char *_IO_save_base; /* Pointer to start of non-current get area. */ + char *_IO_backup_base; /* Pointer to first valid character of backup area */ + char *_IO_save_end; /* Pointer to end of non-current get area. */ + + struct _IO_marker *_markers; + + struct _IO_FILE *_chain; + + int _fileno; +#if 0 + int _blksize; +#else +# 272 "/usr/include/libio.h" 3 4 + int _flags2; +#endif +# 274 "/usr/include/libio.h" 3 4 + _IO_off_t _old_offset; /* This used to be _offset but it's too small. */ + +#define __HAVE_COLUMN /* temporary */ + /* 1+column number of pbase(); 0 is unknown. 
*/ + unsigned short _cur_column; + signed char _vtable_offset; + char _shortbuf[1]; + + /* char* _save_gptr; char* _save_egptr; */ + + _IO_lock_t *_lock; +#ifdef _IO_USE_OLD_IO_FILE +}; + +struct _IO_FILE_complete +{ + struct _IO_FILE _file; +#endif +# 292 "/usr/include/libio.h" 3 4 +#if defined _G_IO_IO_FILE_VERSION && _G_IO_IO_FILE_VERSION == 0x20001 + _IO_off64_t _offset; +# if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T + /* Wide character stream stuff. */ + struct _IO_codecvt *_codecvt; + struct _IO_wide_data *_wide_data; + struct _IO_FILE *_freeres_list; + void *_freeres_buf; + size_t _freeres_size; +# else +# 302 "/usr/include/libio.h" 3 4 + void *__pad1; + void *__pad2; + void *__pad3; + void *__pad4; + size_t __pad5; +# endif +# 308 "/usr/include/libio.h" 3 4 + int _mode; + /* Make sure we don't get into trouble again. */ + char _unused2[15 * sizeof (int) - 4 * sizeof (void *) - sizeof (size_t)]; +#endif +# 312 "/usr/include/libio.h" 3 4 +}; + +#ifndef __cplusplus +typedef struct _IO_FILE _IO_FILE; +#endif +# 317 "/usr/include/libio.h" 3 4 + +struct _IO_FILE_plus; + +extern struct _IO_FILE_plus _IO_2_1_stdin_; +extern struct _IO_FILE_plus _IO_2_1_stdout_; +extern struct _IO_FILE_plus _IO_2_1_stderr_; +#ifndef _LIBC +#define _IO_stdin ((_IO_FILE*)(&_IO_2_1_stdin_)) +#define _IO_stdout ((_IO_FILE*)(&_IO_2_1_stdout_)) +#define _IO_stderr ((_IO_FILE*)(&_IO_2_1_stderr_)) +#else +# 328 "/usr/include/libio.h" 3 4 +extern _IO_FILE *_IO_stdin attribute_hidden; +extern _IO_FILE *_IO_stdout attribute_hidden; +extern _IO_FILE *_IO_stderr attribute_hidden; +#endif +# 332 "/usr/include/libio.h" 3 4 + + +/* Functions to do I/O and file management for a stream. */ + +/* Read NBYTES bytes from COOKIE into a buffer pointed to by BUF. + Return number of bytes read. */ +typedef __ssize_t __io_read_fn (void *__cookie, char *__buf, size_t __nbytes); + +/* Write N bytes pointed to by BUF to COOKIE. Write all N bytes + unless there is an error. Return number of bytes written. 
If + there is an error, return 0 and do not write anything. If the file + has been opened for append (__mode.__append set), then set the file + pointer to the end of the file and then do the write; if not, just + write at the current file pointer. */ +typedef __ssize_t __io_write_fn (void *__cookie, const char *__buf, + size_t __n); + +/* Move COOKIE's file position to *POS bytes from the + beginning of the file (if W is SEEK_SET), + the current position (if W is SEEK_CUR), + or the end of the file (if W is SEEK_END). + Set *POS to the new file position. + Returns zero if successful, nonzero if not. */ +typedef int __io_seek_fn (void *__cookie, _IO_off64_t *__pos, int __w); + +/* Close COOKIE. */ +typedef int __io_close_fn (void *__cookie); + + +#ifdef _GNU_SOURCE +/* User-visible names for the above. */ +typedef __io_read_fn cookie_read_function_t; +typedef __io_write_fn cookie_write_function_t; +typedef __io_seek_fn cookie_seek_function_t; +typedef __io_close_fn cookie_close_function_t; + +/* The structure with the cookie function pointers. */ +typedef struct +{ + __io_read_fn *read; /* Read bytes. */ + __io_write_fn *write; /* Write bytes. */ + __io_seek_fn *seek; /* Seek/tell file position. */ + __io_close_fn *close; /* Close file. */ +} _IO_cookie_io_functions_t; +typedef _IO_cookie_io_functions_t cookie_io_functions_t; + +struct _IO_cookie_file; + +/* Initialize one of those. 
*/ +extern void _IO_cookie_init (struct _IO_cookie_file *__cfile, int __read_write, + void *__cookie, _IO_cookie_io_functions_t __fns); +#endif +# 384 "/usr/include/libio.h" 3 4 + + +#ifdef __cplusplus +extern "C" { +#endif +# 389 "/usr/include/libio.h" 3 4 + +extern int __underflow (_IO_FILE *); +extern int __uflow (_IO_FILE *); +extern int __overflow (_IO_FILE *, int); +#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T +extern _IO_wint_t __wunderflow (_IO_FILE *); +extern _IO_wint_t __wuflow (_IO_FILE *); +extern _IO_wint_t __woverflow (_IO_FILE *, _IO_wint_t); +#endif +# 398 "/usr/include/libio.h" 3 4 + +#if __GNUC__ >= 3 +# define _IO_BE(expr, res) __builtin_expect ((expr), res) +#else +# 402 "/usr/include/libio.h" 3 4 +# define _IO_BE(expr, res) (expr) +#endif +# 404 "/usr/include/libio.h" 3 4 + +#define _IO_getc_unlocked(_fp) \ + (_IO_BE ((_fp)->_IO_read_ptr >= (_fp)->_IO_read_end, 0) \ + ? __uflow (_fp) : *(unsigned char *) (_fp)->_IO_read_ptr++) +#define _IO_peekc_unlocked(_fp) \ + (_IO_BE ((_fp)->_IO_read_ptr >= (_fp)->_IO_read_end, 0) \ + && __underflow (_fp) == EOF ? EOF \ + : *(unsigned char *) (_fp)->_IO_read_ptr) +#define _IO_putc_unlocked(_ch, _fp) \ + (_IO_BE ((_fp)->_IO_write_ptr >= (_fp)->_IO_write_end, 0) \ + ? __overflow (_fp, (unsigned char) (_ch)) \ + : (unsigned char) (*(_fp)->_IO_write_ptr++ = (_ch))) + +#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T +# define _IO_getwc_unlocked(_fp) \ + (_IO_BE ((_fp)->_wide_data == NULL \ + || ((_fp)->_wide_data->_IO_read_ptr \ + >= (_fp)->_wide_data->_IO_read_end), 0) \ + ? __wuflow (_fp) : (_IO_wint_t) *(_fp)->_wide_data->_IO_read_ptr++) +# define _IO_putwc_unlocked(_wch, _fp) \ + (_IO_BE ((_fp)->_wide_data == NULL \ + || ((_fp)->_wide_data->_IO_write_ptr \ + >= (_fp)->_wide_data->_IO_write_end), 0) \ + ? 
__woverflow (_fp, _wch) \ + : (_IO_wint_t) (*(_fp)->_wide_data->_IO_write_ptr++ = (_wch))) +#endif +# 430 "/usr/include/libio.h" 3 4 + +#define _IO_feof_unlocked(__fp) (((__fp)->_flags & _IO_EOF_SEEN) != 0) +#define _IO_ferror_unlocked(__fp) (((__fp)->_flags & _IO_ERR_SEEN) != 0) + +extern int _IO_getc (_IO_FILE *__fp); +extern int _IO_putc (int __c, _IO_FILE *__fp); +extern int _IO_feof (_IO_FILE *__fp) __THROW; +extern int _IO_ferror (_IO_FILE *__fp) __THROW; + +extern int _IO_peekc_locked (_IO_FILE *__fp); + +/* This one is for Emacs. */ +#define _IO_PENDING_OUTPUT_COUNT(_fp) \ + ((_fp)->_IO_write_ptr - (_fp)->_IO_write_base) + +extern void _IO_flockfile (_IO_FILE *) __THROW; +extern void _IO_funlockfile (_IO_FILE *) __THROW; +extern int _IO_ftrylockfile (_IO_FILE *) __THROW; + +#ifdef _IO_MTSAFE_IO +# define _IO_peekc(_fp) _IO_peekc_locked (_fp) +# define _IO_flockfile(_fp) \ + if (((_fp)->_flags & _IO_USER_LOCK) == 0) _IO_flockfile (_fp) +# define _IO_funlockfile(_fp) \ + if (((_fp)->_flags & _IO_USER_LOCK) == 0) _IO_funlockfile (_fp) +#else +# 456 "/usr/include/libio.h" 3 4 +# define _IO_peekc(_fp) _IO_peekc_unlocked (_fp) +# define _IO_flockfile(_fp) /**/ +# define _IO_funlockfile(_fp) /**/ +# define _IO_ftrylockfile(_fp) /**/ +# define _IO_cleanup_region_start(_fct, _fp) /**/ +# define _IO_cleanup_region_end(_Doit) /**/ +#endif /* !_IO_MTSAFE_IO */ +# 463 "/usr/include/libio.h" 3 4 + +extern int _IO_vfscanf (_IO_FILE * __restrict, const char * __restrict, + _IO_va_list, int *__restrict); +extern int _IO_vfprintf (_IO_FILE *__restrict, const char *__restrict, + _IO_va_list); +extern _IO_ssize_t _IO_padn (_IO_FILE *, int, _IO_ssize_t); +extern _IO_size_t _IO_sgetn (_IO_FILE *, void *, _IO_size_t); + +extern _IO_off64_t _IO_seekoff (_IO_FILE *, _IO_off64_t, int, int); +extern _IO_off64_t _IO_seekpos (_IO_FILE *, _IO_off64_t, int); + +extern void _IO_free_backup_area (_IO_FILE *) __THROW; + +#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T +extern _IO_wint_t 
_IO_getwc (_IO_FILE *__fp); +extern _IO_wint_t _IO_putwc (wchar_t __wc, _IO_FILE *__fp); +extern int _IO_fwide (_IO_FILE *__fp, int __mode) __THROW; +# if __GNUC__ >= 2 +/* While compiling glibc we have to handle compatibility with very old + versions. */ +# if defined _LIBC && defined SHARED +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 484 "/usr/include/libio.h" 3 4 +# 485 "/usr/include/libio.h" 3 4 +# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1) +# define _IO_fwide_maybe_incompatible \ + (__builtin_expect (&_IO_stdin_used == NULL, 0)) +extern const int _IO_stdin_used; +weak_extern (_IO_stdin_used); +# endif +# 491 "/usr/include/libio.h" 3 4 +# endif +# 492 "/usr/include/libio.h" 3 4 +# ifndef _IO_fwide_maybe_incompatible +# define _IO_fwide_maybe_incompatible (0) +# endif +# 495 "/usr/include/libio.h" 3 4 +/* A special optimized version of the function above. It optimizes the + case of initializing an unoriented byte stream. */ +# define _IO_fwide(__fp, __mode) \ + ({ int __result = (__mode); \ + if (__result < 0 && ! _IO_fwide_maybe_incompatible) \ + { \ + if ((__fp)->_mode == 0) \ + /* We know that all we have to do is to set the flag. */ \ + (__fp)->_mode = -1; \ + __result = (__fp)->_mode; \ + } \ + else if (__builtin_constant_p (__mode) && (__mode) == 0) \ + __result = _IO_fwide_maybe_incompatible ? 
-1 : (__fp)->_mode; \ + else \ + __result = _IO_fwide (__fp, __result); \ + __result; }) +# endif +# 512 "/usr/include/libio.h" 3 4 + +extern int _IO_vfwscanf (_IO_FILE * __restrict, const wchar_t * __restrict, + _IO_va_list, int *__restrict); +extern int _IO_vfwprintf (_IO_FILE *__restrict, const wchar_t *__restrict, + _IO_va_list); +extern _IO_ssize_t _IO_wpadn (_IO_FILE *, wint_t, _IO_ssize_t); +extern void _IO_free_wbackup_area (_IO_FILE *) __THROW; +#endif +# 520 "/usr/include/libio.h" 3 4 + +#ifdef __LDBL_COMPAT +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 522 "/usr/include/libio.h" 3 4 +# 523 "/usr/include/libio.h" 3 4 +#endif +# 524 "/usr/include/libio.h" 3 4 + +#ifdef __cplusplus +} +#endif +# 528 "/usr/include/libio.h" 3 4 + +#endif /* _IO_STDIO_H */ +# 530 "/usr/include/libio.h" 3 4 +# 75 "/usr/include/stdio.h" 2 3 4 + +#if defined __USE_XOPEN || defined __USE_XOPEN2K8 +# ifdef __GNUC__ +# ifndef _VA_LIST_DEFINED +typedef _G_va_list va_list; +# define _VA_LIST_DEFINED +# endif +# 82 "/usr/include/stdio.h" 3 4 +# else +# 83 "/usr/include/stdio.h" 3 4 +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 83 "/usr/include/stdio.h" 3 4 +# 84 "/usr/include/stdio.h" 3 4 +# endif +# 85 "/usr/include/stdio.h" 3 4 +#endif +# 86 "/usr/include/stdio.h" 3 4 + +#ifdef __USE_XOPEN2K8 +# ifndef __off_t_defined +# ifndef __USE_FILE_OFFSET64 +typedef __off_t off_t; +# else +# 92 "/usr/include/stdio.h" 3 4 +typedef __off64_t off_t; +# endif +# 94 "/usr/include/stdio.h" 3 4 +# define __off_t_defined +# endif +# 96 "/usr/include/stdio.h" 3 4 +# if defined __USE_LARGEFILE64 && !defined __off64_t_defined +typedef __off64_t off64_t; +# define __off64_t_defined +# endif +# 100 "/usr/include/stdio.h" 3 4 + +# ifndef __ssize_t_defined +typedef __ssize_t ssize_t; +# define __ssize_t_defined +# endif +# 105 "/usr/include/stdio.h" 3 4 +#endif +# 106 "/usr/include/stdio.h" 3 4 + 
+/* The type of the second argument to `fgetpos' and `fsetpos'. */ +__BEGIN_NAMESPACE_STD +#ifndef __USE_FILE_OFFSET64 +typedef _G_fpos_t fpos_t; +#else +# 112 "/usr/include/stdio.h" 3 4 +typedef _G_fpos64_t fpos_t; +#endif +# 114 "/usr/include/stdio.h" 3 4 +__END_NAMESPACE_STD +#ifdef __USE_LARGEFILE64 +typedef _G_fpos64_t fpos64_t; +#endif +# 118 "/usr/include/stdio.h" 3 4 + +/* The possibilities for the third argument to `setvbuf'. */ +#define _IOFBF 0 /* Fully buffered. */ +#define _IOLBF 1 /* Line buffered. */ +#define _IONBF 2 /* No buffering. */ + + +/* Default buffer size. */ +#ifndef BUFSIZ +# define BUFSIZ _IO_BUFSIZ +#endif +# 129 "/usr/include/stdio.h" 3 4 + + +/* End of file character. + Some things throughout the library rely on this being -1. */ +#ifndef EOF +# define EOF (-1) +#endif +# 136 "/usr/include/stdio.h" 3 4 + + +/* The possibilities for the third argument to `fseek'. + These values should not be changed. */ +#define SEEK_SET 0 /* Seek from beginning of file. */ +#define SEEK_CUR 1 /* Seek from current position. */ +#define SEEK_END 2 /* Seek from end of file. */ +#ifdef __USE_GNU +# define SEEK_DATA 3 /* Seek to next data. */ +# define SEEK_HOLE 4 /* Seek to next hole. */ +#endif +# 147 "/usr/include/stdio.h" 3 4 + + +#if defined __USE_SVID || defined __USE_XOPEN +/* Default path prefix for `tempnam' and `tmpnam'. */ +# define P_tmpdir "/tmp" +#endif +# 153 "/usr/include/stdio.h" 3 4 + + +/* Get the values: + L_tmpnam How long an array of chars must be to be passed to `tmpnam'. + TMP_MAX The minimum number of unique filenames generated by tmpnam + (and tempnam when it uses tmpnam's name space), + or tempnam (the two are separate). + L_ctermid How long an array to pass to `ctermid'. + L_cuserid How long an array to pass to `cuserid'. + FOPEN_MAX Minimum number of files that can be open at once. + FILENAME_MAX Maximum length of a filename. 
*/ +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 164 "/usr/include/stdio.h" 3 4 +# 1 "/usr/include/x86_64-linux-gnu/bits/stdio_lim.h" 1 3 4 +/* Copyright (C) 1994-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#if !defined _STDIO_H && !defined __need_FOPEN_MAX && !defined __need_IOV_MAX +# error "Never include directly; use instead." +#endif +# 21 "/usr/include/x86_64-linux-gnu/bits/stdio_lim.h" 3 4 + +#ifdef _STDIO_H +# define L_tmpnam 20 +# define TMP_MAX 238328 +# define FILENAME_MAX 4096 + +# ifdef __USE_POSIX +# define L_ctermid 9 +# if !defined __USE_XOPEN2K || defined __USE_GNU +# define L_cuserid 9 +# endif +# 32 "/usr/include/x86_64-linux-gnu/bits/stdio_lim.h" 3 4 +# endif +# 33 "/usr/include/x86_64-linux-gnu/bits/stdio_lim.h" 3 4 +#endif +# 34 "/usr/include/x86_64-linux-gnu/bits/stdio_lim.h" 3 4 + +#if defined __need_FOPEN_MAX || defined _STDIO_H +# undef FOPEN_MAX +# define FOPEN_MAX 16 +#endif +# 39 "/usr/include/x86_64-linux-gnu/bits/stdio_lim.h" 3 4 + +#if defined __need_IOV_MAX && !defined IOV_MAX +# define IOV_MAX 1024 +#endif +# 43 "/usr/include/x86_64-linux-gnu/bits/stdio_lim.h" 3 4 +# 165 "/usr/include/stdio.h" 2 3 4 + + +/* Standard streams. */ +extern struct _IO_FILE *stdin; /* Standard input stream. 
*/ +extern struct _IO_FILE *stdout; /* Standard output stream. */ +extern struct _IO_FILE *stderr; /* Standard error output stream. */ +/* C89/C99 say they're macros. Make them happy. */ +#define stdin stdin +#define stdout stdout +#define stderr stderr + +__BEGIN_NAMESPACE_STD +/* Remove file FILENAME. */ +extern int remove (const char *__filename) __THROW; +/* Rename file OLD to NEW. */ +extern int rename (const char *__old, const char *__new) __THROW; +__END_NAMESPACE_STD + +#ifdef __USE_ATFILE +/* Rename file OLD relative to OLDFD to NEW relative to NEWFD. */ +extern int renameat (int __oldfd, const char *__old, int __newfd, + const char *__new) __THROW; +#endif +# 188 "/usr/include/stdio.h" 3 4 + +__BEGIN_NAMESPACE_STD +/* Create a temporary file and open it read/write. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +#ifndef __USE_FILE_OFFSET64 +extern FILE *tmpfile (void) __wur; +#else +# 197 "/usr/include/stdio.h" 3 4 +# ifdef __REDIRECT +extern FILE *__REDIRECT (tmpfile, (void), tmpfile64) __wur; +# else +# 200 "/usr/include/stdio.h" 3 4 +# define tmpfile tmpfile64 +# endif +# 202 "/usr/include/stdio.h" 3 4 +#endif +# 203 "/usr/include/stdio.h" 3 4 + +#ifdef __USE_LARGEFILE64 +extern FILE *tmpfile64 (void) __wur; +#endif +# 207 "/usr/include/stdio.h" 3 4 + +/* Generate a temporary filename. */ +extern char *tmpnam (char *__s) __THROW __wur; +__END_NAMESPACE_STD + +#ifdef __USE_MISC +/* This is the reentrant variant of `tmpnam'. The only difference is + that it does not allow S to be NULL. */ +extern char *tmpnam_r (char *__s) __THROW __wur; +#endif +# 217 "/usr/include/stdio.h" 3 4 + + +#if defined __USE_SVID || defined __USE_XOPEN +/* Generate a unique temporary filename using up to five characters of PFX + if it is not NULL. The directory to put this file in is searched for + as follows: First the environment variable "TMPDIR" is checked. 
+ If it contains the name of a writable directory, that directory is used. + If not and if DIR is not NULL, that value is checked. If that fails, + P_tmpdir is tried and finally "/tmp". The storage for the filename + is allocated by `malloc'. */ +extern char *tempnam (const char *__dir, const char *__pfx) + __THROW __attribute_malloc__ __wur; +#endif +# 230 "/usr/include/stdio.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* Close STREAM. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int fclose (FILE *__stream); +/* Flush STREAM, or all streams if STREAM is NULL. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int fflush (FILE *__stream); +__END_NAMESPACE_STD + +#ifdef __USE_MISC +/* Faster versions when locking is not required. + + This function is not part of POSIX and therefore no official + cancellation point. But due to similarity with an POSIX interface + or due to the implementation it is a cancellation point and + therefore not marked with __THROW. */ +extern int fflush_unlocked (FILE *__stream); +#endif +# 254 "/usr/include/stdio.h" 3 4 + +#ifdef __USE_GNU +/* Close all streams. + + This function is not part of POSIX and therefore no official + cancellation point. But due to similarity with an POSIX interface + or due to the implementation it is a cancellation point and + therefore not marked with __THROW. */ +extern int fcloseall (void); +#endif +# 264 "/usr/include/stdio.h" 3 4 + + +__BEGIN_NAMESPACE_STD +#ifndef __USE_FILE_OFFSET64 +/* Open a file and create a new stream for it. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern FILE *fopen (const char *__restrict __filename, + const char *__restrict __modes) __wur; +/* Open a file, replacing an existing stream with it. + + This function is a possible cancellation point and therefore not + marked with __THROW. 
*/ +extern FILE *freopen (const char *__restrict __filename, + const char *__restrict __modes, + FILE *__restrict __stream) __wur; +#else +# 282 "/usr/include/stdio.h" 3 4 +# ifdef __REDIRECT +extern FILE *__REDIRECT (fopen, (const char *__restrict __filename, + const char *__restrict __modes), fopen64) + __wur; +extern FILE *__REDIRECT (freopen, (const char *__restrict __filename, + const char *__restrict __modes, + FILE *__restrict __stream), freopen64) + __wur; +# else +# 291 "/usr/include/stdio.h" 3 4 +# define fopen fopen64 +# define freopen freopen64 +# endif +# 294 "/usr/include/stdio.h" 3 4 +#endif +# 295 "/usr/include/stdio.h" 3 4 +__END_NAMESPACE_STD +#ifdef __USE_LARGEFILE64 +extern FILE *fopen64 (const char *__restrict __filename, + const char *__restrict __modes) __wur; +extern FILE *freopen64 (const char *__restrict __filename, + const char *__restrict __modes, + FILE *__restrict __stream) __wur; +#endif +# 303 "/usr/include/stdio.h" 3 4 + +#ifdef __USE_POSIX +/* Create a new stream that refers to an existing system file descriptor. */ +extern FILE *fdopen (int __fd, const char *__modes) __THROW __wur; +#endif +# 308 "/usr/include/stdio.h" 3 4 + +#ifdef __USE_GNU +/* Create a new stream that refers to the given magic cookie, + and uses the given functions for input and output. */ +extern FILE *fopencookie (void *__restrict __magic_cookie, + const char *__restrict __modes, + _IO_cookie_io_functions_t __io_funcs) __THROW __wur; +#endif +# 316 "/usr/include/stdio.h" 3 4 + +#ifdef __USE_XOPEN2K8 +/* Create a new stream that refers to a memory buffer. */ +extern FILE *fmemopen (void *__s, size_t __len, const char *__modes) + __THROW __wur; + +/* Open a stream that writes into a malloc'd buffer that is expanded as + necessary. *BUFLOC and *SIZELOC are updated with the buffer's location + and the number of characters written on fflush or fclose. 
*/ +extern FILE *open_memstream (char **__bufloc, size_t *__sizeloc) __THROW __wur; +#endif +# 327 "/usr/include/stdio.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* If BUF is NULL, make STREAM unbuffered. + Else make it use buffer BUF, of size BUFSIZ. */ +extern void setbuf (FILE *__restrict __stream, char *__restrict __buf) __THROW; +/* Make STREAM use buffering mode MODE. + If BUF is not NULL, use N bytes of it for buffering; + else allocate an internal buffer N bytes long. */ +extern int setvbuf (FILE *__restrict __stream, char *__restrict __buf, + int __modes, size_t __n) __THROW; +__END_NAMESPACE_STD + +#ifdef __USE_BSD +/* If BUF is NULL, make STREAM unbuffered. + Else make it use SIZE bytes of BUF for buffering. */ +extern void setbuffer (FILE *__restrict __stream, char *__restrict __buf, + size_t __size) __THROW; + +/* Make STREAM line-buffered. */ +extern void setlinebuf (FILE *__stream) __THROW; +#endif +# 349 "/usr/include/stdio.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* Write formatted output to STREAM. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int fprintf (FILE *__restrict __stream, + const char *__restrict __format, ...); +/* Write formatted output to stdout. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int printf (const char *__restrict __format, ...); +/* Write formatted output to S. */ +extern int sprintf (char *__restrict __s, + const char *__restrict __format, ...) __THROWNL; + +/* Write formatted output to S from argument list ARG. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int vfprintf (FILE *__restrict __s, const char *__restrict __format, + _G_va_list __arg); +/* Write formatted output to stdout from argument list ARG. + + This function is a possible cancellation point and therefore not + marked with __THROW. 
*/ +extern int vprintf (const char *__restrict __format, _G_va_list __arg); +/* Write formatted output to S from argument list ARG. */ +extern int vsprintf (char *__restrict __s, const char *__restrict __format, + _G_va_list __arg) __THROWNL; +__END_NAMESPACE_STD + +#if defined __USE_BSD || defined __USE_ISOC99 || defined __USE_UNIX98 +__BEGIN_NAMESPACE_C99 +/* Maximum chars of output to write in MAXLEN. */ +extern int snprintf (char *__restrict __s, size_t __maxlen, + const char *__restrict __format, ...) + __THROWNL __attribute__ ((__format__ (__printf__, 3, 4))); + +extern int vsnprintf (char *__restrict __s, size_t __maxlen, + const char *__restrict __format, _G_va_list __arg) + __THROWNL __attribute__ ((__format__ (__printf__, 3, 0))); +__END_NAMESPACE_C99 +#endif +# 395 "/usr/include/stdio.h" 3 4 + +#ifdef __USE_GNU +/* Write formatted output to a string dynamically allocated with `malloc'. + Store the address of the string in *PTR. */ +extern int vasprintf (char **__restrict __ptr, const char *__restrict __f, + _G_va_list __arg) + __THROWNL __attribute__ ((__format__ (__printf__, 2, 0))) __wur; +extern int __asprintf (char **__restrict __ptr, + const char *__restrict __fmt, ...) + __THROWNL __attribute__ ((__format__ (__printf__, 2, 3))) __wur; +extern int asprintf (char **__restrict __ptr, + const char *__restrict __fmt, ...) + __THROWNL __attribute__ ((__format__ (__printf__, 2, 3))) __wur; +#endif +# 409 "/usr/include/stdio.h" 3 4 + +#ifdef __USE_XOPEN2K8 +/* Write formatted output to a file descriptor. */ +extern int vdprintf (int __fd, const char *__restrict __fmt, + _G_va_list __arg) + __attribute__ ((__format__ (__printf__, 2, 0))); +extern int dprintf (int __fd, const char *__restrict __fmt, ...) + __attribute__ ((__format__ (__printf__, 2, 3))); +#endif +# 418 "/usr/include/stdio.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* Read formatted input from STREAM. + + This function is a possible cancellation point and therefore not + marked with __THROW. 
*/ +extern int fscanf (FILE *__restrict __stream, + const char *__restrict __format, ...) __wur; +/* Read formatted input from stdin. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int scanf (const char *__restrict __format, ...) __wur; +/* Read formatted input from S. */ +extern int sscanf (const char *__restrict __s, + const char *__restrict __format, ...) __THROW; + +#if defined __USE_ISOC99 && !defined __USE_GNU \ + && (!defined __LDBL_COMPAT || !defined __REDIRECT) \ + && (defined __STRICT_ANSI__ || defined __USE_XOPEN2K) +# ifdef __REDIRECT +/* For strict ISO C99 or POSIX compliance disallow %as, %aS and %a[ + GNU extension which conflicts with valid %a followed by letter + s, S or [. */ +extern int __REDIRECT (fscanf, (FILE *__restrict __stream, + const char *__restrict __format, ...), + __isoc99_fscanf) __wur; +extern int __REDIRECT (scanf, (const char *__restrict __format, ...), + __isoc99_scanf) __wur; +extern int __REDIRECT_NTH (sscanf, (const char *__restrict __s, + const char *__restrict __format, ...), + __isoc99_sscanf); +# else +# 452 "/usr/include/stdio.h" 3 4 +extern int __isoc99_fscanf (FILE *__restrict __stream, + const char *__restrict __format, ...) __wur; +extern int __isoc99_scanf (const char *__restrict __format, ...) __wur; +extern int __isoc99_sscanf (const char *__restrict __s, + const char *__restrict __format, ...) __THROW; +# define fscanf __isoc99_fscanf +# define scanf __isoc99_scanf +# define sscanf __isoc99_sscanf +# endif +# 461 "/usr/include/stdio.h" 3 4 +#endif +# 462 "/usr/include/stdio.h" 3 4 + +__END_NAMESPACE_STD + +#ifdef __USE_ISOC99 +__BEGIN_NAMESPACE_C99 +/* Read formatted input from S into argument list ARG. + + This function is a possible cancellation point and therefore not + marked with __THROW. 
*/ +extern int vfscanf (FILE *__restrict __s, const char *__restrict __format, + _G_va_list __arg) + __attribute__ ((__format__ (__scanf__, 2, 0))) __wur; + +/* Read formatted input from stdin into argument list ARG. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int vscanf (const char *__restrict __format, _G_va_list __arg) + __attribute__ ((__format__ (__scanf__, 1, 0))) __wur; + +/* Read formatted input from S into argument list ARG. */ +extern int vsscanf (const char *__restrict __s, + const char *__restrict __format, _G_va_list __arg) + __THROW __attribute__ ((__format__ (__scanf__, 2, 0))); + +# if !defined __USE_GNU \ + && (!defined __LDBL_COMPAT || !defined __REDIRECT) \ + && (defined __STRICT_ANSI__ || defined __USE_XOPEN2K) +# ifdef __REDIRECT +/* For strict ISO C99 or POSIX compliance disallow %as, %aS and %a[ + GNU extension which conflicts with valid %a followed by letter + s, S or [. */ +extern int __REDIRECT (vfscanf, + (FILE *__restrict __s, + const char *__restrict __format, _G_va_list __arg), + __isoc99_vfscanf) + __attribute__ ((__format__ (__scanf__, 2, 0))) __wur; +extern int __REDIRECT (vscanf, (const char *__restrict __format, + _G_va_list __arg), __isoc99_vscanf) + __attribute__ ((__format__ (__scanf__, 1, 0))) __wur; +extern int __REDIRECT_NTH (vsscanf, + (const char *__restrict __s, + const char *__restrict __format, + _G_va_list __arg), __isoc99_vsscanf) + __attribute__ ((__format__ (__scanf__, 2, 0))); +# else +# 508 "/usr/include/stdio.h" 3 4 +extern int __isoc99_vfscanf (FILE *__restrict __s, + const char *__restrict __format, + _G_va_list __arg) __wur; +extern int __isoc99_vscanf (const char *__restrict __format, + _G_va_list __arg) __wur; +extern int __isoc99_vsscanf (const char *__restrict __s, + const char *__restrict __format, + _G_va_list __arg) __THROW; +# define vfscanf __isoc99_vfscanf +# define vscanf __isoc99_vscanf +# define vsscanf __isoc99_vsscanf +# endif +# 520 
"/usr/include/stdio.h" 3 4 +# endif +# 521 "/usr/include/stdio.h" 3 4 + +__END_NAMESPACE_C99 +#endif /* Use ISO C9x. */ +# 524 "/usr/include/stdio.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* Read a character from STREAM. + + These functions are possible cancellation points and therefore not + marked with __THROW. */ +extern int fgetc (FILE *__stream); +extern int getc (FILE *__stream); + +/* Read a character from stdin. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int getchar (void); +__END_NAMESPACE_STD + +/* The C standard explicitly says this is a macro, so we always do the + optimization for it. */ +#define getc(_fp) _IO_getc (_fp) + +#if defined __USE_POSIX || defined __USE_MISC +/* These are defined in POSIX.1:1996. + + These functions are possible cancellation points and therefore not + marked with __THROW. */ +extern int getc_unlocked (FILE *__stream); +extern int getchar_unlocked (void); +#endif /* Use POSIX or MISC. */ +# 553 "/usr/include/stdio.h" 3 4 + +#ifdef __USE_MISC +/* Faster version when locking is not necessary. + + This function is not part of POSIX and therefore no official + cancellation point. But due to similarity with an POSIX interface + or due to the implementation it is a cancellation point and + therefore not marked with __THROW. */ +extern int fgetc_unlocked (FILE *__stream); +#endif /* Use MISC. */ +# 563 "/usr/include/stdio.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* Write a character to STREAM. + + These functions are possible cancellation points and therefore not + marked with __THROW. + + These functions is a possible cancellation point and therefore not + marked with __THROW. */ +extern int fputc (int __c, FILE *__stream); +extern int putc (int __c, FILE *__stream); + +/* Write a character to stdout. + + This function is a possible cancellation point and therefore not + marked with __THROW. 
*/ +extern int putchar (int __c); +__END_NAMESPACE_STD + +/* The C standard explicitly says this can be a macro, + so we always do the optimization for it. */ +#define putc(_ch, _fp) _IO_putc (_ch, _fp) + +#ifdef __USE_MISC +/* Faster version when locking is not necessary. + + This function is not part of POSIX and therefore no official + cancellation point. But due to similarity with an POSIX interface + or due to the implementation it is a cancellation point and + therefore not marked with __THROW. */ +extern int fputc_unlocked (int __c, FILE *__stream); +#endif /* Use MISC. */ +# 596 "/usr/include/stdio.h" 3 4 + +#if defined __USE_POSIX || defined __USE_MISC +/* These are defined in POSIX.1:1996. + + These functions are possible cancellation points and therefore not + marked with __THROW. */ +extern int putc_unlocked (int __c, FILE *__stream); +extern int putchar_unlocked (int __c); +#endif /* Use POSIX or MISC. */ +# 605 "/usr/include/stdio.h" 3 4 + + +#if defined __USE_SVID || defined __USE_MISC \ + || (defined __USE_XOPEN && !defined __USE_XOPEN2K) +/* Get a word (int) from STREAM. */ +extern int getw (FILE *__stream); + +/* Write a word (int) to STREAM. */ +extern int putw (int __w, FILE *__stream); +#endif +# 615 "/usr/include/stdio.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* Get a newline-terminated string of finite length from STREAM. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern char *fgets (char *__restrict __s, int __n, FILE *__restrict __stream) + __wur; + +#if !defined __USE_ISOC11 \ + || (defined __cplusplus && __cplusplus <= 201103L) +/* Get a newline-terminated string from stdin, removing the newline. + DO NOT USE THIS FUNCTION!! There is no limit on how much it will read. + + The function has been officially removed in ISO C11. This opportunity + is used to also remove it from the GNU feature list. It is now only + available when explicitly using an old ISO C, Unix, or POSIX standard. 
+ GCC defines _GNU_SOURCE when building C++ code and the function is still + in C++11, so it is also available for C++. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern char *gets (char *__s) __wur __attribute_deprecated__; +#endif +# 640 "/usr/include/stdio.h" 3 4 +__END_NAMESPACE_STD + +#ifdef __USE_GNU +/* This function does the same as `fgets' but does not lock the stream. + + This function is not part of POSIX and therefore no official + cancellation point. But due to similarity with an POSIX interface + or due to the implementation it is a cancellation point and + therefore not marked with __THROW. */ +extern char *fgets_unlocked (char *__restrict __s, int __n, + FILE *__restrict __stream) __wur; +#endif +# 652 "/usr/include/stdio.h" 3 4 + + +#ifdef __USE_XOPEN2K8 +/* Read up to (and including) a DELIMITER from STREAM into *LINEPTR + (and null-terminate it). *LINEPTR is a pointer returned from malloc (or + NULL), pointing to *N characters of space. It is realloc'd as + necessary. Returns the number of characters read (not including the + null terminator), or -1 on error or EOF. + + These functions are not part of POSIX and therefore no official + cancellation point. But due to similarity with an POSIX interface + or due to the implementation they are cancellation points and + therefore not marked with __THROW. */ +extern _IO_ssize_t __getdelim (char **__restrict __lineptr, + size_t *__restrict __n, int __delimiter, + FILE *__restrict __stream) __wur; +extern _IO_ssize_t getdelim (char **__restrict __lineptr, + size_t *__restrict __n, int __delimiter, + FILE *__restrict __stream) __wur; + +/* Like `getdelim', but reads up to a newline. + + This function is not part of POSIX and therefore no official + cancellation point. But due to similarity with an POSIX interface + or due to the implementation it is a cancellation point and + therefore not marked with __THROW. 
*/ +extern _IO_ssize_t getline (char **__restrict __lineptr, + size_t *__restrict __n, + FILE *__restrict __stream) __wur; +#endif +# 682 "/usr/include/stdio.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* Write a string to STREAM. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int fputs (const char *__restrict __s, FILE *__restrict __stream); + +/* Write a string, followed by a newline, to stdout. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int puts (const char *__s); + + +/* Push a character back onto the input buffer of STREAM. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int ungetc (int __c, FILE *__stream); + + +/* Read chunks of generic data from STREAM. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern size_t fread (void *__restrict __ptr, size_t __size, + size_t __n, FILE *__restrict __stream) __wur; +/* Write chunks of generic data to STREAM. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern size_t fwrite (const void *__restrict __ptr, size_t __size, + size_t __n, FILE *__restrict __s); +__END_NAMESPACE_STD + +#ifdef __USE_GNU +/* This function does the same as `fputs' but does not lock the stream. + + This function is not part of POSIX and therefore no official + cancellation point. But due to similarity with an POSIX interface + or due to the implementation it is a cancellation point and + therefore not marked with __THROW. */ +extern int fputs_unlocked (const char *__restrict __s, + FILE *__restrict __stream); +#endif +# 729 "/usr/include/stdio.h" 3 4 + +#ifdef __USE_MISC +/* Faster versions when locking is not necessary. + + These functions are not part of POSIX and therefore no official + cancellation point. 
But due to similarity with an POSIX interface + or due to the implementation they are cancellation points and + therefore not marked with __THROW. */ +extern size_t fread_unlocked (void *__restrict __ptr, size_t __size, + size_t __n, FILE *__restrict __stream) __wur; +extern size_t fwrite_unlocked (const void *__restrict __ptr, size_t __size, + size_t __n, FILE *__restrict __stream); +#endif +# 742 "/usr/include/stdio.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* Seek to a certain position on STREAM. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int fseek (FILE *__stream, long int __off, int __whence); +/* Return the current position of STREAM. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern long int ftell (FILE *__stream) __wur; +/* Rewind to the beginning of STREAM. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern void rewind (FILE *__stream); +__END_NAMESPACE_STD + +/* The Single Unix Specification, Version 2, specifies an alternative, + more adequate interface for the two functions above which deal with + file offset. `long int' is not the right type. These definitions + are originally defined in the Large File Support API. */ + +#if defined __USE_LARGEFILE || defined __USE_XOPEN2K +# ifndef __USE_FILE_OFFSET64 +/* Seek to a certain position on STREAM. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int fseeko (FILE *__stream, __off_t __off, int __whence); +/* Return the current position of STREAM. + + This function is a possible cancellation point and therefore not + marked with __THROW. 
*/ +extern __off_t ftello (FILE *__stream) __wur; +# else +# 780 "/usr/include/stdio.h" 3 4 +# ifdef __REDIRECT +extern int __REDIRECT (fseeko, + (FILE *__stream, __off64_t __off, int __whence), + fseeko64); +extern __off64_t __REDIRECT (ftello, (FILE *__stream), ftello64); +# else +# 786 "/usr/include/stdio.h" 3 4 +# define fseeko fseeko64 +# define ftello ftello64 +# endif +# 789 "/usr/include/stdio.h" 3 4 +# endif +# 790 "/usr/include/stdio.h" 3 4 +#endif +# 791 "/usr/include/stdio.h" 3 4 + +__BEGIN_NAMESPACE_STD +#ifndef __USE_FILE_OFFSET64 +/* Get STREAM's position. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int fgetpos (FILE *__restrict __stream, fpos_t *__restrict __pos); +/* Set STREAM's position. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int fsetpos (FILE *__stream, const fpos_t *__pos); +#else +# 805 "/usr/include/stdio.h" 3 4 +# ifdef __REDIRECT +extern int __REDIRECT (fgetpos, (FILE *__restrict __stream, + fpos_t *__restrict __pos), fgetpos64); +extern int __REDIRECT (fsetpos, + (FILE *__stream, const fpos_t *__pos), fsetpos64); +# else +# 811 "/usr/include/stdio.h" 3 4 +# define fgetpos fgetpos64 +# define fsetpos fsetpos64 +# endif +# 814 "/usr/include/stdio.h" 3 4 +#endif +# 815 "/usr/include/stdio.h" 3 4 +__END_NAMESPACE_STD + +#ifdef __USE_LARGEFILE64 +extern int fseeko64 (FILE *__stream, __off64_t __off, int __whence); +extern __off64_t ftello64 (FILE *__stream) __wur; +extern int fgetpos64 (FILE *__restrict __stream, fpos64_t *__restrict __pos); +extern int fsetpos64 (FILE *__stream, const fpos64_t *__pos); +#endif +# 823 "/usr/include/stdio.h" 3 4 + +__BEGIN_NAMESPACE_STD +/* Clear the error and EOF indicators for STREAM. */ +extern void clearerr (FILE *__stream) __THROW; +/* Return the EOF indicator for STREAM. */ +extern int feof (FILE *__stream) __THROW __wur; +/* Return the error indicator for STREAM. 
*/ +extern int ferror (FILE *__stream) __THROW __wur; +__END_NAMESPACE_STD + +#ifdef __USE_MISC +/* Faster versions when locking is not required. */ +extern void clearerr_unlocked (FILE *__stream) __THROW; +extern int feof_unlocked (FILE *__stream) __THROW __wur; +extern int ferror_unlocked (FILE *__stream) __THROW __wur; +#endif +# 839 "/usr/include/stdio.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* Print a message describing the meaning of the value of errno. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern void perror (const char *__s); +__END_NAMESPACE_STD + +/* Provide the declarations for `sys_errlist' and `sys_nerr' if they + are available on this system. Even if available, these variables + should not be used directly. The `strerror' function provides + all the necessary functionality. */ +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 853 "/usr/include/stdio.h" 3 4 +# 1 "/usr/include/x86_64-linux-gnu/bits/sys_errlist.h" 1 3 4 +/* Declare sys_errlist and sys_nerr, or don't. Compatibility (do) version. + Copyright (C) 2002-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _STDIO_H +# error "Never include directly; use instead." 
+#endif +# 22 "/usr/include/x86_64-linux-gnu/bits/sys_errlist.h" 3 4 + +/* sys_errlist and sys_nerr are deprecated. Use strerror instead. */ + +#ifdef __USE_BSD +extern int sys_nerr; +extern const char *const sys_errlist[]; +#endif +# 29 "/usr/include/x86_64-linux-gnu/bits/sys_errlist.h" 3 4 +#ifdef __USE_GNU +extern int _sys_nerr; +extern const char *const _sys_errlist[]; +#endif +# 33 "/usr/include/x86_64-linux-gnu/bits/sys_errlist.h" 3 4 +# 854 "/usr/include/stdio.h" 2 3 4 + + +#ifdef __USE_POSIX +/* Return the system file descriptor for STREAM. */ +extern int fileno (FILE *__stream) __THROW __wur; +#endif /* Use POSIX. */ +# 860 "/usr/include/stdio.h" 3 4 + +#ifdef __USE_MISC +/* Faster version when locking is not required. */ +extern int fileno_unlocked (FILE *__stream) __THROW __wur; +#endif +# 865 "/usr/include/stdio.h" 3 4 + + +#if (defined __USE_POSIX2 || defined __USE_SVID || defined __USE_BSD || \ + defined __USE_MISC) +/* Create a new stream connected to a pipe running the given command. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern FILE *popen (const char *__command, const char *__modes) __wur; + +/* Close a stream opened by popen and return the status of its child. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +extern int pclose (FILE *__stream); +#endif +# 881 "/usr/include/stdio.h" 3 4 + + +#ifdef __USE_POSIX +/* Return the name of the controlling terminal. */ +extern char *ctermid (char *__s) __THROW; +#endif /* Use POSIX. */ +# 887 "/usr/include/stdio.h" 3 4 + + +#ifdef __USE_XOPEN +/* Return the name of the current user. */ +extern char *cuserid (char *__s); +#endif /* Use X/Open, but not issue 6. */ +# 893 "/usr/include/stdio.h" 3 4 + + +#ifdef __USE_GNU +struct obstack; /* See . */ + +/* Write formatted output to an obstack. */ +extern int obstack_printf (struct obstack *__restrict __obstack, + const char *__restrict __format, ...) 
+ __THROWNL __attribute__ ((__format__ (__printf__, 2, 3))); +extern int obstack_vprintf (struct obstack *__restrict __obstack, + const char *__restrict __format, + _G_va_list __args) + __THROWNL __attribute__ ((__format__ (__printf__, 2, 0))); +#endif /* Use GNU. */ +# 907 "/usr/include/stdio.h" 3 4 + + +#if defined __USE_POSIX || defined __USE_MISC +/* These are defined in POSIX.1:1996. */ + +/* Acquire ownership of STREAM. */ +extern void flockfile (FILE *__stream) __THROW; + +/* Try to acquire ownership of STREAM but do not block if it is not + possible. */ +extern int ftrylockfile (FILE *__stream) __THROW __wur; + +/* Relinquish the ownership granted for STREAM. */ +extern void funlockfile (FILE *__stream) __THROW; +#endif /* POSIX || misc */ +# 922 "/usr/include/stdio.h" 3 4 + +#if defined __USE_XOPEN && !defined __USE_XOPEN2K && !defined __USE_GNU +/* The X/Open standard requires some functions and variables to be + declared here which do not belong into this header. But we have to + follow. In GNU mode we don't do this nonsense. */ +# define __need_getopt +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 928 "/usr/include/stdio.h" 3 4 +# 929 "/usr/include/stdio.h" 3 4 +#endif /* X/Open, but not issue 6 and not for GNU. */ +# 930 "/usr/include/stdio.h" 3 4 + +/* If we are compiling with optimizing read this file. It contains + several optimizing inline functions and macros. */ +#ifdef __USE_EXTERN_INLINES +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 934 "/usr/include/stdio.h" 3 4 +# 1 "/usr/include/x86_64-linux-gnu/bits/stdio.h" 1 3 4 +/* Optimizing macros and inline functions for stdio functions. + Copyright (C) 1998-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _STDIO_H +# error "Never include directly; use instead." +#endif +# 22 "/usr/include/x86_64-linux-gnu/bits/stdio.h" 3 4 + +#ifndef __extern_inline +# define __STDIO_INLINE inline +#else +# 26 "/usr/include/x86_64-linux-gnu/bits/stdio.h" 3 4 +# define __STDIO_INLINE __extern_inline +#endif +# 28 "/usr/include/x86_64-linux-gnu/bits/stdio.h" 3 4 + + +#ifdef __USE_EXTERN_INLINES +/* For -D_FORTIFY_SOURCE{,=2} bits/stdio2.h will define a different + inline. */ +# if !(__USE_FORTIFY_LEVEL > 0 && defined __fortify_function) +/* Write formatted output to stdout from argument list ARG. */ +__STDIO_INLINE int +vprintf (const char *__restrict __fmt, _G_va_list __arg) +{ + return vfprintf (stdout, __fmt, __arg); +} +# endif +# 41 "/usr/include/x86_64-linux-gnu/bits/stdio.h" 3 4 + +/* Read a character from stdin. */ +__STDIO_INLINE int +getchar (void) +{ + return _IO_getc (stdin); +} + + +# ifdef __USE_MISC +/* Faster version when locking is not necessary. */ +__STDIO_INLINE int +fgetc_unlocked (FILE *__fp) +{ + return _IO_getc_unlocked (__fp); +} +# endif /* misc */ +# 58 "/usr/include/x86_64-linux-gnu/bits/stdio.h" 3 4 + + +# if defined __USE_POSIX || defined __USE_MISC +/* This is defined in POSIX.1:1996. 
*/ +__STDIO_INLINE int +getc_unlocked (FILE *__fp) +{ + return _IO_getc_unlocked (__fp); +} + +/* This is defined in POSIX.1:1996. */ +__STDIO_INLINE int +getchar_unlocked (void) +{ + return _IO_getc_unlocked (stdin); +} +# endif /* POSIX || misc */ +# 75 "/usr/include/x86_64-linux-gnu/bits/stdio.h" 3 4 + + +/* Write a character to stdout. */ +__STDIO_INLINE int +putchar (int __c) +{ + return _IO_putc (__c, stdout); +} + + +# ifdef __USE_MISC +/* Faster version when locking is not necessary. */ +__STDIO_INLINE int +fputc_unlocked (int __c, FILE *__stream) +{ + return _IO_putc_unlocked (__c, __stream); +} +# endif /* misc */ +# 93 "/usr/include/x86_64-linux-gnu/bits/stdio.h" 3 4 + + +# if defined __USE_POSIX || defined __USE_MISC +/* This is defined in POSIX.1:1996. */ +__STDIO_INLINE int +putc_unlocked (int __c, FILE *__stream) +{ + return _IO_putc_unlocked (__c, __stream); +} + +/* This is defined in POSIX.1:1996. */ +__STDIO_INLINE int +putchar_unlocked (int __c) +{ + return _IO_putc_unlocked (__c, stdout); +} +# endif /* POSIX || misc */ +# 110 "/usr/include/x86_64-linux-gnu/bits/stdio.h" 3 4 + + +# ifdef __USE_GNU +/* Like `getdelim', but reads up to a newline. */ +__STDIO_INLINE _IO_ssize_t +getline (char **__lineptr, size_t *__n, FILE *__stream) +{ + return __getdelim (__lineptr, __n, '\n', __stream); +} +# endif /* GNU */ +# 120 "/usr/include/x86_64-linux-gnu/bits/stdio.h" 3 4 + + +# ifdef __USE_MISC +/* Faster versions when locking is not required. */ +__STDIO_INLINE int +__NTH (feof_unlocked (FILE *__stream)) +{ + return _IO_feof_unlocked (__stream); +} + +/* Faster versions when locking is not required. */ +__STDIO_INLINE int +__NTH (ferror_unlocked (FILE *__stream)) +{ + return _IO_ferror_unlocked (__stream); +} +# endif /* misc */ +# 137 "/usr/include/x86_64-linux-gnu/bits/stdio.h" 3 4 + +#endif /* Use extern inlines. 
*/ +# 139 "/usr/include/x86_64-linux-gnu/bits/stdio.h" 3 4 + + +#if defined __USE_MISC && defined __GNUC__ && defined __OPTIMIZE__ \ + && !defined __cplusplus +/* Perform some simple optimizations. */ +# define fread_unlocked(ptr, size, n, stream) \ + (__extension__ ((__builtin_constant_p (size) && __builtin_constant_p (n) \ + && (size_t) (size) * (size_t) (n) <= 8 \ + && (size_t) (size) != 0) \ + ? ({ char *__ptr = (char *) (ptr); \ + FILE *__stream = (stream); \ + size_t __cnt; \ + for (__cnt = (size_t) (size) * (size_t) (n); \ + __cnt > 0; --__cnt) \ + { \ + int __c = _IO_getc_unlocked (__stream); \ + if (__c == EOF) \ + break; \ + *__ptr++ = __c; \ + } \ + ((size_t) (size) * (size_t) (n) - __cnt) \ + / (size_t) (size); }) \ + : (((__builtin_constant_p (size) && (size_t) (size) == 0) \ + || (__builtin_constant_p (n) && (size_t) (n) == 0)) \ + /* Evaluate all parameters once. */ \ + ? ((void) (ptr), (void) (stream), (void) (size), \ + (void) (n), (size_t) 0) \ + : fread_unlocked (ptr, size, n, stream)))) + +# define fwrite_unlocked(ptr, size, n, stream) \ + (__extension__ ((__builtin_constant_p (size) && __builtin_constant_p (n) \ + && (size_t) (size) * (size_t) (n) <= 8 \ + && (size_t) (size) != 0) \ + ? ({ const char *__ptr = (const char *) (ptr); \ + FILE *__stream = (stream); \ + size_t __cnt; \ + for (__cnt = (size_t) (size) * (size_t) (n); \ + __cnt > 0; --__cnt) \ + if (_IO_putc_unlocked (*__ptr++, __stream) == EOF) \ + break; \ + ((size_t) (size) * (size_t) (n) - __cnt) \ + / (size_t) (size); }) \ + : (((__builtin_constant_p (size) && (size_t) (size) == 0) \ + || (__builtin_constant_p (n) && (size_t) (n) == 0)) \ + /* Evaluate all parameters once. */ \ + ? ((void) (ptr), (void) (stream), (void) (size), \ + (void) (n), (size_t) 0) \ + : fwrite_unlocked (ptr, size, n, stream)))) +#endif +# 188 "/usr/include/x86_64-linux-gnu/bits/stdio.h" 3 4 + +/* Define helper macro. 
*/ +#undef __STDIO_INLINE +# 935 "/usr/include/stdio.h" 2 3 4 +#endif +# 936 "/usr/include/stdio.h" 3 4 +#if __USE_FORTIFY_LEVEL > 0 && defined __extern_always_inline +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 937 "/usr/include/stdio.h" 3 4 +# 938 "/usr/include/stdio.h" 3 4 +#endif +# 939 "/usr/include/stdio.h" 3 4 +#ifdef __LDBL_COMPAT +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 940 "/usr/include/stdio.h" 3 4 +# 941 "/usr/include/stdio.h" 3 4 +#endif +# 942 "/usr/include/stdio.h" 3 4 + +__END_DECLS + +#endif /* included. */ +# 946 "/usr/include/stdio.h" 3 4 + +#endif /* !_STDIO_H */ +# 948 "/usr/include/stdio.h" 3 4 +# 3 "oski.c" 2 +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 3 "oski.c" +# 1 "/usr/include/stdlib.h" 1 3 4 +/* Copyright (C) 1991-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* + * ISO C99 Standard: 7.20 General utilities + */ + +#ifndef _STDLIB_H + +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 24 "/usr/include/stdlib.h" 3 4 +# 25 "/usr/include/stdlib.h" 3 4 + +/* Get size_t, wchar_t and NULL from . 
*/ +#define __need_size_t +#ifndef __need_malloc_and_calloc +# define __need_wchar_t +# define __need_NULL +#endif +# 32 "/usr/include/stdlib.h" 3 4 +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 32 "/usr/include/stdlib.h" 3 4 +# 1 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 1 3 4 +/*===---- stddef.h - Basic type definitions --------------------------------=== + * + * Copyright (c) 2008 Eli Friedman + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ * + *===-----------------------------------------------------------------------=== + */ + +#if !defined(__STDDEF_H) || defined(__need_ptrdiff_t) || \ + defined(__need_size_t) || defined(__need_wchar_t) || \ + defined(__need_NULL) || defined(__need_wint_t) + +#if !defined(__need_ptrdiff_t) && !defined(__need_size_t) && \ + !defined(__need_wchar_t) && !defined(__need_NULL) && \ + !defined(__need_wint_t) +/* Always define miscellaneous pieces when modules are available. */ +#if !__has_feature(modules) +#define __STDDEF_H +#endif +# 37 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#define __need_ptrdiff_t +#define __need_size_t +#define __need_wchar_t +#define __need_NULL +#define __need_STDDEF_H_misc +/* __need_wint_t is intentionally not defined here. */ +#endif +# 44 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#if defined(__need_ptrdiff_t) +#if !defined(_PTRDIFF_T) || __has_feature(modules) +/* Always define ptrdiff_t when modules are available. */ +#if !__has_feature(modules) +#define _PTRDIFF_T +#endif +# 51 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +typedef __PTRDIFF_TYPE__ ptrdiff_t; +#endif +# 53 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#undef __need_ptrdiff_t +#endif /* defined(__need_ptrdiff_t) */ +# 55 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#if defined(__need_size_t) +#if !defined(_SIZE_T) || __has_feature(modules) +/* Always define size_t when modules are available. 
*/ +#if !__has_feature(modules) +#define _SIZE_T +#endif +# 62 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +typedef __SIZE_TYPE__ size_t; +#endif +# 64 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#undef __need_size_t +#endif /*defined(__need_size_t) */ +# 66 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#if defined(__need_STDDEF_H_misc) +/* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is + * enabled. */ +#if (defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 && \ + !defined(_RSIZE_T)) || __has_feature(modules) +/* Always define rsize_t when modules are available. */ +#if !__has_feature(modules) +#define _RSIZE_T +#endif +# 76 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +typedef __SIZE_TYPE__ rsize_t; +#endif +# 78 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#endif /* defined(__need_STDDEF_H_misc) */ +# 79 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#if defined(__need_wchar_t) +#ifndef __cplusplus +/* Always define wchar_t when modules are available. 
*/ +#if !defined(_WCHAR_T) || __has_feature(modules) +#if !__has_feature(modules) +#define _WCHAR_T +#if defined(_MSC_EXTENSIONS) +#define _WCHAR_T_DEFINED +#endif +# 89 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#endif +# 90 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +typedef __WCHAR_TYPE__ wchar_t; +#endif +# 92 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#endif +# 93 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#undef __need_wchar_t +#endif /* defined(__need_wchar_t) */ +# 95 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#if defined(__need_NULL) +#undef NULL +#ifdef __cplusplus +# if !defined(__MINGW32__) && !defined(_MSC_VER) +# define NULL __null +# else +# 102 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +# define NULL 0 +# endif +# 104 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#else +# 105 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +# define NULL ((void*)0) +#endif +# 107 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#ifdef __cplusplus +#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED) +namespace std { typedef decltype(nullptr) nullptr_t; } +using ::std::nullptr_t; +#endif +# 112 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#endif +# 113 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#undef __need_NULL +#endif /* defined(__need_NULL) */ +# 115 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#if defined(__need_STDDEF_H_misc) +#if __STDC_VERSION__ >= 201112L || __cplusplus >= 201103L +#if 0 /* expanded by -frewrite-includes */ +#include "__stddef_max_align_t.h" +#endif /* expanded by -frewrite-includes */ +# 118 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 
+# 119 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#endif +# 120 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#define offsetof(t, d) __builtin_offsetof(t, d) +#undef __need_STDDEF_H_misc +#endif /* defined(__need_STDDEF_H_misc) */ +# 123 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +/* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use +__WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */ +#if defined(__need_wint_t) +/* Always define wint_t when modules are available. */ +#if !defined(_WINT_T) || __has_feature(modules) +#if !__has_feature(modules) +#define _WINT_T +#endif +# 132 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +typedef __WINT_TYPE__ wint_t; +#endif +# 134 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#undef __need_wint_t +#endif /* __need_wint_t */ +# 136 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#endif +# 138 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +# 33 "/usr/include/stdlib.h" 2 3 4 + +__BEGIN_DECLS + +#ifndef __need_malloc_and_calloc +#define _STDLIB_H 1 + +#if (defined __USE_XOPEN || defined __USE_XOPEN2K8) && !defined _SYS_WAIT_H +/* XPG requires a few symbols from being defined. */ +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 41 "/usr/include/stdlib.h" 3 4 +# 42 "/usr/include/stdlib.h" 3 4 +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 42 "/usr/include/stdlib.h" 3 4 +# 43 "/usr/include/stdlib.h" 3 4 + +# ifdef __USE_BSD + +/* Lots of hair to allow traditional BSD use of `union wait' + as well as POSIX.1 use of `int' for the status word. 
*/ + +# if defined __GNUC__ && !defined __cplusplus +# define __WAIT_INT(status) \ + (__extension__ (((union { __typeof(status) __in; int __i; }) \ + { .__in = (status) }).__i)) +# else +# 54 "/usr/include/stdlib.h" 3 4 +# define __WAIT_INT(status) (*(int *) &(status)) +# endif +# 56 "/usr/include/stdlib.h" 3 4 + +/* This is the type of the argument to `wait'. The funky union + causes redeclarations with either `int *' or `union wait *' to be + allowed without complaint. __WAIT_STATUS_DEFN is the type used in + the actual function definitions. */ + +# if !defined __GNUC__ || __GNUC__ < 2 || defined __cplusplus +# define __WAIT_STATUS void * +# define __WAIT_STATUS_DEFN void * +# else +# 66 "/usr/include/stdlib.h" 3 4 +/* This works in GCC 2.6.1 and later. */ +typedef union + { + union wait *__uptr; + int *__iptr; + } __WAIT_STATUS __attribute__ ((__transparent_union__)); +# define __WAIT_STATUS_DEFN int * +# endif +# 74 "/usr/include/stdlib.h" 3 4 + +# else /* Don't use BSD. */ +# 76 "/usr/include/stdlib.h" 3 4 + +# define __WAIT_INT(status) (status) +# define __WAIT_STATUS int * +# define __WAIT_STATUS_DEFN int * + +# endif /* Use BSD. */ +# 82 "/usr/include/stdlib.h" 3 4 + +/* Define the macros also would define this way. */ +# define WEXITSTATUS(status) __WEXITSTATUS (__WAIT_INT (status)) +# define WTERMSIG(status) __WTERMSIG (__WAIT_INT (status)) +# define WSTOPSIG(status) __WSTOPSIG (__WAIT_INT (status)) +# define WIFEXITED(status) __WIFEXITED (__WAIT_INT (status)) +# define WIFSIGNALED(status) __WIFSIGNALED (__WAIT_INT (status)) +# define WIFSTOPPED(status) __WIFSTOPPED (__WAIT_INT (status)) +# ifdef __WIFCONTINUED +# define WIFCONTINUED(status) __WIFCONTINUED (__WAIT_INT (status)) +# endif +# 93 "/usr/include/stdlib.h" 3 4 +#endif /* X/Open or XPG7 and not included. */ +# 94 "/usr/include/stdlib.h" 3 4 + +__BEGIN_NAMESPACE_STD +/* Returned by `div'. */ +typedef struct + { + int quot; /* Quotient. */ + int rem; /* Remainder. 
*/ + } div_t; + +/* Returned by `ldiv'. */ +#ifndef __ldiv_t_defined +typedef struct + { + long int quot; /* Quotient. */ + long int rem; /* Remainder. */ + } ldiv_t; +# define __ldiv_t_defined 1 +#endif +# 112 "/usr/include/stdlib.h" 3 4 +__END_NAMESPACE_STD + +#if defined __USE_ISOC99 && !defined __lldiv_t_defined +__BEGIN_NAMESPACE_C99 +/* Returned by `lldiv'. */ +__extension__ typedef struct + { + long long int quot; /* Quotient. */ + long long int rem; /* Remainder. */ + } lldiv_t; +# define __lldiv_t_defined 1 +__END_NAMESPACE_C99 +#endif +# 125 "/usr/include/stdlib.h" 3 4 + + +/* The largest number rand will return (same as INT_MAX). */ +#define RAND_MAX 2147483647 + + +/* We define these the same for all machines. + Changes from this to the outside world should be done in `_exit'. */ +#define EXIT_FAILURE 1 /* Failing exit status. */ +#define EXIT_SUCCESS 0 /* Successful exit status. */ + + +/* Maximum length of a multibyte character in the current locale. */ +#define MB_CUR_MAX (__ctype_get_mb_cur_max ()) +extern size_t __ctype_get_mb_cur_max (void) __THROW __wur; + + +__BEGIN_NAMESPACE_STD +/* Convert a string to a floating-point number. */ +extern double atof (const char *__nptr) + __THROW __attribute_pure__ __nonnull ((1)) __wur; +/* Convert a string to an integer. */ +extern int atoi (const char *__nptr) + __THROW __attribute_pure__ __nonnull ((1)) __wur; +/* Convert a string to a long integer. */ +extern long int atol (const char *__nptr) + __THROW __attribute_pure__ __nonnull ((1)) __wur; +__END_NAMESPACE_STD + +#if defined __USE_ISOC99 || defined __USE_MISC +__BEGIN_NAMESPACE_C99 +/* Convert a string to a long long integer. */ +__extension__ extern long long int atoll (const char *__nptr) + __THROW __attribute_pure__ __nonnull ((1)) __wur; +__END_NAMESPACE_C99 +#endif +# 161 "/usr/include/stdlib.h" 3 4 + +__BEGIN_NAMESPACE_STD +/* Convert a string to a floating-point number. 
*/ +extern double strtod (const char *__restrict __nptr, + char **__restrict __endptr) + __THROW __nonnull ((1)); +__END_NAMESPACE_STD + +#ifdef __USE_ISOC99 +__BEGIN_NAMESPACE_C99 +/* Likewise for `float' and `long double' sizes of floating-point numbers. */ +extern float strtof (const char *__restrict __nptr, + char **__restrict __endptr) __THROW __nonnull ((1)); + +extern long double strtold (const char *__restrict __nptr, + char **__restrict __endptr) + __THROW __nonnull ((1)); +__END_NAMESPACE_C99 +#endif +# 180 "/usr/include/stdlib.h" 3 4 + +__BEGIN_NAMESPACE_STD +/* Convert a string to a long integer. */ +extern long int strtol (const char *__restrict __nptr, + char **__restrict __endptr, int __base) + __THROW __nonnull ((1)); +/* Convert a string to an unsigned long integer. */ +extern unsigned long int strtoul (const char *__restrict __nptr, + char **__restrict __endptr, int __base) + __THROW __nonnull ((1)); +__END_NAMESPACE_STD + +#ifdef __USE_BSD +/* Convert a string to a quadword integer. */ +__extension__ +extern long long int strtoq (const char *__restrict __nptr, + char **__restrict __endptr, int __base) + __THROW __nonnull ((1)); +/* Convert a string to an unsigned quadword integer. */ +__extension__ +extern unsigned long long int strtouq (const char *__restrict __nptr, + char **__restrict __endptr, int __base) + __THROW __nonnull ((1)); +#endif /* Use BSD. */ +# 204 "/usr/include/stdlib.h" 3 4 + +#if defined __USE_ISOC99 || defined __USE_MISC +__BEGIN_NAMESPACE_C99 +/* Convert a string to a quadword integer. */ +__extension__ +extern long long int strtoll (const char *__restrict __nptr, + char **__restrict __endptr, int __base) + __THROW __nonnull ((1)); +/* Convert a string to an unsigned quadword integer. */ +__extension__ +extern unsigned long long int strtoull (const char *__restrict __nptr, + char **__restrict __endptr, int __base) + __THROW __nonnull ((1)); +__END_NAMESPACE_C99 +#endif /* ISO C99 or use MISC. 
*/ +# 219 "/usr/include/stdlib.h" 3 4 + + +#ifdef __USE_GNU +/* The concept of one static locale per category is not very well + thought out. Many applications will need to process its data using + information from several different locales. Another problem is + the implementation of the internationalization handling in the + ISO C++ standard library. To support this another set of + the functions using locale data exist which take an additional + argument. + + Attention: even though several *_l interfaces are part of POSIX:2008, + these are not. */ + +/* Structure for reentrant locale using functions. This is an + (almost) opaque type for the user level programs. */ +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 235 "/usr/include/stdlib.h" 3 4 +# 236 "/usr/include/stdlib.h" 3 4 + +/* Special versions of the functions above which take the locale to + use as an additional parameter. */ +extern long int strtol_l (const char *__restrict __nptr, + char **__restrict __endptr, int __base, + __locale_t __loc) __THROW __nonnull ((1, 4)); + +extern unsigned long int strtoul_l (const char *__restrict __nptr, + char **__restrict __endptr, + int __base, __locale_t __loc) + __THROW __nonnull ((1, 4)); + +__extension__ +extern long long int strtoll_l (const char *__restrict __nptr, + char **__restrict __endptr, int __base, + __locale_t __loc) + __THROW __nonnull ((1, 4)); + +__extension__ +extern unsigned long long int strtoull_l (const char *__restrict __nptr, + char **__restrict __endptr, + int __base, __locale_t __loc) + __THROW __nonnull ((1, 4)); + +extern double strtod_l (const char *__restrict __nptr, + char **__restrict __endptr, __locale_t __loc) + __THROW __nonnull ((1, 3)); + +extern float strtof_l (const char *__restrict __nptr, + char **__restrict __endptr, __locale_t __loc) + __THROW __nonnull ((1, 3)); + +extern long double strtold_l (const char *__restrict __nptr, + char **__restrict __endptr, + __locale_t 
__loc) + __THROW __nonnull ((1, 3)); +#endif /* GNU */ +# 273 "/usr/include/stdlib.h" 3 4 + + +#ifdef __USE_EXTERN_INLINES +__BEGIN_NAMESPACE_STD +__extern_inline int +__NTH (atoi (const char *__nptr)) +{ + return (int) strtol (__nptr, (char **) NULL, 10); +} +__extern_inline long int +__NTH (atol (const char *__nptr)) +{ + return strtol (__nptr, (char **) NULL, 10); +} +__END_NAMESPACE_STD + +# if defined __USE_MISC || defined __USE_ISOC99 +__BEGIN_NAMESPACE_C99 +__extension__ __extern_inline long long int +__NTH (atoll (const char *__nptr)) +{ + return strtoll (__nptr, (char **) NULL, 10); +} +__END_NAMESPACE_C99 +# endif +# 298 "/usr/include/stdlib.h" 3 4 +#endif /* Optimizing and Inlining. */ +# 299 "/usr/include/stdlib.h" 3 4 + + +#if defined __USE_SVID || defined __USE_XOPEN_EXTENDED +/* Convert N to base 64 using the digits "./0-9A-Za-z", least-significant + digit first. Returns a pointer to static storage overwritten by the + next call. */ +extern char *l64a (long int __n) __THROW __wur; + +/* Read a number from a string S in base 64 as above. */ +extern long int a64l (const char *__s) + __THROW __attribute_pure__ __nonnull ((1)) __wur; + +#endif /* Use SVID || extended X/Open. */ +# 312 "/usr/include/stdlib.h" 3 4 + +#if defined __USE_SVID || defined __USE_XOPEN_EXTENDED || defined __USE_BSD +#if 0 /* expanded by -frewrite-includes */ +# include /* we need int32_t... */ +#endif /* expanded by -frewrite-includes */ +# 314 "/usr/include/stdlib.h" 3 4 +# 315 "/usr/include/stdlib.h" 3 4 + +/* These are the functions that actually do things. The `random', `srandom', + `initstate' and `setstate' functions are those from BSD Unices. + The `rand' and `srand' functions are required by the ANSI standard. + We provide both interfaces to the same random number generator. */ +/* Return a random long integer between 0 and RAND_MAX inclusive. */ +extern long int random (void) __THROW; + +/* Seed the random number generator with the given number. 
*/ +extern void srandom (unsigned int __seed) __THROW; + +/* Initialize the random number generator to use state buffer STATEBUF, + of length STATELEN, and seed it with SEED. Optimal lengths are 8, 16, + 32, 64, 128 and 256, the bigger the better; values less than 8 will + cause an error and values greater than 256 will be rounded down. */ +extern char *initstate (unsigned int __seed, char *__statebuf, + size_t __statelen) __THROW __nonnull ((2)); + +/* Switch the random number generator to state buffer STATEBUF, + which should have been previously initialized by `initstate'. */ +extern char *setstate (char *__statebuf) __THROW __nonnull ((1)); + + +# ifdef __USE_MISC +/* Reentrant versions of the `random' family of functions. + These functions all use the following data structure to contain + state, rather than global state variables. */ + +struct random_data + { + int32_t *fptr; /* Front pointer. */ + int32_t *rptr; /* Rear pointer. */ + int32_t *state; /* Array of state values. */ + int rand_type; /* Type of random number generator. */ + int rand_deg; /* Degree of random number generator. */ + int rand_sep; /* Distance between front and rear. */ + int32_t *end_ptr; /* Pointer behind state table. */ + }; + +extern int random_r (struct random_data *__restrict __buf, + int32_t *__restrict __result) __THROW __nonnull ((1, 2)); + +extern int srandom_r (unsigned int __seed, struct random_data *__buf) + __THROW __nonnull ((2)); + +extern int initstate_r (unsigned int __seed, char *__restrict __statebuf, + size_t __statelen, + struct random_data *__restrict __buf) + __THROW __nonnull ((2, 4)); + +extern int setstate_r (char *__restrict __statebuf, + struct random_data *__restrict __buf) + __THROW __nonnull ((1, 2)); +# endif /* Use misc. */ +# 369 "/usr/include/stdlib.h" 3 4 +#endif /* Use SVID || extended X/Open || BSD. */ +# 370 "/usr/include/stdlib.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* Return a random integer between 0 and RAND_MAX inclusive. 
*/ +extern int rand (void) __THROW; +/* Seed the random number generator with the given number. */ +extern void srand (unsigned int __seed) __THROW; +__END_NAMESPACE_STD + +#ifdef __USE_POSIX +/* Reentrant interface according to POSIX.1. */ +extern int rand_r (unsigned int *__seed) __THROW; +#endif +# 383 "/usr/include/stdlib.h" 3 4 + + +#if defined __USE_SVID || defined __USE_XOPEN +/* System V style 48-bit random number generator functions. */ + +/* Return non-negative, double-precision floating-point value in [0.0,1.0). */ +extern double drand48 (void) __THROW; +extern double erand48 (unsigned short int __xsubi[3]) __THROW __nonnull ((1)); + +/* Return non-negative, long integer in [0,2^31). */ +extern long int lrand48 (void) __THROW; +extern long int nrand48 (unsigned short int __xsubi[3]) + __THROW __nonnull ((1)); + +/* Return signed, long integers in [-2^31,2^31). */ +extern long int mrand48 (void) __THROW; +extern long int jrand48 (unsigned short int __xsubi[3]) + __THROW __nonnull ((1)); + +/* Seed random number generator. */ +extern void srand48 (long int __seedval) __THROW; +extern unsigned short int *seed48 (unsigned short int __seed16v[3]) + __THROW __nonnull ((1)); +extern void lcong48 (unsigned short int __param[7]) __THROW __nonnull ((1)); + +# ifdef __USE_MISC +/* Data structure for communication with thread safe versions. This + type is to be regarded as opaque. It's only exported because users + have to allocate objects of this type. */ +struct drand48_data + { + unsigned short int __x[3]; /* Current state. */ + unsigned short int __old_x[3]; /* Old state. */ + unsigned short int __c; /* Additive const. in congruential formula. */ + unsigned short int __init; /* Flag for initializing. */ + __extension__ unsigned long long int __a; /* Factor in congruential + formula. */ + }; + +/* Return non-negative, double-precision floating-point value in [0.0,1.0). 
*/ +extern int drand48_r (struct drand48_data *__restrict __buffer, + double *__restrict __result) __THROW __nonnull ((1, 2)); +extern int erand48_r (unsigned short int __xsubi[3], + struct drand48_data *__restrict __buffer, + double *__restrict __result) __THROW __nonnull ((1, 2)); + +/* Return non-negative, long integer in [0,2^31). */ +extern int lrand48_r (struct drand48_data *__restrict __buffer, + long int *__restrict __result) + __THROW __nonnull ((1, 2)); +extern int nrand48_r (unsigned short int __xsubi[3], + struct drand48_data *__restrict __buffer, + long int *__restrict __result) + __THROW __nonnull ((1, 2)); + +/* Return signed, long integers in [-2^31,2^31). */ +extern int mrand48_r (struct drand48_data *__restrict __buffer, + long int *__restrict __result) + __THROW __nonnull ((1, 2)); +extern int jrand48_r (unsigned short int __xsubi[3], + struct drand48_data *__restrict __buffer, + long int *__restrict __result) + __THROW __nonnull ((1, 2)); + +/* Seed random number generator. */ +extern int srand48_r (long int __seedval, struct drand48_data *__buffer) + __THROW __nonnull ((2)); + +extern int seed48_r (unsigned short int __seed16v[3], + struct drand48_data *__buffer) __THROW __nonnull ((1, 2)); + +extern int lcong48_r (unsigned short int __param[7], + struct drand48_data *__buffer) + __THROW __nonnull ((1, 2)); +# endif /* Use misc. */ +# 458 "/usr/include/stdlib.h" 3 4 +#endif /* Use SVID or X/Open. */ +# 459 "/usr/include/stdlib.h" 3 4 + +#endif /* don't just need malloc and calloc */ +# 461 "/usr/include/stdlib.h" 3 4 + +#ifndef __malloc_and_calloc_defined +# define __malloc_and_calloc_defined +__BEGIN_NAMESPACE_STD +/* Allocate SIZE bytes of memory. */ +extern void *malloc (size_t __size) __THROW __attribute_malloc__ __wur; +/* Allocate NMEMB elements of SIZE bytes each, all initialized to 0. 
*/ +extern void *calloc (size_t __nmemb, size_t __size) + __THROW __attribute_malloc__ __wur; +__END_NAMESPACE_STD +#endif +# 472 "/usr/include/stdlib.h" 3 4 + +#ifndef __need_malloc_and_calloc +__BEGIN_NAMESPACE_STD +/* Re-allocate the previously allocated block + in PTR, making the new block SIZE bytes long. */ +/* __attribute_malloc__ is not used, because if realloc returns + the same pointer that was passed to it, aliasing needs to be allowed + between objects pointed by the old and new pointers. */ +extern void *realloc (void *__ptr, size_t __size) + __THROW __attribute_warn_unused_result__; +/* Free a block allocated by `malloc', `realloc' or `calloc'. */ +extern void free (void *__ptr) __THROW; +__END_NAMESPACE_STD + +#ifdef __USE_MISC +/* Free a block. An alias for `free'. (Sun Unices). */ +extern void cfree (void *__ptr) __THROW; +#endif /* Use misc. */ +# 490 "/usr/include/stdlib.h" 3 4 + +#if defined __USE_GNU || defined __USE_BSD || defined __USE_MISC +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 492 "/usr/include/stdlib.h" 3 4 +# 493 "/usr/include/stdlib.h" 3 4 +#endif /* Use GNU, BSD, or misc. */ +# 494 "/usr/include/stdlib.h" 3 4 + +#if (defined __USE_XOPEN_EXTENDED && !defined __USE_XOPEN2K) \ + || defined __USE_BSD +/* Allocate SIZE bytes on a page boundary. The storage cannot be freed. */ +extern void *valloc (size_t __size) __THROW __attribute_malloc__ __wur; +#endif +# 500 "/usr/include/stdlib.h" 3 4 + +#ifdef __USE_XOPEN2K +/* Allocate memory of SIZE bytes with an alignment of ALIGNMENT. */ +extern int posix_memalign (void **__memptr, size_t __alignment, size_t __size) + __THROW __nonnull ((1)) __wur; +#endif +# 506 "/usr/include/stdlib.h" 3 4 + +#ifdef __USE_ISOC11 +/* ISO C variant of aligned allocation. 
*/ +extern void *aligned_alloc (size_t __alignment, size_t __size) + __THROW __attribute_malloc__ __attribute_alloc_size__ ((2)) __wur; +#endif +# 512 "/usr/include/stdlib.h" 3 4 + +__BEGIN_NAMESPACE_STD +/* Abort execution and generate a core-dump. */ +extern void abort (void) __THROW __attribute__ ((__noreturn__)); + + +/* Register a function to be called when `exit' is called. */ +extern int atexit (void (*__func) (void)) __THROW __nonnull ((1)); + +#if defined __USE_ISOC11 || defined __USE_ISOCXX11 +/* Register a function to be called when `quick_exit' is called. */ +# ifdef __cplusplus +extern "C++" int at_quick_exit (void (*__func) (void)) + __THROW __asm ("at_quick_exit") __nonnull ((1)); +# else +# 527 "/usr/include/stdlib.h" 3 4 +extern int at_quick_exit (void (*__func) (void)) __THROW __nonnull ((1)); +# endif +# 529 "/usr/include/stdlib.h" 3 4 +#endif +# 530 "/usr/include/stdlib.h" 3 4 +__END_NAMESPACE_STD + +#ifdef __USE_MISC +/* Register a function to be called with the status + given to `exit' and the given argument. */ +extern int on_exit (void (*__func) (int __status, void *__arg), void *__arg) + __THROW __nonnull ((1)); +#endif +# 538 "/usr/include/stdlib.h" 3 4 + +__BEGIN_NAMESPACE_STD +/* Call all functions registered with `atexit' and `on_exit', + in the reverse of the order in which they were registered, + perform stdio cleanup, and terminate program execution with STATUS. */ +extern void exit (int __status) __THROW __attribute__ ((__noreturn__)); + +#if defined __USE_ISOC11 || defined __USE_ISOCXX11 +/* Call all functions registered with `at_quick_exit' in the reverse + of the order in which they were registered and terminate program + execution with STATUS. 
*/ +extern void quick_exit (int __status) __THROW __attribute__ ((__noreturn__)); +#endif +# 551 "/usr/include/stdlib.h" 3 4 +__END_NAMESPACE_STD + +#ifdef __USE_ISOC99 +__BEGIN_NAMESPACE_C99 +/* Terminate the program with STATUS without calling any of the + functions registered with `atexit' or `on_exit'. */ +extern void _Exit (int __status) __THROW __attribute__ ((__noreturn__)); +__END_NAMESPACE_C99 +#endif +# 560 "/usr/include/stdlib.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* Return the value of envariable NAME, or NULL if it doesn't exist. */ +extern char *getenv (const char *__name) __THROW __nonnull ((1)) __wur; +__END_NAMESPACE_STD + +#ifdef __USE_GNU +/* This function is similar to the above but returns NULL if the + programs is running with SUID or SGID enabled. */ +extern char *secure_getenv (const char *__name) + __THROW __nonnull ((1)) __wur; +#endif +# 573 "/usr/include/stdlib.h" 3 4 + +#if defined __USE_SVID || defined __USE_XOPEN +/* The SVID says this is in , but this seems a better place. */ +/* Put STRING, which is of the form "NAME=VALUE", in the environment. + If there is no `=', remove NAME from the environment. */ +extern int putenv (char *__string) __THROW __nonnull ((1)); +#endif +# 580 "/usr/include/stdlib.h" 3 4 + +#if defined __USE_BSD || defined __USE_XOPEN2K +/* Set NAME to VALUE in the environment. + If REPLACE is nonzero, overwrite an existing value. */ +extern int setenv (const char *__name, const char *__value, int __replace) + __THROW __nonnull ((2)); + +/* Remove the variable NAME from the environment. */ +extern int unsetenv (const char *__name) __THROW __nonnull ((1)); +#endif +# 590 "/usr/include/stdlib.h" 3 4 + +#ifdef __USE_MISC +/* The `clearenv' was planned to be added to POSIX.1 but probably + never made it. Nevertheless the POSIX.9 standard (POSIX bindings + for Fortran 77) requires this function. 
*/ +extern int clearenv (void) __THROW; +#endif +# 597 "/usr/include/stdlib.h" 3 4 + + +#if defined __USE_MISC \ + || (defined __USE_XOPEN_EXTENDED && !defined __USE_XOPEN2K8) +/* Generate a unique temporary file name from TEMPLATE. + The last six characters of TEMPLATE must be "XXXXXX"; + they are replaced with a string that makes the file name unique. + Always returns TEMPLATE, it's either a temporary file name or a null + string if it cannot get a unique file name. */ +extern char *mktemp (char *__template) __THROW __nonnull ((1)); +#endif +# 608 "/usr/include/stdlib.h" 3 4 + +#if defined __USE_MISC || defined __USE_XOPEN_EXTENDED \ + || defined __USE_XOPEN2K8 +/* Generate a unique temporary file name from TEMPLATE. + The last six characters of TEMPLATE must be "XXXXXX"; + they are replaced with a string that makes the filename unique. + Returns a file descriptor open on the file for reading and writing, + or -1 if it cannot create a uniquely-named file. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +# ifndef __USE_FILE_OFFSET64 +extern int mkstemp (char *__template) __nonnull ((1)) __wur; +# else +# 622 "/usr/include/stdlib.h" 3 4 +# ifdef __REDIRECT +extern int __REDIRECT (mkstemp, (char *__template), mkstemp64) + __nonnull ((1)) __wur; +# else +# 626 "/usr/include/stdlib.h" 3 4 +# define mkstemp mkstemp64 +# endif +# 628 "/usr/include/stdlib.h" 3 4 +# endif +# 629 "/usr/include/stdlib.h" 3 4 +# ifdef __USE_LARGEFILE64 +extern int mkstemp64 (char *__template) __nonnull ((1)) __wur; +# endif +# 632 "/usr/include/stdlib.h" 3 4 +#endif +# 633 "/usr/include/stdlib.h" 3 4 + +#ifdef __USE_MISC +/* Similar to mkstemp, but the template can have a suffix after the + XXXXXX. The length of the suffix is specified in the second + parameter. + + This function is a possible cancellation point and therefore not + marked with __THROW. 
*/ +# ifndef __USE_FILE_OFFSET64 +extern int mkstemps (char *__template, int __suffixlen) __nonnull ((1)) __wur; +# else +# 644 "/usr/include/stdlib.h" 3 4 +# ifdef __REDIRECT +extern int __REDIRECT (mkstemps, (char *__template, int __suffixlen), + mkstemps64) __nonnull ((1)) __wur; +# else +# 648 "/usr/include/stdlib.h" 3 4 +# define mkstemps mkstemps64 +# endif +# 650 "/usr/include/stdlib.h" 3 4 +# endif +# 651 "/usr/include/stdlib.h" 3 4 +# ifdef __USE_LARGEFILE64 +extern int mkstemps64 (char *__template, int __suffixlen) + __nonnull ((1)) __wur; +# endif +# 655 "/usr/include/stdlib.h" 3 4 +#endif +# 656 "/usr/include/stdlib.h" 3 4 + +#if defined __USE_BSD || defined __USE_XOPEN2K8 +/* Create a unique temporary directory from TEMPLATE. + The last six characters of TEMPLATE must be "XXXXXX"; + they are replaced with a string that makes the directory name unique. + Returns TEMPLATE, or a null pointer if it cannot get a unique name. + The directory is created mode 700. */ +extern char *mkdtemp (char *__template) __THROW __nonnull ((1)) __wur; +#endif +# 665 "/usr/include/stdlib.h" 3 4 + +#ifdef __USE_GNU +/* Generate a unique temporary file name from TEMPLATE similar to + mkstemp. But allow the caller to pass additional flags which are + used in the open call to create the file.. + + This function is a possible cancellation point and therefore not + marked with __THROW. 
*/ +# ifndef __USE_FILE_OFFSET64 +extern int mkostemp (char *__template, int __flags) __nonnull ((1)) __wur; +# else +# 676 "/usr/include/stdlib.h" 3 4 +# ifdef __REDIRECT +extern int __REDIRECT (mkostemp, (char *__template, int __flags), mkostemp64) + __nonnull ((1)) __wur; +# else +# 680 "/usr/include/stdlib.h" 3 4 +# define mkostemp mkostemp64 +# endif +# 682 "/usr/include/stdlib.h" 3 4 +# endif +# 683 "/usr/include/stdlib.h" 3 4 +# ifdef __USE_LARGEFILE64 +extern int mkostemp64 (char *__template, int __flags) __nonnull ((1)) __wur; +# endif +# 686 "/usr/include/stdlib.h" 3 4 + +/* Similar to mkostemp, but the template can have a suffix after the + XXXXXX. The length of the suffix is specified in the second + parameter. + + This function is a possible cancellation point and therefore not + marked with __THROW. */ +# ifndef __USE_FILE_OFFSET64 +extern int mkostemps (char *__template, int __suffixlen, int __flags) + __nonnull ((1)) __wur; +# else +# 697 "/usr/include/stdlib.h" 3 4 +# ifdef __REDIRECT +extern int __REDIRECT (mkostemps, (char *__template, int __suffixlen, + int __flags), mkostemps64) + __nonnull ((1)) __wur; +# else +# 702 "/usr/include/stdlib.h" 3 4 +# define mkostemps mkostemps64 +# endif +# 704 "/usr/include/stdlib.h" 3 4 +# endif +# 705 "/usr/include/stdlib.h" 3 4 +# ifdef __USE_LARGEFILE64 +extern int mkostemps64 (char *__template, int __suffixlen, int __flags) + __nonnull ((1)) __wur; +# endif +# 709 "/usr/include/stdlib.h" 3 4 +#endif +# 710 "/usr/include/stdlib.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* Execute the given line as a shell command. + + This function is a cancellation point and therefore not marked with + __THROW. */ +extern int system (const char *__command) __wur; +__END_NAMESPACE_STD + + +#ifdef __USE_GNU +/* Return a malloc'd string containing the canonical absolute name of the + existing named file. 
*/ +extern char *canonicalize_file_name (const char *__name) + __THROW __nonnull ((1)) __wur; +#endif +# 727 "/usr/include/stdlib.h" 3 4 + +#if defined __USE_BSD || defined __USE_XOPEN_EXTENDED +/* Return the canonical absolute name of file NAME. If RESOLVED is + null, the result is malloc'd; otherwise, if the canonical name is + PATH_MAX chars or more, returns null with `errno' set to + ENAMETOOLONG; if the name fits in fewer than PATH_MAX chars, + returns the name in RESOLVED. */ +extern char *realpath (const char *__restrict __name, + char *__restrict __resolved) __THROW __wur; +#endif +# 737 "/usr/include/stdlib.h" 3 4 + + +/* Shorthand for type of comparison functions. */ +#ifndef __COMPAR_FN_T +# define __COMPAR_FN_T +typedef int (*__compar_fn_t) (const void *, const void *); + +# ifdef __USE_GNU +typedef __compar_fn_t comparison_fn_t; +# endif +# 747 "/usr/include/stdlib.h" 3 4 +#endif +# 748 "/usr/include/stdlib.h" 3 4 +#ifdef __USE_GNU +typedef int (*__compar_d_fn_t) (const void *, const void *, void *); +#endif +# 751 "/usr/include/stdlib.h" 3 4 + +__BEGIN_NAMESPACE_STD +/* Do a binary search for KEY in BASE, which consists of NMEMB elements + of SIZE bytes each, using COMPAR to perform the comparisons. */ +extern void *bsearch (const void *__key, const void *__base, + size_t __nmemb, size_t __size, __compar_fn_t __compar) + __nonnull ((1, 2, 5)) __wur; + +#ifdef __USE_EXTERN_INLINES +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 760 "/usr/include/stdlib.h" 3 4 +# 1 "/usr/include/x86_64-linux-gnu/bits/stdlib-bsearch.h" 1 3 4 +/* Perform binary search - inline version. + Copyright (C) 1991-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +__extern_inline void * +bsearch (const void *__key, const void *__base, size_t __nmemb, size_t __size, + __compar_fn_t __compar) +{ + size_t __l, __u, __idx; + const void *__p; + int __comparison; + + __l = 0; + __u = __nmemb; + while (__l < __u) + { + __idx = (__l + __u) / 2; + __p = (void *) (((const char *) __base) + (__idx * __size)); + __comparison = (*__compar) (__key, __p); + if (__comparison < 0) + __u = __idx; + else if (__comparison > 0) + __l = __idx + 1; + else + return (void *) __p; + } + + return NULL; +} +# 761 "/usr/include/stdlib.h" 2 3 4 +#endif +# 762 "/usr/include/stdlib.h" 3 4 + +/* Sort NMEMB elements of BASE, of SIZE bytes each, + using COMPAR to perform the comparisons. */ +extern void qsort (void *__base, size_t __nmemb, size_t __size, + __compar_fn_t __compar) __nonnull ((1, 4)); +#ifdef __USE_GNU +extern void qsort_r (void *__base, size_t __nmemb, size_t __size, + __compar_d_fn_t __compar, void *__arg) + __nonnull ((1, 4)); +#endif +# 772 "/usr/include/stdlib.h" 3 4 + + +/* Return the absolute value of X. 
*/ +extern int abs (int __x) __THROW __attribute__ ((__const__)) __wur; +extern long int labs (long int __x) __THROW __attribute__ ((__const__)) __wur; +__END_NAMESPACE_STD + +#ifdef __USE_ISOC99 +__extension__ extern long long int llabs (long long int __x) + __THROW __attribute__ ((__const__)) __wur; +#endif +# 783 "/usr/include/stdlib.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* Return the `div_t', `ldiv_t' or `lldiv_t' representation + of the value of NUMER over DENOM. */ +/* GCC may have built-ins for these someday. */ +extern div_t div (int __numer, int __denom) + __THROW __attribute__ ((__const__)) __wur; +extern ldiv_t ldiv (long int __numer, long int __denom) + __THROW __attribute__ ((__const__)) __wur; +__END_NAMESPACE_STD + +#ifdef __USE_ISOC99 +__BEGIN_NAMESPACE_C99 +__extension__ extern lldiv_t lldiv (long long int __numer, + long long int __denom) + __THROW __attribute__ ((__const__)) __wur; +__END_NAMESPACE_C99 +#endif +# 802 "/usr/include/stdlib.h" 3 4 + + +#if (defined __USE_XOPEN_EXTENDED && !defined __USE_XOPEN2K8) \ + || defined __USE_SVID +/* Convert floating point numbers to strings. The returned values are + valid only until another call to the same function. */ + +/* Convert VALUE to a string with NDIGIT digits and return a pointer to + this. Set *DECPT with the position of the decimal character and *SIGN + with the sign of the number. */ +extern char *ecvt (double __value, int __ndigit, int *__restrict __decpt, + int *__restrict __sign) __THROW __nonnull ((3, 4)) __wur; + +/* Convert VALUE to a string rounded to NDIGIT decimal digits. Set *DECPT + with the position of the decimal character and *SIGN with the sign of + the number. */ +extern char *fcvt (double __value, int __ndigit, int *__restrict __decpt, + int *__restrict __sign) __THROW __nonnull ((3, 4)) __wur; + +/* If possible convert VALUE to a string with NDIGIT significant digits. + Otherwise use exponential representation. The resulting string will + be written to BUF. 
*/ +extern char *gcvt (double __value, int __ndigit, char *__buf) + __THROW __nonnull ((3)) __wur; +#endif +# 827 "/usr/include/stdlib.h" 3 4 + +#ifdef __USE_MISC +/* Long double versions of above functions. */ +extern char *qecvt (long double __value, int __ndigit, + int *__restrict __decpt, int *__restrict __sign) + __THROW __nonnull ((3, 4)) __wur; +extern char *qfcvt (long double __value, int __ndigit, + int *__restrict __decpt, int *__restrict __sign) + __THROW __nonnull ((3, 4)) __wur; +extern char *qgcvt (long double __value, int __ndigit, char *__buf) + __THROW __nonnull ((3)) __wur; + + +/* Reentrant version of the functions above which provide their own + buffers. */ +extern int ecvt_r (double __value, int __ndigit, int *__restrict __decpt, + int *__restrict __sign, char *__restrict __buf, + size_t __len) __THROW __nonnull ((3, 4, 5)); +extern int fcvt_r (double __value, int __ndigit, int *__restrict __decpt, + int *__restrict __sign, char *__restrict __buf, + size_t __len) __THROW __nonnull ((3, 4, 5)); + +extern int qecvt_r (long double __value, int __ndigit, + int *__restrict __decpt, int *__restrict __sign, + char *__restrict __buf, size_t __len) + __THROW __nonnull ((3, 4, 5)); +extern int qfcvt_r (long double __value, int __ndigit, + int *__restrict __decpt, int *__restrict __sign, + char *__restrict __buf, size_t __len) + __THROW __nonnull ((3, 4, 5)); +#endif /* misc */ +# 858 "/usr/include/stdlib.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* Return the length of the multibyte character + in S, which is no longer than N. */ +extern int mblen (const char *__s, size_t __n) __THROW; +/* Return the length of the given multibyte character, + putting its `wchar_t' representation in *PWC. */ +extern int mbtowc (wchar_t *__restrict __pwc, + const char *__restrict __s, size_t __n) __THROW; +/* Put the multibyte character represented + by WCHAR in S, returning its length. 
*/ +extern int wctomb (char *__s, wchar_t __wchar) __THROW; + + +/* Convert a multibyte string to a wide char string. */ +extern size_t mbstowcs (wchar_t *__restrict __pwcs, + const char *__restrict __s, size_t __n) __THROW; +/* Convert a wide char string to multibyte string. */ +extern size_t wcstombs (char *__restrict __s, + const wchar_t *__restrict __pwcs, size_t __n) + __THROW; +__END_NAMESPACE_STD + + +#ifdef __USE_SVID +/* Determine whether the string value of RESPONSE matches the affirmation + or negative response expression as specified by the LC_MESSAGES category + in the program's current locale. Returns 1 if affirmative, 0 if + negative, and -1 if not matching. */ +extern int rpmatch (const char *__response) __THROW __nonnull ((1)) __wur; +#endif +# 890 "/usr/include/stdlib.h" 3 4 + + +#if defined __USE_XOPEN_EXTENDED || defined __USE_XOPEN2K8 +/* Parse comma separated suboption from *OPTIONP and match against + strings in TOKENS. If found return index and set *VALUEP to + optional value introduced by an equal sign. If the suboption is + not part of TOKENS return in *VALUEP beginning of unknown + suboption. On exit *OPTIONP is set to the beginning of the next + token or at the terminating NUL character. */ +extern int getsubopt (char **__restrict __optionp, + char *const *__restrict __tokens, + char **__restrict __valuep) + __THROW __nonnull ((1, 2, 3)) __wur; +#endif +# 904 "/usr/include/stdlib.h" 3 4 + + +#ifdef __USE_XOPEN +/* Setup DES tables according KEY. */ +extern void setkey (const char *__key) __THROW __nonnull ((1)); +#endif +# 910 "/usr/include/stdlib.h" 3 4 + + +/* X/Open pseudo terminal handling. */ + +#ifdef __USE_XOPEN2KXSI +/* Return a master pseudo-terminal handle. */ +extern int posix_openpt (int __oflag) __wur; +#endif +# 918 "/usr/include/stdlib.h" 3 4 + +#ifdef __USE_XOPEN +/* The next four functions all take a master pseudo-tty fd and + perform an operation on the associated slave: */ + +/* Chown the slave to the calling user. 
*/ +extern int grantpt (int __fd) __THROW; + +/* Release an internal lock so the slave can be opened. + Call after grantpt(). */ +extern int unlockpt (int __fd) __THROW; + +/* Return the pathname of the pseudo terminal slave associated with + the master FD is open on, or NULL on errors. + The returned storage is good until the next call to this function. */ +extern char *ptsname (int __fd) __THROW __wur; +#endif +# 935 "/usr/include/stdlib.h" 3 4 + +#ifdef __USE_GNU +/* Store at most BUFLEN characters of the pathname of the slave pseudo + terminal associated with the master FD is open on in BUF. + Return 0 on success, otherwise an error number. */ +extern int ptsname_r (int __fd, char *__buf, size_t __buflen) + __THROW __nonnull ((2)); + +/* Open a master pseudo terminal and return its file descriptor. */ +extern int getpt (void); +#endif +# 946 "/usr/include/stdlib.h" 3 4 + +#ifdef __USE_BSD +/* Put the 1 minute, 5 minute and 15 minute load averages into the first + NELEM elements of LOADAVG. Return the number written (never more than + three, but may be less than NELEM), or -1 if an error occurred. */ +extern int getloadavg (double __loadavg[], int __nelem) + __THROW __nonnull ((1)); +#endif +# 954 "/usr/include/stdlib.h" 3 4 + +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 955 "/usr/include/stdlib.h" 3 4 +# 1 "/usr/include/x86_64-linux-gnu/bits/stdlib-float.h" 1 3 4 +/* Floating-point inline functions for stdlib.h. + Copyright (C) 2012-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _STDLIB_H +# error "Never use directly; include instead." +#endif +# 22 "/usr/include/x86_64-linux-gnu/bits/stdlib-float.h" 3 4 + +#ifdef __USE_EXTERN_INLINES +__BEGIN_NAMESPACE_STD +__extern_inline double +__NTH (atof (const char *__nptr)) +{ + return strtod (__nptr, (char **) NULL); +} +__END_NAMESPACE_STD +#endif /* Optimizing and Inlining. */ +# 32 "/usr/include/x86_64-linux-gnu/bits/stdlib-float.h" 3 4 +# 956 "/usr/include/stdlib.h" 2 3 4 + +/* Define some macros helping to catch buffer overflows. */ +#if __USE_FORTIFY_LEVEL > 0 && defined __fortify_function +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 959 "/usr/include/stdlib.h" 3 4 +# 960 "/usr/include/stdlib.h" 3 4 +#endif +# 961 "/usr/include/stdlib.h" 3 4 +#ifdef __LDBL_COMPAT +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 962 "/usr/include/stdlib.h" 3 4 +# 963 "/usr/include/stdlib.h" 3 4 +#endif +# 964 "/usr/include/stdlib.h" 3 4 + +#endif /* don't just need malloc and calloc */ +# 966 "/usr/include/stdlib.h" 3 4 +#undef __need_malloc_and_calloc + +__END_DECLS + +#endif /* stdlib.h */ +# 971 "/usr/include/stdlib.h" 3 4 +# 4 "oski.c" 2 +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 4 "oski.c" +# 1 "/usr/include/string.h" 1 3 4 +/* Copyright (C) 1991-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. 
+ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +/* + * ISO C99 Standard: 7.21 String handling + */ + +#ifndef _STRING_H +#define _STRING_H 1 + +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 25 "/usr/include/string.h" 3 4 +# 26 "/usr/include/string.h" 3 4 + +__BEGIN_DECLS + +/* Get size_t and NULL from . */ +#define __need_size_t +#define __need_NULL +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 32 "/usr/include/string.h" 3 4 +# 1 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 1 3 4 +/*===---- stddef.h - Basic type definitions --------------------------------=== + * + * Copyright (c) 2008 Eli Friedman + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#if !defined(__STDDEF_H) || defined(__need_ptrdiff_t) || \ + defined(__need_size_t) || defined(__need_wchar_t) || \ + defined(__need_NULL) || defined(__need_wint_t) + +#if !defined(__need_ptrdiff_t) && !defined(__need_size_t) && \ + !defined(__need_wchar_t) && !defined(__need_NULL) && \ + !defined(__need_wint_t) +/* Always define miscellaneous pieces when modules are available. */ +#if !__has_feature(modules) +#define __STDDEF_H +#endif +# 37 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#define __need_ptrdiff_t +#define __need_size_t +#define __need_wchar_t +#define __need_NULL +#define __need_STDDEF_H_misc +/* __need_wint_t is intentionally not defined here. */ +#endif +# 44 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#if defined(__need_ptrdiff_t) +#if !defined(_PTRDIFF_T) || __has_feature(modules) +/* Always define ptrdiff_t when modules are available. 
*/ +#if !__has_feature(modules) +#define _PTRDIFF_T +#endif +# 51 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +typedef __PTRDIFF_TYPE__ ptrdiff_t; +#endif +# 53 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#undef __need_ptrdiff_t +#endif /* defined(__need_ptrdiff_t) */ +# 55 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#if defined(__need_size_t) +#if !defined(_SIZE_T) || __has_feature(modules) +/* Always define size_t when modules are available. */ +#if !__has_feature(modules) +#define _SIZE_T +#endif +# 62 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +typedef __SIZE_TYPE__ size_t; +#endif +# 64 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#undef __need_size_t +#endif /*defined(__need_size_t) */ +# 66 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#if defined(__need_STDDEF_H_misc) +/* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is + * enabled. */ +#if (defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1 && \ + !defined(_RSIZE_T)) || __has_feature(modules) +/* Always define rsize_t when modules are available. */ +#if !__has_feature(modules) +#define _RSIZE_T +#endif +# 76 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +typedef __SIZE_TYPE__ rsize_t; +#endif +# 78 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#endif /* defined(__need_STDDEF_H_misc) */ +# 79 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#if defined(__need_wchar_t) +#ifndef __cplusplus +/* Always define wchar_t when modules are available. 
*/ +#if !defined(_WCHAR_T) || __has_feature(modules) +#if !__has_feature(modules) +#define _WCHAR_T +#if defined(_MSC_EXTENSIONS) +#define _WCHAR_T_DEFINED +#endif +# 89 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#endif +# 90 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +typedef __WCHAR_TYPE__ wchar_t; +#endif +# 92 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#endif +# 93 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#undef __need_wchar_t +#endif /* defined(__need_wchar_t) */ +# 95 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#if defined(__need_NULL) +#undef NULL +#ifdef __cplusplus +# if !defined(__MINGW32__) && !defined(_MSC_VER) +# define NULL __null +# else +# 102 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +# define NULL 0 +# endif +# 104 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#else +# 105 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +# define NULL ((void*)0) +#endif +# 107 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#ifdef __cplusplus +#if defined(_MSC_EXTENSIONS) && defined(_NATIVE_NULLPTR_SUPPORTED) +namespace std { typedef decltype(nullptr) nullptr_t; } +using ::std::nullptr_t; +#endif +# 112 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#endif +# 113 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#undef __need_NULL +#endif /* defined(__need_NULL) */ +# 115 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#if defined(__need_STDDEF_H_misc) +#if __STDC_VERSION__ >= 201112L || __cplusplus >= 201103L +#if 0 /* expanded by -frewrite-includes */ +#include "__stddef_max_align_t.h" +#endif /* expanded by -frewrite-includes */ +# 118 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 
+# 119 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#endif +# 120 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#define offsetof(t, d) __builtin_offsetof(t, d) +#undef __need_STDDEF_H_misc +#endif /* defined(__need_STDDEF_H_misc) */ +# 123 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +/* Some C libraries expect to see a wint_t here. Others (notably MinGW) will use +__WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */ +#if defined(__need_wint_t) +/* Always define wint_t when modules are available. */ +#if !defined(_WINT_T) || __has_feature(modules) +#if !__has_feature(modules) +#define _WINT_T +#endif +# 132 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +typedef __WINT_TYPE__ wint_t; +#endif +# 134 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +#undef __need_wint_t +#endif /* __need_wint_t */ +# 136 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 + +#endif +# 138 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/stddef.h" 3 4 +# 33 "/usr/include/string.h" 2 3 4 + +/* Provide correct C++ prototypes, and indicate this to the caller. This + requires a compatible C++ standard library. As a heuristic, we provide + these when the compiler indicates full conformance with C++98 or later, + and for older GCC versions that are known to provide a compatible + libstdc++. */ +#if defined __cplusplus && (__cplusplus >= 199711L || __GNUC_PREREQ (4, 4)) +# define __CORRECT_ISO_CPP_STRING_H_PROTO +#endif +# 42 "/usr/include/string.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* Copy N bytes of SRC to DEST. */ +extern void *memcpy (void *__restrict __dest, const void *__restrict __src, + size_t __n) __THROW __nonnull ((1, 2)); +/* Copy N bytes of SRC to DEST, guaranteeing + correct behavior for overlapping strings. 
*/ +extern void *memmove (void *__dest, const void *__src, size_t __n) + __THROW __nonnull ((1, 2)); +__END_NAMESPACE_STD + +/* Copy no more than N bytes of SRC to DEST, stopping when C is found. + Return the position in DEST one byte past where C was copied, + or NULL if C was not found in the first N bytes of SRC. */ +#if defined __USE_SVID || defined __USE_BSD || defined __USE_XOPEN +extern void *memccpy (void *__restrict __dest, const void *__restrict __src, + int __c, size_t __n) + __THROW __nonnull ((1, 2)); +#endif /* SVID. */ +# 62 "/usr/include/string.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* Set N bytes of S to C. */ +extern void *memset (void *__s, int __c, size_t __n) __THROW __nonnull ((1)); + +/* Compare N bytes of S1 and S2. */ +extern int memcmp (const void *__s1, const void *__s2, size_t __n) + __THROW __attribute_pure__ __nonnull ((1, 2)); + +/* Search N bytes of S for C. */ +#ifdef __CORRECT_ISO_CPP_STRING_H_PROTO +extern "C++" +{ +extern void *memchr (void *__s, int __c, size_t __n) + __THROW __asm ("memchr") __attribute_pure__ __nonnull ((1)); +extern const void *memchr (const void *__s, int __c, size_t __n) + __THROW __asm ("memchr") __attribute_pure__ __nonnull ((1)); + +# ifdef __OPTIMIZE__ +__extern_always_inline void * +memchr (void *__s, int __c, size_t __n) __THROW +{ + return __builtin_memchr (__s, __c, __n); +} + +__extern_always_inline const void * +memchr (const void *__s, int __c, size_t __n) __THROW +{ + return __builtin_memchr (__s, __c, __n); +} +# endif +# 94 "/usr/include/string.h" 3 4 +} +#else +# 96 "/usr/include/string.h" 3 4 +extern void *memchr (const void *__s, int __c, size_t __n) + __THROW __attribute_pure__ __nonnull ((1)); +#endif +# 99 "/usr/include/string.h" 3 4 +__END_NAMESPACE_STD + +#ifdef __USE_GNU +/* Search in S for C. This is similar to `memchr' but there is no + length limit. 
*/ +# ifdef __CORRECT_ISO_CPP_STRING_H_PROTO +extern "C++" void *rawmemchr (void *__s, int __c) + __THROW __asm ("rawmemchr") __attribute_pure__ __nonnull ((1)); +extern "C++" const void *rawmemchr (const void *__s, int __c) + __THROW __asm ("rawmemchr") __attribute_pure__ __nonnull ((1)); +# else +# 110 "/usr/include/string.h" 3 4 +extern void *rawmemchr (const void *__s, int __c) + __THROW __attribute_pure__ __nonnull ((1)); +# endif +# 113 "/usr/include/string.h" 3 4 + +/* Search N bytes of S for the final occurrence of C. */ +# ifdef __CORRECT_ISO_CPP_STRING_H_PROTO +extern "C++" void *memrchr (void *__s, int __c, size_t __n) + __THROW __asm ("memrchr") __attribute_pure__ __nonnull ((1)); +extern "C++" const void *memrchr (const void *__s, int __c, size_t __n) + __THROW __asm ("memrchr") __attribute_pure__ __nonnull ((1)); +# else +# 121 "/usr/include/string.h" 3 4 +extern void *memrchr (const void *__s, int __c, size_t __n) + __THROW __attribute_pure__ __nonnull ((1)); +# endif +# 124 "/usr/include/string.h" 3 4 +#endif +# 125 "/usr/include/string.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* Copy SRC to DEST. */ +extern char *strcpy (char *__restrict __dest, const char *__restrict __src) + __THROW __nonnull ((1, 2)); +/* Copy no more than N characters of SRC to DEST. */ +extern char *strncpy (char *__restrict __dest, + const char *__restrict __src, size_t __n) + __THROW __nonnull ((1, 2)); + +/* Append SRC onto DEST. */ +extern char *strcat (char *__restrict __dest, const char *__restrict __src) + __THROW __nonnull ((1, 2)); +/* Append no more than N characters from SRC onto DEST. */ +extern char *strncat (char *__restrict __dest, const char *__restrict __src, + size_t __n) __THROW __nonnull ((1, 2)); + +/* Compare S1 and S2. */ +extern int strcmp (const char *__s1, const char *__s2) + __THROW __attribute_pure__ __nonnull ((1, 2)); +/* Compare N characters of S1 and S2. 
*/ +extern int strncmp (const char *__s1, const char *__s2, size_t __n) + __THROW __attribute_pure__ __nonnull ((1, 2)); + +/* Compare the collated forms of S1 and S2. */ +extern int strcoll (const char *__s1, const char *__s2) + __THROW __attribute_pure__ __nonnull ((1, 2)); +/* Put a transformation of SRC into no more than N bytes of DEST. */ +extern size_t strxfrm (char *__restrict __dest, + const char *__restrict __src, size_t __n) + __THROW __nonnull ((2)); +__END_NAMESPACE_STD + +#ifdef __USE_XOPEN2K8 +/* The following functions are equivalent to the both above but they + take the locale they use for the collation as an extra argument. + This is not standardsized but something like will come. */ +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 163 "/usr/include/string.h" 3 4 +# 164 "/usr/include/string.h" 3 4 + +/* Compare the collated forms of S1 and S2 using rules from L. */ +extern int strcoll_l (const char *__s1, const char *__s2, __locale_t __l) + __THROW __attribute_pure__ __nonnull ((1, 2, 3)); +/* Put a transformation of SRC into no more than N bytes of DEST. */ +extern size_t strxfrm_l (char *__dest, const char *__src, size_t __n, + __locale_t __l) __THROW __nonnull ((2, 4)); +#endif +# 172 "/usr/include/string.h" 3 4 + +#if defined __USE_SVID || defined __USE_BSD || defined __USE_XOPEN_EXTENDED \ + || defined __USE_XOPEN2K8 +/* Duplicate S, returning an identical malloc'd string. */ +extern char *strdup (const char *__s) + __THROW __attribute_malloc__ __nonnull ((1)); +#endif +# 179 "/usr/include/string.h" 3 4 + +/* Return a malloc'd copy of at most N bytes of STRING. The + resultant string is terminated even if no null terminator + appears before STRING[N]. 
*/ +#if defined __USE_XOPEN2K8 +extern char *strndup (const char *__string, size_t __n) + __THROW __attribute_malloc__ __nonnull ((1)); +#endif +# 187 "/usr/include/string.h" 3 4 + +#if defined __USE_GNU && defined __GNUC__ +/* Duplicate S, returning an identical alloca'd string. */ +# define strdupa(s) \ + (__extension__ \ + ({ \ + const char *__old = (s); \ + size_t __len = strlen (__old) + 1; \ + char *__new = (char *) __builtin_alloca (__len); \ + (char *) memcpy (__new, __old, __len); \ + })) + +/* Return an alloca'd copy of at most N bytes of string. */ +# define strndupa(s, n) \ + (__extension__ \ + ({ \ + const char *__old = (s); \ + size_t __len = strnlen (__old, (n)); \ + char *__new = (char *) __builtin_alloca (__len + 1); \ + __new[__len] = '\0'; \ + (char *) memcpy (__new, __old, __len); \ + })) +#endif +# 210 "/usr/include/string.h" 3 4 + +__BEGIN_NAMESPACE_STD +/* Find the first occurrence of C in S. */ +#ifdef __CORRECT_ISO_CPP_STRING_H_PROTO +extern "C++" +{ +extern char *strchr (char *__s, int __c) + __THROW __asm ("strchr") __attribute_pure__ __nonnull ((1)); +extern const char *strchr (const char *__s, int __c) + __THROW __asm ("strchr") __attribute_pure__ __nonnull ((1)); + +# ifdef __OPTIMIZE__ +__extern_always_inline char * +strchr (char *__s, int __c) __THROW +{ + return __builtin_strchr (__s, __c); +} + +__extern_always_inline const char * +strchr (const char *__s, int __c) __THROW +{ + return __builtin_strchr (__s, __c); +} +# endif +# 234 "/usr/include/string.h" 3 4 +} +#else +# 236 "/usr/include/string.h" 3 4 +extern char *strchr (const char *__s, int __c) + __THROW __attribute_pure__ __nonnull ((1)); +#endif +# 239 "/usr/include/string.h" 3 4 +/* Find the last occurrence of C in S. 
*/ +#ifdef __CORRECT_ISO_CPP_STRING_H_PROTO +extern "C++" +{ +extern char *strrchr (char *__s, int __c) + __THROW __asm ("strrchr") __attribute_pure__ __nonnull ((1)); +extern const char *strrchr (const char *__s, int __c) + __THROW __asm ("strrchr") __attribute_pure__ __nonnull ((1)); + +# ifdef __OPTIMIZE__ +__extern_always_inline char * +strrchr (char *__s, int __c) __THROW +{ + return __builtin_strrchr (__s, __c); +} + +__extern_always_inline const char * +strrchr (const char *__s, int __c) __THROW +{ + return __builtin_strrchr (__s, __c); +} +# endif +# 261 "/usr/include/string.h" 3 4 +} +#else +# 263 "/usr/include/string.h" 3 4 +extern char *strrchr (const char *__s, int __c) + __THROW __attribute_pure__ __nonnull ((1)); +#endif +# 266 "/usr/include/string.h" 3 4 +__END_NAMESPACE_STD + +#ifdef __USE_GNU +/* This function is similar to `strchr'. But it returns a pointer to + the closing NUL byte in case C is not found in S. */ +# ifdef __CORRECT_ISO_CPP_STRING_H_PROTO +extern "C++" char *strchrnul (char *__s, int __c) + __THROW __asm ("strchrnul") __attribute_pure__ __nonnull ((1)); +extern "C++" const char *strchrnul (const char *__s, int __c) + __THROW __asm ("strchrnul") __attribute_pure__ __nonnull ((1)); +# else +# 277 "/usr/include/string.h" 3 4 +extern char *strchrnul (const char *__s, int __c) + __THROW __attribute_pure__ __nonnull ((1)); +# endif +# 280 "/usr/include/string.h" 3 4 +#endif +# 281 "/usr/include/string.h" 3 4 + +__BEGIN_NAMESPACE_STD +/* Return the length of the initial segment of S which + consists entirely of characters not in REJECT. */ +extern size_t strcspn (const char *__s, const char *__reject) + __THROW __attribute_pure__ __nonnull ((1, 2)); +/* Return the length of the initial segment of S which + consists entirely of characters in ACCEPT. */ +extern size_t strspn (const char *__s, const char *__accept) + __THROW __attribute_pure__ __nonnull ((1, 2)); +/* Find the first occurrence in S of any character in ACCEPT. 
*/ +#ifdef __CORRECT_ISO_CPP_STRING_H_PROTO +extern "C++" +{ +extern char *strpbrk (char *__s, const char *__accept) + __THROW __asm ("strpbrk") __attribute_pure__ __nonnull ((1, 2)); +extern const char *strpbrk (const char *__s, const char *__accept) + __THROW __asm ("strpbrk") __attribute_pure__ __nonnull ((1, 2)); + +# ifdef __OPTIMIZE__ +__extern_always_inline char * +strpbrk (char *__s, const char *__accept) __THROW +{ + return __builtin_strpbrk (__s, __accept); +} + +__extern_always_inline const char * +strpbrk (const char *__s, const char *__accept) __THROW +{ + return __builtin_strpbrk (__s, __accept); +} +# endif +# 313 "/usr/include/string.h" 3 4 +} +#else +# 315 "/usr/include/string.h" 3 4 +extern char *strpbrk (const char *__s, const char *__accept) + __THROW __attribute_pure__ __nonnull ((1, 2)); +#endif +# 318 "/usr/include/string.h" 3 4 +/* Find the first occurrence of NEEDLE in HAYSTACK. */ +#ifdef __CORRECT_ISO_CPP_STRING_H_PROTO +extern "C++" +{ +extern char *strstr (char *__haystack, const char *__needle) + __THROW __asm ("strstr") __attribute_pure__ __nonnull ((1, 2)); +extern const char *strstr (const char *__haystack, const char *__needle) + __THROW __asm ("strstr") __attribute_pure__ __nonnull ((1, 2)); + +# ifdef __OPTIMIZE__ +__extern_always_inline char * +strstr (char *__haystack, const char *__needle) __THROW +{ + return __builtin_strstr (__haystack, __needle); +} + +__extern_always_inline const char * +strstr (const char *__haystack, const char *__needle) __THROW +{ + return __builtin_strstr (__haystack, __needle); +} +# endif +# 340 "/usr/include/string.h" 3 4 +} +#else +# 342 "/usr/include/string.h" 3 4 +extern char *strstr (const char *__haystack, const char *__needle) + __THROW __attribute_pure__ __nonnull ((1, 2)); +#endif +# 345 "/usr/include/string.h" 3 4 + + +/* Divide S into tokens separated by characters in DELIM. 
*/ +extern char *strtok (char *__restrict __s, const char *__restrict __delim) + __THROW __nonnull ((2)); +__END_NAMESPACE_STD + +/* Divide S into tokens separated by characters in DELIM. Information + passed between calls are stored in SAVE_PTR. */ +extern char *__strtok_r (char *__restrict __s, + const char *__restrict __delim, + char **__restrict __save_ptr) + __THROW __nonnull ((2, 3)); +#if defined __USE_POSIX || defined __USE_MISC +extern char *strtok_r (char *__restrict __s, const char *__restrict __delim, + char **__restrict __save_ptr) + __THROW __nonnull ((2, 3)); +#endif +# 363 "/usr/include/string.h" 3 4 + +#ifdef __USE_GNU +/* Similar to `strstr' but this function ignores the case of both strings. */ +# ifdef __CORRECT_ISO_CPP_STRING_H_PROTO +extern "C++" char *strcasestr (char *__haystack, const char *__needle) + __THROW __asm ("strcasestr") __attribute_pure__ __nonnull ((1, 2)); +extern "C++" const char *strcasestr (const char *__haystack, + const char *__needle) + __THROW __asm ("strcasestr") __attribute_pure__ __nonnull ((1, 2)); +# else +# 373 "/usr/include/string.h" 3 4 +extern char *strcasestr (const char *__haystack, const char *__needle) + __THROW __attribute_pure__ __nonnull ((1, 2)); +# endif +# 376 "/usr/include/string.h" 3 4 +#endif +# 377 "/usr/include/string.h" 3 4 + +#ifdef __USE_GNU +/* Find the first occurrence of NEEDLE in HAYSTACK. + NEEDLE is NEEDLELEN bytes long; + HAYSTACK is HAYSTACKLEN bytes long. */ +extern void *memmem (const void *__haystack, size_t __haystacklen, + const void *__needle, size_t __needlelen) + __THROW __attribute_pure__ __nonnull ((1, 3)); + +/* Copy N bytes of SRC to DEST, return pointer to bytes after the + last written byte. 
*/ +extern void *__mempcpy (void *__restrict __dest, + const void *__restrict __src, size_t __n) + __THROW __nonnull ((1, 2)); +extern void *mempcpy (void *__restrict __dest, + const void *__restrict __src, size_t __n) + __THROW __nonnull ((1, 2)); +#endif +# 395 "/usr/include/string.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* Return the length of S. */ +extern size_t strlen (const char *__s) + __THROW __attribute_pure__ __nonnull ((1)); +__END_NAMESPACE_STD + +#ifdef __USE_XOPEN2K8 +/* Find the length of STRING, but scan at most MAXLEN characters. + If no '\0' terminator is found in that many characters, return MAXLEN. */ +extern size_t strnlen (const char *__string, size_t __maxlen) + __THROW __attribute_pure__ __nonnull ((1)); +#endif +# 409 "/usr/include/string.h" 3 4 + + +__BEGIN_NAMESPACE_STD +/* Return a string describing the meaning of the `errno' code in ERRNUM. */ +extern char *strerror (int __errnum) __THROW; +__END_NAMESPACE_STD +#if defined __USE_XOPEN2K || defined __USE_MISC +/* Reentrant version of `strerror'. + There are 2 flavors of `strerror_r', GNU which returns the string + and may or may not use the supplied temporary buffer and POSIX one + which fills the string into the buffer. + To use the POSIX version, -D_XOPEN_SOURCE=600 or -D_POSIX_C_SOURCE=200112L + without -D_GNU_SOURCE is needed, otherwise the GNU version is + preferred. */ +# if defined __USE_XOPEN2K && !defined __USE_GNU +/* Fill BUF with a string describing the meaning of the `errno' code in + ERRNUM. 
*/ +# ifdef __REDIRECT_NTH +extern int __REDIRECT_NTH (strerror_r, + (int __errnum, char *__buf, size_t __buflen), + __xpg_strerror_r) __nonnull ((2)); +# else +# 431 "/usr/include/string.h" 3 4 +extern int __xpg_strerror_r (int __errnum, char *__buf, size_t __buflen) + __THROW __nonnull ((2)); +# define strerror_r __xpg_strerror_r +# endif +# 435 "/usr/include/string.h" 3 4 +# else +# 436 "/usr/include/string.h" 3 4 +/* If a temporary buffer is required, at most BUFLEN bytes of BUF will be + used. */ +extern char *strerror_r (int __errnum, char *__buf, size_t __buflen) + __THROW __nonnull ((2)) __wur; +# endif +# 441 "/usr/include/string.h" 3 4 +#endif +# 442 "/usr/include/string.h" 3 4 + +#ifdef __USE_XOPEN2K8 +/* Translate error number to string according to the locale L. */ +extern char *strerror_l (int __errnum, __locale_t __l) __THROW; +#endif +# 447 "/usr/include/string.h" 3 4 + + +/* We define this function always since `bzero' is sometimes needed when + the namespace rules does not allow this. */ +extern void __bzero (void *__s, size_t __n) __THROW __nonnull ((1)); + +#ifdef __USE_BSD +/* Copy N bytes of SRC to DEST (like memmove, but args reversed). */ +extern void bcopy (const void *__src, void *__dest, size_t __n) + __THROW __nonnull ((1, 2)); + +/* Set N bytes of S to 0. */ +extern void bzero (void *__s, size_t __n) __THROW __nonnull ((1)); + +/* Compare N bytes of S1 and S2 (same as memcmp). */ +extern int bcmp (const void *__s1, const void *__s2, size_t __n) + __THROW __attribute_pure__ __nonnull ((1, 2)); + +/* Find the first occurrence of C in S (same as strchr). 
*/ +# ifdef __CORRECT_ISO_CPP_STRING_H_PROTO +extern "C++" +{ +extern char *index (char *__s, int __c) + __THROW __asm ("index") __attribute_pure__ __nonnull ((1)); +extern const char *index (const char *__s, int __c) + __THROW __asm ("index") __attribute_pure__ __nonnull ((1)); + +# if defined __OPTIMIZE__ && !defined __CORRECT_ISO_CPP_STRINGS_H_PROTO +__extern_always_inline char * +index (char *__s, int __c) __THROW +{ + return __builtin_index (__s, __c); +} + +__extern_always_inline const char * +index (const char *__s, int __c) __THROW +{ + return __builtin_index (__s, __c); +} +# endif +# 487 "/usr/include/string.h" 3 4 +} +# else +# 489 "/usr/include/string.h" 3 4 +extern char *index (const char *__s, int __c) + __THROW __attribute_pure__ __nonnull ((1)); +# endif +# 492 "/usr/include/string.h" 3 4 + +/* Find the last occurrence of C in S (same as strrchr). */ +# ifdef __CORRECT_ISO_CPP_STRING_H_PROTO +extern "C++" +{ +extern char *rindex (char *__s, int __c) + __THROW __asm ("rindex") __attribute_pure__ __nonnull ((1)); +extern const char *rindex (const char *__s, int __c) + __THROW __asm ("rindex") __attribute_pure__ __nonnull ((1)); + +# if defined __OPTIMIZE__ && !defined __CORRECT_ISO_CPP_STRINGS_H_PROTO +__extern_always_inline char * +rindex (char *__s, int __c) __THROW +{ + return __builtin_rindex (__s, __c); +} + +__extern_always_inline const char * +rindex (const char *__s, int __c) __THROW +{ + return __builtin_rindex (__s, __c); +} +#endif +# 515 "/usr/include/string.h" 3 4 +} +# else +# 517 "/usr/include/string.h" 3 4 +extern char *rindex (const char *__s, int __c) + __THROW __attribute_pure__ __nonnull ((1)); +# endif +# 520 "/usr/include/string.h" 3 4 + +/* Return the position of the first bit set in I, or 0 if none are set. + The least-significant bit is position 1, the most-significant 32. 
*/ +extern int ffs (int __i) __THROW __attribute__ ((__const__)); + +/* The following two functions are non-standard but necessary for non-32 bit + platforms. */ +# ifdef __USE_GNU +extern int ffsl (long int __l) __THROW __attribute__ ((__const__)); +__extension__ extern int ffsll (long long int __ll) + __THROW __attribute__ ((__const__)); +# endif +# 532 "/usr/include/string.h" 3 4 + +/* Compare S1 and S2, ignoring case. */ +extern int strcasecmp (const char *__s1, const char *__s2) + __THROW __attribute_pure__ __nonnull ((1, 2)); + +/* Compare no more than N chars of S1 and S2, ignoring case. */ +extern int strncasecmp (const char *__s1, const char *__s2, size_t __n) + __THROW __attribute_pure__ __nonnull ((1, 2)); +#endif /* Use BSD. */ +# 541 "/usr/include/string.h" 3 4 + +#ifdef __USE_GNU +/* Again versions of a few functions which use the given locale instead + of the global one. */ +extern int strcasecmp_l (const char *__s1, const char *__s2, + __locale_t __loc) + __THROW __attribute_pure__ __nonnull ((1, 2, 3)); + +extern int strncasecmp_l (const char *__s1, const char *__s2, + size_t __n, __locale_t __loc) + __THROW __attribute_pure__ __nonnull ((1, 2, 4)); +#endif +# 553 "/usr/include/string.h" 3 4 + +#ifdef __USE_BSD +/* Return the next DELIM-delimited token from *STRINGP, + terminating it with a '\0', and update *STRINGP to point past it. */ +extern char *strsep (char **__restrict __stringp, + const char *__restrict __delim) + __THROW __nonnull ((1, 2)); +#endif +# 561 "/usr/include/string.h" 3 4 + +#ifdef __USE_XOPEN2K8 +/* Return a string describing the meaning of the signal number in SIG. */ +extern char *strsignal (int __sig) __THROW; + +/* Copy SRC to DEST, returning the address of the terminating '\0' in DEST. 
*/ +extern char *__stpcpy (char *__restrict __dest, const char *__restrict __src) + __THROW __nonnull ((1, 2)); +extern char *stpcpy (char *__restrict __dest, const char *__restrict __src) + __THROW __nonnull ((1, 2)); + +/* Copy no more than N characters of SRC to DEST, returning the address of + the last character written into DEST. */ +extern char *__stpncpy (char *__restrict __dest, + const char *__restrict __src, size_t __n) + __THROW __nonnull ((1, 2)); +extern char *stpncpy (char *__restrict __dest, + const char *__restrict __src, size_t __n) + __THROW __nonnull ((1, 2)); +#endif +# 581 "/usr/include/string.h" 3 4 + +#ifdef __USE_GNU +/* Compare S1 and S2 as strings holding name & indices/version numbers. */ +extern int strverscmp (const char *__s1, const char *__s2) + __THROW __attribute_pure__ __nonnull ((1, 2)); + +/* Sautee STRING briskly. */ +extern char *strfry (char *__string) __THROW __nonnull ((1)); + +/* Frobnicate N bytes of S. */ +extern void *memfrob (void *__s, size_t __n) __THROW __nonnull ((1)); + +# ifndef basename +/* Return the file name within directory of FILENAME. We don't + declare the function if the `basename' macro is available (defined + in ) which makes the XPG version of this function + available. */ +# ifdef __CORRECT_ISO_CPP_STRING_H_PROTO +extern "C++" char *basename (char *__filename) + __THROW __asm ("basename") __nonnull ((1)); +extern "C++" const char *basename (const char *__filename) + __THROW __asm ("basename") __nonnull ((1)); +# else +# 604 "/usr/include/string.h" 3 4 +extern char *basename (const char *__filename) __THROW __nonnull ((1)); +# endif +# 606 "/usr/include/string.h" 3 4 +# endif +# 607 "/usr/include/string.h" 3 4 +#endif +# 608 "/usr/include/string.h" 3 4 + + +#if defined __GNUC__ && __GNUC__ >= 2 +# if defined __OPTIMIZE__ && !defined __OPTIMIZE_SIZE__ \ + && !defined __NO_INLINE__ && !defined __cplusplus +/* When using GNU CC we provide some optimized versions of selected + functions from this header. 
There are two kinds of optimizations: + + - machine-dependent optimizations, most probably using inline + assembler code; these might be quite expensive since the code + size can increase significantly. + These optimizations are not used unless the symbol + __USE_STRING_INLINES + is defined before including this header. + + - machine-independent optimizations which do not increase the + code size significantly and which optimize mainly situations + where one or more arguments are compile-time constants. + These optimizations are used always when the compiler is + taught to optimize. + + One can inhibit all optimizations by defining __NO_STRING_INLINES. */ + +/* Get the machine-dependent optimizations (if any). */ +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 632 "/usr/include/string.h" 3 4 +# 1 "/usr/include/x86_64-linux-gnu/bits/string.h" 1 3 4 +/* Optimized, inlined string functions. i486/x86-64 version. + Copyright (C) 2001-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _STRING_H +# error "Never use directly; include instead." +#endif +# 22 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + +/* The ix86 processors can access unaligned multi-byte variables. 
*/ +#define _STRING_ARCH_unaligned 1 + +/* Enable inline functions only for i486 or better when compiling for + ia32. */ +#if !defined __x86_64__ && (defined __i486__ || defined __pentium__ \ + || defined __pentiumpro__ || defined __pentium4__ \ + || defined __nocona__ || defined __atom__ \ + || defined __core2__ || defined __corei7__ \ + || defined __k6__ || defined __geode__ \ + || defined __k8__ || defined __athlon__ \ + || defined __amdfam10__) + +/* We only provide optimizations if the user selects them and if + GNU CC is used. */ +# if !defined __NO_STRING_INLINES && defined __USE_STRING_INLINES \ + && defined __GNUC__ && __GNUC__ >= 2 + +# ifndef __STRING_INLINE +# ifndef __extern_inline +# define __STRING_INLINE inline +# else +# 45 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 +# define __STRING_INLINE __extern_inline +# endif +# 47 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 +# endif +# 48 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + +/* The macros are used in some of the optimized implementations below. */ +# define __STRING_SMALL_GET16(src, idx) \ + ((((const unsigned char *) (src))[idx + 1] << 8) \ + | ((const unsigned char *) (src))[idx]) +# define __STRING_SMALL_GET32(src, idx) \ + (((((const unsigned char *) (src))[idx + 3] << 8 \ + | ((const unsigned char *) (src))[idx + 2]) << 8 \ + | ((const unsigned char *) (src))[idx + 1]) << 8 \ + | ((const unsigned char *) (src))[idx]) + + +/* Copy N bytes of SRC to DEST. */ +# define _HAVE_STRING_ARCH_memcpy 1 +# define memcpy(dest, src, n) \ + (__extension__ (__builtin_constant_p (n) \ + ? __memcpy_c ((dest), (src), (n)) \ + : __memcpy_g ((dest), (src), (n)))) +# define __memcpy_c(dest, src, n) \ + ((n) == 0 \ + ? (dest) \ + : (((n) % 4 == 0) \ + ? __memcpy_by4 (dest, src, n) \ + : (((n) % 2 == 0) \ + ? 
__memcpy_by2 (dest, src, n) \ + : __memcpy_g (dest, src, n)))) + +__STRING_INLINE void *__memcpy_by4 (void *__dest, const void *__src, + size_t __n); + +__STRING_INLINE void * +__memcpy_by4 (void *__dest, const void *__src, size_t __n) +{ + register unsigned long int __d0, __d1; + register void *__tmp = __dest; + __asm__ __volatile__ + ("1:\n\t" + "movl (%2),%0\n\t" + "leal 4(%2),%2\n\t" + "movl %0,(%1)\n\t" + "leal 4(%1),%1\n\t" + "decl %3\n\t" + "jnz 1b" + : "=&r" (__d0), "=&r" (__tmp), "=&r" (__src), "=&r" (__d1) + : "1" (__tmp), "2" (__src), "3" (__n / 4) + : "memory", "cc"); + return __dest; +} + +__STRING_INLINE void *__memcpy_by2 (void *__dest, const void *__src, + size_t __n); + +__STRING_INLINE void * +__memcpy_by2 (void *__dest, const void *__src, size_t __n) +{ + register unsigned long int __d0, __d1; + register void *__tmp = __dest; + __asm__ __volatile__ + ("shrl $1,%3\n\t" + "jz 2f\n" /* only a word */ + "1:\n\t" + "movl (%2),%0\n\t" + "leal 4(%2),%2\n\t" + "movl %0,(%1)\n\t" + "leal 4(%1),%1\n\t" + "decl %3\n\t" + "jnz 1b\n" + "2:\n\t" + "movw (%2),%w0\n\t" + "movw %w0,(%1)" + : "=&q" (__d0), "=&r" (__tmp), "=&r" (__src), "=&r" (__d1) + : "1" (__tmp), "2" (__src), "3" (__n / 2) + : "memory", "cc"); + return __dest; +} + +__STRING_INLINE void *__memcpy_g (void *__dest, const void *__src, size_t __n); + +__STRING_INLINE void * +__memcpy_g (void *__dest, const void *__src, size_t __n) +{ + register unsigned long int __d0, __d1, __d2; + register void *__tmp = __dest; + __asm__ __volatile__ + ("cld\n\t" + "shrl $1,%%ecx\n\t" + "jnc 1f\n\t" + "movsb\n" + "1:\n\t" + "shrl $1,%%ecx\n\t" + "jnc 2f\n\t" + "movsw\n" + "2:\n\t" + "rep; movsl" + : "=&c" (__d0), "=&D" (__d1), "=&S" (__d2), + "=m" ( *(struct { __extension__ char __x[__n]; } *)__dest) + : "0" (__n), "1" (__tmp), "2" (__src), + "m" ( *(struct { __extension__ char __x[__n]; } *)__src) + : "cc"); + return __dest; +} + +# define _HAVE_STRING_ARCH_memmove 1 +# ifndef _FORCE_INLINES +/* Copy N bytes of 
SRC to DEST, guaranteeing + correct behavior for overlapping strings. */ +# define memmove(dest, src, n) __memmove_g (dest, src, n) + +__STRING_INLINE void *__memmove_g (void *, const void *, size_t) + __asm__ ("memmove"); + +__STRING_INLINE void * +__memmove_g (void *__dest, const void *__src, size_t __n) +{ + register unsigned long int __d0, __d1, __d2; + register void *__tmp = __dest; + if (__dest < __src) + __asm__ __volatile__ + ("cld\n\t" + "rep; movsb" + : "=&c" (__d0), "=&S" (__d1), "=&D" (__d2), + "=m" ( *(struct { __extension__ char __x[__n]; } *)__dest) + : "0" (__n), "1" (__src), "2" (__tmp), + "m" ( *(struct { __extension__ char __x[__n]; } *)__src)); + else + __asm__ __volatile__ + ("std\n\t" + "rep; movsb\n\t" + "cld" + : "=&c" (__d0), "=&S" (__d1), "=&D" (__d2), + "=m" ( *(struct { __extension__ char __x[__n]; } *)__dest) + : "0" (__n), "1" (__n - 1 + (const char *) __src), + "2" (__n - 1 + (char *) __tmp), + "m" ( *(struct { __extension__ char __x[__n]; } *)__src)); + return __dest; +} +# endif +# 185 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + +/* Compare N bytes of S1 and S2. */ +# define _HAVE_STRING_ARCH_memcmp 1 +# ifndef _FORCE_INLINES +# ifndef __PIC__ +/* gcc has problems to spill registers when using PIC. */ +__STRING_INLINE int +memcmp (const void *__s1, const void *__s2, size_t __n) +{ + register unsigned long int __d0, __d1, __d2; + register int __res; + __asm__ __volatile__ + ("cld\n\t" + "testl %3,%3\n\t" + "repe; cmpsb\n\t" + "je 1f\n\t" + "sbbl %0,%0\n\t" + "orl $1,%0\n" + "1:" + : "=&a" (__res), "=&S" (__d0), "=&D" (__d1), "=&c" (__d2) + : "0" (0), "1" (__s1), "2" (__s2), "3" (__n), + "m" ( *(struct { __extension__ char __x[__n]; } *)__s1), + "m" ( *(struct { __extension__ char __x[__n]; } *)__s2) + : "cc"); + return __res; +} +# endif +# 212 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 +# endif +# 213 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + +/* Set N bytes of S to C. 
*/ +# define _HAVE_STRING_ARCH_memset 1 +# define _USE_STRING_ARCH_memset 1 +# define memset(s, c, n) \ + (__extension__ (__builtin_constant_p (n) && (n) <= 16 \ + ? ((n) == 1 \ + ? __memset_c1 ((s), (c)) \ + : __memset_gc ((s), (c), (n))) \ + : (__builtin_constant_p (c) \ + ? (__builtin_constant_p (n) \ + ? __memset_ccn ((s), (c), (n)) \ + : memset ((s), (c), (n))) \ + : (__builtin_constant_p (n) \ + ? __memset_gcn ((s), (c), (n)) \ + : memset ((s), (c), (n)))))) + +# define __memset_c1(s, c) ({ void *__s = (s); \ + *((unsigned char *) __s) = (unsigned char) (c); \ + __s; }) + +# define __memset_gc(s, c, n) \ + ({ void *__s = (s); \ + union { \ + unsigned int __ui; \ + unsigned short int __usi; \ + unsigned char __uc; \ + } *__u = __s; \ + unsigned int __c = ((unsigned int) ((unsigned char) (c))) * 0x01010101; \ + \ + /* We apply a trick here. `gcc' would implement the following \ + assignments using immediate operands. But this uses to much \ + memory (7, instead of 4 bytes). So we force the value in a \ + registers. */ \ + if ((n) == 3 || (n) >= 5) \ + __asm__ __volatile__ ("" : "=r" (__c) : "0" (__c)); \ + \ + /* This `switch' statement will be removed at compile-time. 
*/ \ + switch (n) \ + { \ + case 15: \ + __u->__ui = __c; \ + __u = __extension__ ((void *) __u + 4); \ + case 11: \ + __u->__ui = __c; \ + __u = __extension__ ((void *) __u + 4); \ + case 7: \ + __u->__ui = __c; \ + __u = __extension__ ((void *) __u + 4); \ + case 3: \ + __u->__usi = (unsigned short int) __c; \ + __u = __extension__ ((void *) __u + 2); \ + __u->__uc = (unsigned char) __c; \ + break; \ + \ + case 14: \ + __u->__ui = __c; \ + __u = __extension__ ((void *) __u + 4); \ + case 10: \ + __u->__ui = __c; \ + __u = __extension__ ((void *) __u + 4); \ + case 6: \ + __u->__ui = __c; \ + __u = __extension__ ((void *) __u + 4); \ + case 2: \ + __u->__usi = (unsigned short int) __c; \ + break; \ + \ + case 13: \ + __u->__ui = __c; \ + __u = __extension__ ((void *) __u + 4); \ + case 9: \ + __u->__ui = __c; \ + __u = __extension__ ((void *) __u + 4); \ + case 5: \ + __u->__ui = __c; \ + __u = __extension__ ((void *) __u + 4); \ + case 1: \ + __u->__uc = (unsigned char) __c; \ + break; \ + \ + case 16: \ + __u->__ui = __c; \ + __u = __extension__ ((void *) __u + 4); \ + case 12: \ + __u->__ui = __c; \ + __u = __extension__ ((void *) __u + 4); \ + case 8: \ + __u->__ui = __c; \ + __u = __extension__ ((void *) __u + 4); \ + case 4: \ + __u->__ui = __c; \ + case 0: \ + break; \ + } \ + \ + __s; }) + +# define __memset_ccn(s, c, n) \ + (((n) % 4 == 0) \ + ? __memset_ccn_by4 (s, ((unsigned int) ((unsigned char) (c))) * 0x01010101,\ + n) \ + : (((n) % 2 == 0) \ + ? 
__memset_ccn_by2 (s, \ + ((unsigned int) ((unsigned char) (c))) * 0x01010101,\ + n) \ + : memset (s, c, n))) + +__STRING_INLINE void *__memset_ccn_by4 (void *__s, unsigned int __c, + size_t __n); + +__STRING_INLINE void * +__memset_ccn_by4 (void *__s, unsigned int __c, size_t __n) +{ + register void *__tmp = __s; + register unsigned long int __d0; +# ifdef __i686__ + __asm__ __volatile__ + ("cld\n\t" + "rep; stosl" + : "=&a" (__c), "=&D" (__tmp), "=&c" (__d0), + "=m" ( *(struct { __extension__ char __x[__n]; } *)__s) + : "0" ((unsigned int) __c), "1" (__tmp), "2" (__n / 4) + : "cc"); +# else +# 338 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + __asm__ __volatile__ + ("1:\n\t" + "movl %0,(%1)\n\t" + "addl $4,%1\n\t" + "decl %2\n\t" + "jnz 1b\n" + : "=&r" (__c), "=&r" (__tmp), "=&r" (__d0), + "=m" ( *(struct { __extension__ char __x[__n]; } *)__s) + : "0" ((unsigned int) __c), "1" (__tmp), "2" (__n / 4) + : "cc"); +# endif +# 349 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + return __s; +} + +__STRING_INLINE void *__memset_ccn_by2 (void *__s, unsigned int __c, + size_t __n); + +__STRING_INLINE void * +__memset_ccn_by2 (void *__s, unsigned int __c, size_t __n) +{ + register unsigned long int __d0, __d1; + register void *__tmp = __s; +# ifdef __i686__ + __asm__ __volatile__ + ("cld\n\t" + "rep; stosl\n" + "stosw" + : "=&a" (__d0), "=&D" (__tmp), "=&c" (__d1), + "=m" ( *(struct { __extension__ char __x[__n]; } *)__s) + : "0" ((unsigned int) __c), "1" (__tmp), "2" (__n / 4) + : "cc"); +# else +# 370 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + __asm__ __volatile__ + ("1:\tmovl %0,(%1)\n\t" + "leal 4(%1),%1\n\t" + "decl %2\n\t" + "jnz 1b\n" + "movw %w0,(%1)" + : "=&q" (__d0), "=&r" (__tmp), "=&r" (__d1), + "=m" ( *(struct { __extension__ char __x[__n]; } *)__s) + : "0" ((unsigned int) __c), "1" (__tmp), "2" (__n / 4) + : "cc"); +#endif +# 381 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + return __s; +} + +# define __memset_gcn(s, c, n) \ + (((n) % 
4 == 0) \ + ? __memset_gcn_by4 (s, c, n) \ + : (((n) % 2 == 0) \ + ? __memset_gcn_by2 (s, c, n) \ + : memset (s, c, n))) + +__STRING_INLINE void *__memset_gcn_by4 (void *__s, int __c, size_t __n); + +__STRING_INLINE void * +__memset_gcn_by4 (void *__s, int __c, size_t __n) +{ + register void *__tmp = __s; + register unsigned long int __d0; + __asm__ __volatile__ + ("movb %b0,%h0\n" + "pushw %w0\n\t" + "shll $16,%0\n\t" + "popw %w0\n" + "1:\n\t" + "movl %0,(%1)\n\t" + "addl $4,%1\n\t" + "decl %2\n\t" + "jnz 1b\n" + : "=&q" (__c), "=&r" (__tmp), "=&r" (__d0), + "=m" ( *(struct { __extension__ char __x[__n]; } *)__s) + : "0" ((unsigned int) __c), "1" (__tmp), "2" (__n / 4) + : "cc"); + return __s; +} + +__STRING_INLINE void *__memset_gcn_by2 (void *__s, int __c, size_t __n); + +__STRING_INLINE void * +__memset_gcn_by2 (void *__s, int __c, size_t __n) +{ + register unsigned long int __d0, __d1; + register void *__tmp = __s; + __asm__ __volatile__ + ("movb %b0,%h0\n\t" + "pushw %w0\n\t" + "shll $16,%0\n\t" + "popw %w0\n" + "1:\n\t" + "movl %0,(%1)\n\t" + "leal 4(%1),%1\n\t" + "decl %2\n\t" + "jnz 1b\n" + "movw %w0,(%1)" + : "=&q" (__d0), "=&r" (__tmp), "=&r" (__d1), + "=m" ( *(struct { __extension__ char __x[__n]; } *)__s) + : "0" ((unsigned int) __c), "1" (__tmp), "2" (__n / 4) + : "cc"); + return __s; +} + + +/* Search N bytes of S for C. 
*/ +# define _HAVE_STRING_ARCH_memchr 1 +# ifndef _FORCE_INLINES +__STRING_INLINE void * +memchr (const void *__s, int __c, size_t __n) +{ + register unsigned long int __d0; +# ifdef __i686__ + register unsigned long int __d1; +# endif +# 451 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + register unsigned char *__res; + if (__n == 0) + return NULL; +# ifdef __i686__ + __asm__ __volatile__ + ("cld\n\t" + "repne; scasb\n\t" + "cmovne %2,%0" + : "=D" (__res), "=&c" (__d0), "=&r" (__d1) + : "a" (__c), "0" (__s), "1" (__n), "2" (1), + "m" ( *(struct { __extension__ char __x[__n]; } *)__s) + : "cc"); +# else +# 464 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + __asm__ __volatile__ + ("cld\n\t" + "repne; scasb\n\t" + "je 1f\n\t" + "movl $1,%0\n" + "1:" + : "=D" (__res), "=&c" (__d0) + : "a" (__c), "0" (__s), "1" (__n), + "m" ( *(struct { __extension__ char __x[__n]; } *)__s) + : "cc"); +# endif +# 475 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + return __res - 1; +} +# endif +# 478 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + +# define _HAVE_STRING_ARCH_memrchr 1 +# ifndef _FORCE_INLINES +__STRING_INLINE void *__memrchr (const void *__s, int __c, size_t __n); + +__STRING_INLINE void * +__memrchr (const void *__s, int __c, size_t __n) +{ + register unsigned long int __d0; +# ifdef __i686__ + register unsigned long int __d1; +# endif +# 490 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + register void *__res; + if (__n == 0) + return NULL; +# ifdef __i686__ + __asm__ __volatile__ + ("std\n\t" + "repne; scasb\n\t" + "cmovne %2,%0\n\t" + "cld\n\t" + "incl %0" + : "=D" (__res), "=&c" (__d0), "=&r" (__d1) + : "a" (__c), "0" (__s + __n - 1), "1" (__n), "2" (-1), + "m" ( *(struct { __extension__ char __x[__n]; } *)__s) + : "cc"); +# else +# 505 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + __asm__ __volatile__ + ("std\n\t" + "repne; scasb\n\t" + "je 1f\n\t" + "orl $-1,%0\n" + "1:\tcld\n\t" + "incl %0" + : "=D" (__res), "=&c" (__d0) + : "a" 
(__c), "0" (__s + __n - 1), "1" (__n), + "m" ( *(struct { __extension__ char __x[__n]; } *)__s) + : "cc"); +# endif +# 517 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + return __res; +} +# ifdef __USE_GNU +# define memrchr(s, c, n) __memrchr ((s), (c), (n)) +# endif +# 522 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 +# endif +# 523 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + +/* Return pointer to C in S. */ +# define _HAVE_STRING_ARCH_rawmemchr 1 +__STRING_INLINE void *__rawmemchr (const void *__s, int __c); + +# ifndef _FORCE_INLINES +__STRING_INLINE void * +__rawmemchr (const void *__s, int __c) +{ + register unsigned long int __d0; + register unsigned char *__res; + __asm__ __volatile__ + ("cld\n\t" + "repne; scasb\n\t" + : "=D" (__res), "=&c" (__d0) + : "a" (__c), "0" (__s), "1" (0xffffffff), + "m" ( *(struct { char __x[0xfffffff]; } *)__s) + : "cc"); + return __res - 1; +} +# ifdef __USE_GNU +__STRING_INLINE void * +rawmemchr (const void *__s, int __c) +{ + return __rawmemchr (__s, __c); +} +# endif /* use GNU */ +# 550 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 +# endif +# 551 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + + +/* Return the length of S. */ +# define _HAVE_STRING_ARCH_strlen 1 +# define strlen(str) \ + (__extension__ (__builtin_constant_p (str) \ + ? __builtin_strlen (str) \ + : __strlen_g (str))) +__STRING_INLINE size_t __strlen_g (const char *__str); + +__STRING_INLINE size_t +__strlen_g (const char *__str) +{ + register char __dummy; + register const char *__tmp = __str; + __asm__ __volatile__ + ("1:\n\t" + "movb (%0),%b1\n\t" + "leal 1(%0),%0\n\t" + "testb %b1,%b1\n\t" + "jne 1b" + : "=r" (__tmp), "=&q" (__dummy) + : "0" (__str), + "m" ( *(struct { char __x[0xfffffff]; } *)__str) + : "cc" ); + return __tmp - __str - 1; +} + + +/* Copy SRC to DEST. */ +# define _HAVE_STRING_ARCH_strcpy 1 +# define strcpy(dest, src) \ + (__extension__ (__builtin_constant_p (src) \ + ? 
(sizeof ((src)[0]) == 1 && strlen (src) + 1 <= 8 \ + ? __strcpy_a_small ((dest), (src), strlen (src) + 1) \ + : (char *) memcpy ((char *) (dest), \ + (const char *) (src), \ + strlen (src) + 1)) \ + : __strcpy_g ((dest), (src)))) + +# define __strcpy_a_small(dest, src, srclen) \ + (__extension__ ({ char *__dest = (dest); \ + union { \ + unsigned int __ui; \ + unsigned short int __usi; \ + unsigned char __uc; \ + char __c; \ + } *__u = (void *) __dest; \ + switch (srclen) \ + { \ + case 1: \ + __u->__uc = '\0'; \ + break; \ + case 2: \ + __u->__usi = __STRING_SMALL_GET16 (src, 0); \ + break; \ + case 3: \ + __u->__usi = __STRING_SMALL_GET16 (src, 0); \ + __u = __extension__ ((void *) __u + 2); \ + __u->__uc = '\0'; \ + break; \ + case 4: \ + __u->__ui = __STRING_SMALL_GET32 (src, 0); \ + break; \ + case 5: \ + __u->__ui = __STRING_SMALL_GET32 (src, 0); \ + __u = __extension__ ((void *) __u + 4); \ + __u->__uc = '\0'; \ + break; \ + case 6: \ + __u->__ui = __STRING_SMALL_GET32 (src, 0); \ + __u = __extension__ ((void *) __u + 4); \ + __u->__usi = __STRING_SMALL_GET16 (src, 4); \ + break; \ + case 7: \ + __u->__ui = __STRING_SMALL_GET32 (src, 0); \ + __u = __extension__ ((void *) __u + 4); \ + __u->__usi = __STRING_SMALL_GET16 (src, 4); \ + __u = __extension__ ((void *) __u + 2); \ + __u->__uc = '\0'; \ + break; \ + case 8: \ + __u->__ui = __STRING_SMALL_GET32 (src, 0); \ + __u = __extension__ ((void *) __u + 4); \ + __u->__ui = __STRING_SMALL_GET32 (src, 4); \ + break; \ + } \ + (char *) __dest; })) + +__STRING_INLINE char *__strcpy_g (char *__dest, const char *__src); + +__STRING_INLINE char * +__strcpy_g (char *__dest, const char *__src) +{ + register char *__tmp = __dest; + register char __dummy; + __asm__ __volatile__ + ( + "1:\n\t" + "movb (%0),%b2\n\t" + "leal 1(%0),%0\n\t" + "movb %b2,(%1)\n\t" + "leal 1(%1),%1\n\t" + "testb %b2,%b2\n\t" + "jne 1b" + : "=&r" (__src), "=&r" (__tmp), "=&q" (__dummy), + "=m" ( *(struct { char __x[0xfffffff]; } *)__dest) + : "0" 
(__src), "1" (__tmp), + "m" ( *(struct { char __x[0xfffffff]; } *)__src) + : "cc"); + return __dest; +} + + +# ifdef __USE_GNU +# define _HAVE_STRING_ARCH_stpcpy 1 +/* Copy SRC to DEST. */ +# define __stpcpy(dest, src) \ + (__extension__ (__builtin_constant_p (src) \ + ? (strlen (src) + 1 <= 8 \ + ? __stpcpy_a_small ((dest), (src), strlen (src) + 1) \ + : __stpcpy_c ((dest), (src), strlen (src) + 1)) \ + : __stpcpy_g ((dest), (src)))) +# define __stpcpy_c(dest, src, srclen) \ + ((srclen) % 4 == 0 \ + ? __mempcpy_by4 (dest, src, srclen) - 1 \ + : ((srclen) % 2 == 0 \ + ? __mempcpy_by2 (dest, src, srclen) - 1 \ + : __mempcpy_byn (dest, src, srclen) - 1)) + +/* In glibc itself we use this symbol for namespace reasons. */ +# define stpcpy(dest, src) __stpcpy ((dest), (src)) + +# define __stpcpy_a_small(dest, src, srclen) \ + (__extension__ ({ union { \ + unsigned int __ui; \ + unsigned short int __usi; \ + unsigned char __uc; \ + char __c; \ + } *__u = (void *) (dest); \ + switch (srclen) \ + { \ + case 1: \ + __u->__uc = '\0'; \ + break; \ + case 2: \ + __u->__usi = __STRING_SMALL_GET16 (src, 0); \ + __u = __extension__ ((void *) __u + 1); \ + break; \ + case 3: \ + __u->__usi = __STRING_SMALL_GET16 (src, 0); \ + __u = __extension__ ((void *) __u + 2); \ + __u->__uc = '\0'; \ + break; \ + case 4: \ + __u->__ui = __STRING_SMALL_GET32 (src, 0); \ + __u = __extension__ ((void *) __u + 3); \ + break; \ + case 5: \ + __u->__ui = __STRING_SMALL_GET32 (src, 0); \ + __u = __extension__ ((void *) __u + 4); \ + __u->__uc = '\0'; \ + break; \ + case 6: \ + __u->__ui = __STRING_SMALL_GET32 (src, 0); \ + __u = __extension__ ((void *) __u + 4); \ + __u->__usi = __STRING_SMALL_GET16 (src, 4); \ + __u = __extension__ ((void *) __u + 1); \ + break; \ + case 7: \ + __u->__ui = __STRING_SMALL_GET32 (src, 0); \ + __u = __extension__ ((void *) __u + 4); \ + __u->__usi = __STRING_SMALL_GET16 (src, 4); \ + __u = __extension__ ((void *) __u + 2); \ + __u->__uc = '\0'; \ + break; \ + case 8: 
\ + __u->__ui = __STRING_SMALL_GET32 (src, 0); \ + __u = __extension__ ((void *) __u + 4); \ + __u->__ui = __STRING_SMALL_GET32 (src, 4); \ + __u = __extension__ ((void *) __u + 3); \ + break; \ + } \ + (char *) __u; })) + +__STRING_INLINE char *__mempcpy_by4 (char *__dest, const char *__src, + size_t __srclen); + +__STRING_INLINE char * +__mempcpy_by4 (char *__dest, const char *__src, size_t __srclen) +{ + register char *__tmp = __dest; + register unsigned long int __d0, __d1; + __asm__ __volatile__ + ("1:\n\t" + "movl (%2),%0\n\t" + "leal 4(%2),%2\n\t" + "movl %0,(%1)\n\t" + "leal 4(%1),%1\n\t" + "decl %3\n\t" + "jnz 1b" + : "=&r" (__d0), "=r" (__tmp), "=&r" (__src), "=&r" (__d1) + : "1" (__tmp), "2" (__src), "3" (__srclen / 4) + : "memory", "cc"); + return __tmp; +} + +__STRING_INLINE char *__mempcpy_by2 (char *__dest, const char *__src, + size_t __srclen); + +__STRING_INLINE char * +__mempcpy_by2 (char *__dest, const char *__src, size_t __srclen) +{ + register char *__tmp = __dest; + register unsigned long int __d0, __d1; + __asm__ __volatile__ + ("shrl $1,%3\n\t" + "jz 2f\n" /* only a word */ + "1:\n\t" + "movl (%2),%0\n\t" + "leal 4(%2),%2\n\t" + "movl %0,(%1)\n\t" + "leal 4(%1),%1\n\t" + "decl %3\n\t" + "jnz 1b\n" + "2:\n\t" + "movw (%2),%w0\n\t" + "movw %w0,(%1)" + : "=&q" (__d0), "=r" (__tmp), "=&r" (__src), "=&r" (__d1), + "=m" ( *(struct { __extension__ char __x[__srclen]; } *)__dest) + : "1" (__tmp), "2" (__src), "3" (__srclen / 2), + "m" ( *(struct { __extension__ char __x[__srclen]; } *)__src) + : "cc"); + return __tmp + 2; +} + +__STRING_INLINE char *__mempcpy_byn (char *__dest, const char *__src, + size_t __srclen); + +__STRING_INLINE char * +__mempcpy_byn (char *__dest, const char *__src, size_t __srclen) +{ + register unsigned long __d0, __d1; + register char *__tmp = __dest; + __asm__ __volatile__ + ("cld\n\t" + "shrl $1,%%ecx\n\t" + "jnc 1f\n\t" + "movsb\n" + "1:\n\t" + "shrl $1,%%ecx\n\t" + "jnc 2f\n\t" + "movsw\n" + "2:\n\t" + "rep; movsl" + : 
"=D" (__tmp), "=&c" (__d0), "=&S" (__d1), + "=m" ( *(struct { __extension__ char __x[__srclen]; } *)__dest) + : "0" (__tmp), "1" (__srclen), "2" (__src), + "m" ( *(struct { __extension__ char __x[__srclen]; } *)__src) + : "cc"); + return __tmp; +} + +__STRING_INLINE char *__stpcpy_g (char *__dest, const char *__src); + +__STRING_INLINE char * +__stpcpy_g (char *__dest, const char *__src) +{ + register char *__tmp = __dest; + register char __dummy; + __asm__ __volatile__ + ( + "1:\n\t" + "movb (%0),%b2\n\t" + "leal 1(%0),%0\n\t" + "movb %b2,(%1)\n\t" + "leal 1(%1),%1\n\t" + "testb %b2,%b2\n\t" + "jne 1b" + : "=&r" (__src), "=r" (__tmp), "=&q" (__dummy), + "=m" ( *(struct { char __x[0xfffffff]; } *)__dest) + : "0" (__src), "1" (__tmp), + "m" ( *(struct { char __x[0xfffffff]; } *)__src) + : "cc"); + return __tmp - 1; +} +# endif +# 838 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + + +/* Copy no more than N characters of SRC to DEST. */ +# define _HAVE_STRING_ARCH_strncpy 1 +# define strncpy(dest, src, n) \ + (__extension__ (__builtin_constant_p (src) \ + ? ((strlen (src) + 1 >= ((size_t) (n)) \ + ? (char *) memcpy ((char *) (dest), \ + (const char *) (src), n) \ + : __strncpy_cg ((dest), (src), strlen (src) + 1, n))) \ + : __strncpy_gg ((dest), (src), n))) +# define __strncpy_cg(dest, src, srclen, n) \ + (((srclen) % 4 == 0) \ + ? __strncpy_by4 (dest, src, srclen, n) \ + : (((srclen) % 2 == 0) \ + ? 
__strncpy_by2 (dest, src, srclen, n) \ + : __strncpy_byn (dest, src, srclen, n))) + +__STRING_INLINE char *__strncpy_by4 (char *__dest, const char __src[], + size_t __srclen, size_t __n); + +__STRING_INLINE char * +__strncpy_by4 (char *__dest, const char __src[], size_t __srclen, size_t __n) +{ + register char *__tmp = __dest; + register int __dummy1, __dummy2; + __asm__ __volatile__ + ("1:\n\t" + "movl (%2),%0\n\t" + "leal 4(%2),%2\n\t" + "movl %0,(%1)\n\t" + "leal 4(%1),%1\n\t" + "decl %3\n\t" + "jnz 1b" + : "=&r" (__dummy1), "=r" (__tmp), "=&r" (__src), "=&r" (__dummy2), + "=m" ( *(struct { __extension__ char __x[__srclen]; } *)__dest) + : "1" (__tmp), "2" (__src), "3" (__srclen / 4), + "m" ( *(struct { __extension__ char __x[__srclen]; } *)__src) + : "cc"); + (void) memset (__tmp, '\0', __n - __srclen); + return __dest; +} + +__STRING_INLINE char *__strncpy_by2 (char *__dest, const char __src[], + size_t __srclen, size_t __n); + +__STRING_INLINE char * +__strncpy_by2 (char *__dest, const char __src[], size_t __srclen, size_t __n) +{ + register char *__tmp = __dest; + register int __dummy1, __dummy2; + __asm__ __volatile__ + ("shrl $1,%3\n\t" + "jz 2f\n" /* only a word */ + "1:\n\t" + "movl (%2),%0\n\t" + "leal 4(%2),%2\n\t" + "movl %0,(%1)\n\t" + "leal 4(%1),%1\n\t" + "decl %3\n\t" + "jnz 1b\n" + "2:\n\t" + "movw (%2),%w0\n\t" + "movw %w0,(%1)\n\t" + : "=&q" (__dummy1), "=r" (__tmp), "=&r" (__src), "=&r" (__dummy2), + "=m" ( *(struct { __extension__ char __x[__srclen]; } *)__dest) + : "1" (__tmp), "2" (__src), "3" (__srclen / 2), + "m" ( *(struct { __extension__ char __x[__srclen]; } *)__src) + : "cc"); + (void) memset (__tmp + 2, '\0', __n - __srclen); + return __dest; +} + +__STRING_INLINE char *__strncpy_byn (char *__dest, const char __src[], + size_t __srclen, size_t __n); + +__STRING_INLINE char * +__strncpy_byn (char *__dest, const char __src[], size_t __srclen, size_t __n) +{ + register unsigned long int __d0, __d1; + register char *__tmp = __dest; + 
__asm__ __volatile__ + ("cld\n\t" + "shrl $1,%1\n\t" + "jnc 1f\n\t" + "movsb\n" + "1:\n\t" + "shrl $1,%1\n\t" + "jnc 2f\n\t" + "movsw\n" + "2:\n\t" + "rep; movsl" + : "=D" (__tmp), "=&c" (__d0), "=&S" (__d1), + "=m" ( *(struct { __extension__ char __x[__srclen]; } *)__dest) + : "1" (__srclen), "0" (__tmp),"2" (__src), + "m" ( *(struct { __extension__ char __x[__srclen]; } *)__src) + : "cc"); + (void) memset (__tmp, '\0', __n - __srclen); + return __dest; +} + +__STRING_INLINE char *__strncpy_gg (char *__dest, const char *__src, + size_t __n); + +__STRING_INLINE char * +__strncpy_gg (char *__dest, const char *__src, size_t __n) +{ + register char *__tmp = __dest; + register char __dummy; + if (__n > 0) + __asm__ __volatile__ + ("1:\n\t" + "movb (%0),%2\n\t" + "incl %0\n\t" + "movb %2,(%1)\n\t" + "incl %1\n\t" + "decl %3\n\t" + "je 3f\n\t" + "testb %2,%2\n\t" + "jne 1b\n\t" + "2:\n\t" + "movb %2,(%1)\n\t" + "incl %1\n\t" + "decl %3\n\t" + "jne 2b\n\t" + "3:" + : "=&r" (__src), "=&r" (__tmp), "=&q" (__dummy), "=&r" (__n) + : "0" (__src), "1" (__tmp), "3" (__n) + : "memory", "cc"); + + return __dest; +} + + +/* Append SRC onto DEST. */ +# define _HAVE_STRING_ARCH_strcat 1 +# define strcat(dest, src) \ + (__extension__ (__builtin_constant_p (src) \ + ? 
__strcat_c ((dest), (src), strlen (src) + 1) \ + : __strcat_g ((dest), (src)))) + +__STRING_INLINE char *__strcat_c (char *__dest, const char __src[], + size_t __srclen); + +__STRING_INLINE char * +__strcat_c (char *__dest, const char __src[], size_t __srclen) +{ +# ifdef __i686__ + register unsigned long int __d0; + register char *__tmp; + __asm__ __volatile__ + ("repne; scasb" + : "=D" (__tmp), "=&c" (__d0), + "=m" ( *(struct { char __x[0xfffffff]; } *)__dest) + : "0" (__dest), "1" (0xffffffff), "a" (0), + "m" ( *(struct { __extension__ char __x[__srclen]; } *)__src) + : "cc"); + --__tmp; +# else +# 997 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + register char *__tmp = __dest - 1; + __asm__ __volatile__ + ("1:\n\t" + "incl %0\n\t" + "cmpb $0,(%0)\n\t" + "jne 1b\n" + : "=r" (__tmp), + "=m" ( *(struct { char __x[0xfffffff]; } *)__dest) + : "0" (__tmp), + "m" ( *(struct { __extension__ char __x[__srclen]; } *)__src) + : "cc"); +# endif +# 1009 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + (void) memcpy (__tmp, __src, __srclen); + return __dest; +} + +__STRING_INLINE char *__strcat_g (char *__dest, const char *__src); + +__STRING_INLINE char * +__strcat_g (char *__dest, const char *__src) +{ + register char *__tmp = __dest - 1; + register char __dummy; + __asm__ __volatile__ + ("1:\n\t" + "incl %1\n\t" + "cmpb $0,(%1)\n\t" + "jne 1b\n" + "2:\n\t" + "movb (%2),%b0\n\t" + "incl %2\n\t" + "movb %b0,(%1)\n\t" + "incl %1\n\t" + "testb %b0,%b0\n\t" + "jne 2b\n" + : "=&q" (__dummy), "=&r" (__tmp), "=&r" (__src), + "=m" ( *(struct { char __x[0xfffffff]; } *)__dest) + : "1" (__tmp), "2" (__src), + "m" ( *(struct { char __x[0xfffffff]; } *)__src) + : "memory", "cc"); + return __dest; +} + + +/* Append no more than N characters from SRC onto DEST. */ +# define _HAVE_STRING_ARCH_strncat 1 +# define strncat(dest, src, n) \ + (__extension__ ({ char *__dest = (dest); \ + __builtin_constant_p (src) && __builtin_constant_p (n) \ + ? (strlen (src) < ((size_t) (n)) \ + ? 
strcat (__dest, (src)) \ + : (*(char *)__mempcpy (strchr (__dest, '\0'), \ + (const char *) (src), \ + (n)) = 0, __dest)) \ + : __strncat_g (__dest, (src), (n)); })) + +__STRING_INLINE char *__strncat_g (char *__dest, const char __src[], + size_t __n); + +__STRING_INLINE char * +__strncat_g (char *__dest, const char __src[], size_t __n) +{ + register char *__tmp = __dest; + register char __dummy; +# ifdef __i686__ + __asm__ __volatile__ + ("repne; scasb\n" + "movl %4, %3\n\t" + "decl %1\n\t" + "1:\n\t" + "subl $1,%3\n\t" + "jc 2f\n\t" + "movb (%2),%b0\n\t" + "movsb\n\t" + "testb %b0,%b0\n\t" + "jne 1b\n\t" + "decl %1\n" + "2:\n\t" + "movb $0,(%1)" + : "=&a" (__dummy), "=&D" (__tmp), "=&S" (__src), "=&c" (__n) + : "g" (__n), "0" (0), "1" (__tmp), "2" (__src), "3" (0xffffffff) + : "memory", "cc"); +# else +# 1080 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + --__tmp; + __asm__ __volatile__ + ("1:\n\t" + "cmpb $0,1(%1)\n\t" + "leal 1(%1),%1\n\t" + "jne 1b\n" + "2:\n\t" + "subl $1,%3\n\t" + "jc 3f\n\t" + "movb (%2),%b0\n\t" + "leal 1(%2),%2\n\t" + "movb %b0,(%1)\n\t" + "leal 1(%1),%1\n\t" + "testb %b0,%b0\n\t" + "jne 2b\n\t" + "decl %1\n" + "3:\n\t" + "movb $0,(%1)" + : "=&q" (__dummy), "=&r" (__tmp), "=&r" (__src), "=&r" (__n) + : "1" (__tmp), "2" (__src), "3" (__n) + : "memory", "cc"); +#endif +# 1102 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + return __dest; +} + + +/* Compare S1 and S2. */ +# define _HAVE_STRING_ARCH_strcmp 1 +# define strcmp(s1, s2) \ + (__extension__ (__builtin_constant_p (s1) && __builtin_constant_p (s2) \ + && (sizeof ((s1)[0]) != 1 || strlen (s1) >= 4) \ + && (sizeof ((s2)[0]) != 1 || strlen (s2) >= 4) \ + ? memcmp ((const char *) (s1), (const char *) (s2), \ + (strlen (s1) < strlen (s2) \ + ? strlen (s1) : strlen (s2)) + 1) \ + : (__builtin_constant_p (s1) && sizeof ((s1)[0]) == 1 \ + && sizeof ((s2)[0]) == 1 && strlen (s1) < 4 \ + ? (__builtin_constant_p (s2) && sizeof ((s2)[0]) == 1 \ + ? 
__strcmp_cc ((const unsigned char *) (s1), \ + (const unsigned char *) (s2), \ + strlen (s1)) \ + : __strcmp_cg ((const unsigned char *) (s1), \ + (const unsigned char *) (s2), \ + strlen (s1))) \ + : (__builtin_constant_p (s2) && sizeof ((s1)[0]) == 1 \ + && sizeof ((s2)[0]) == 1 && strlen (s2) < 4 \ + ? (__builtin_constant_p (s1) \ + ? __strcmp_cc ((const unsigned char *) (s1), \ + (const unsigned char *) (s2), \ + strlen (s2)) \ + : __strcmp_gc ((const unsigned char *) (s1), \ + (const unsigned char *) (s2), \ + strlen (s2))) \ + : __strcmp_gg ((s1), (s2)))))) + +# define __strcmp_cc(s1, s2, l) \ + (__extension__ ({ register int __result = (s1)[0] - (s2)[0]; \ + if (l > 0 && __result == 0) \ + { \ + __result = (s1)[1] - (s2)[1]; \ + if (l > 1 && __result == 0) \ + { \ + __result = (s1)[2] - (s2)[2]; \ + if (l > 2 && __result == 0) \ + __result = (s1)[3] - (s2)[3]; \ + } \ + } \ + __result; })) + +# define __strcmp_cg(s1, s2, l1) \ + (__extension__ ({ const unsigned char *__s2 = (s2); \ + register int __result = (s1)[0] - __s2[0]; \ + if (l1 > 0 && __result == 0) \ + { \ + __result = (s1)[1] - __s2[1]; \ + if (l1 > 1 && __result == 0) \ + { \ + __result = (s1)[2] - __s2[2]; \ + if (l1 > 2 && __result == 0) \ + __result = (s1)[3] - __s2[3]; \ + } \ + } \ + __result; })) + +# define __strcmp_gc(s1, s2, l2) \ + (__extension__ ({ const unsigned char *__s1 = (s1); \ + register int __result = __s1[0] - (s2)[0]; \ + if (l2 > 0 && __result == 0) \ + { \ + __result = __s1[1] - (s2)[1]; \ + if (l2 > 1 && __result == 0) \ + { \ + __result = __s1[2] - (s2)[2]; \ + if (l2 > 2 && __result == 0) \ + __result = __s1[3] - (s2)[3]; \ + } \ + } \ + __result; })) + +__STRING_INLINE int __strcmp_gg (const char *__s1, const char *__s2); + +__STRING_INLINE int +__strcmp_gg (const char *__s1, const char *__s2) +{ + register int __res; + __asm__ __volatile__ + ("1:\n\t" + "movb (%1),%b0\n\t" + "leal 1(%1),%1\n\t" + "cmpb %b0,(%2)\n\t" + "jne 2f\n\t" + "leal 1(%2),%2\n\t" + "testb 
%b0,%b0\n\t" + "jne 1b\n\t" + "xorl %0,%0\n\t" + "jmp 3f\n" + "2:\n\t" + "movl $1,%0\n\t" + "jb 3f\n\t" + "negl %0\n" + "3:" + : "=q" (__res), "=&r" (__s1), "=&r" (__s2) + : "1" (__s1), "2" (__s2), + "m" ( *(struct { char __x[0xfffffff]; } *)__s1), + "m" ( *(struct { char __x[0xfffffff]; } *)__s2) + : "cc"); + return __res; +} + + +/* Compare N characters of S1 and S2. */ +# define _HAVE_STRING_ARCH_strncmp 1 +# define strncmp(s1, s2, n) \ + (__extension__ (__builtin_constant_p (s1) && strlen (s1) < ((size_t) (n)) \ + ? strcmp ((s1), (s2)) \ + : (__builtin_constant_p (s2) && strlen (s2) < ((size_t) (n))\ + ? strcmp ((s1), (s2)) \ + : __strncmp_g ((s1), (s2), (n))))) + +__STRING_INLINE int __strncmp_g (const char *__s1, const char *__s2, + size_t __n); + +__STRING_INLINE int +__strncmp_g (const char *__s1, const char *__s2, size_t __n) +{ + register int __res; + __asm__ __volatile__ + ("1:\n\t" + "subl $1,%3\n\t" + "jc 2f\n\t" + "movb (%1),%b0\n\t" + "incl %1\n\t" + "cmpb %b0,(%2)\n\t" + "jne 3f\n\t" + "incl %2\n\t" + "testb %b0,%b0\n\t" + "jne 1b\n" + "2:\n\t" + "xorl %0,%0\n\t" + "jmp 4f\n" + "3:\n\t" + "movl $1,%0\n\t" + "jb 4f\n\t" + "negl %0\n" + "4:" + : "=q" (__res), "=&r" (__s1), "=&r" (__s2), "=&r" (__n) + : "1" (__s1), "2" (__s2), "3" (__n), + "m" ( *(struct { __extension__ char __x[__n]; } *)__s1), + "m" ( *(struct { __extension__ char __x[__n]; } *)__s2) + : "cc"); + return __res; +} + + +/* Find the first occurrence of C in S. */ +# define _HAVE_STRING_ARCH_strchr 1 +# define _USE_STRING_ARCH_strchr 1 +# define strchr(s, c) \ + (__extension__ (__builtin_constant_p (c) \ + ? ((c) == '\0' \ + ? 
(char *) __rawmemchr ((s), (c)) \ + : __strchr_c ((s), ((c) & 0xff) << 8)) \ + : __strchr_g ((s), (c)))) + +__STRING_INLINE char *__strchr_c (const char *__s, int __c); + +__STRING_INLINE char * +__strchr_c (const char *__s, int __c) +{ + register unsigned long int __d0; + register char *__res; + __asm__ __volatile__ + ("1:\n\t" + "movb (%0),%%al\n\t" + "cmpb %%ah,%%al\n\t" + "je 2f\n\t" + "leal 1(%0),%0\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n\t" + "xorl %0,%0\n" + "2:" + : "=r" (__res), "=&a" (__d0) + : "0" (__s), "1" (__c), + "m" ( *(struct { char __x[0xfffffff]; } *)__s) + : "cc"); + return __res; +} + +__STRING_INLINE char *__strchr_g (const char *__s, int __c); + +__STRING_INLINE char * +__strchr_g (const char *__s, int __c) +{ + register unsigned long int __d0; + register char *__res; + __asm__ __volatile__ + ("movb %%al,%%ah\n" + "1:\n\t" + "movb (%0),%%al\n\t" + "cmpb %%ah,%%al\n\t" + "je 2f\n\t" + "leal 1(%0),%0\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n\t" + "xorl %0,%0\n" + "2:" + : "=r" (__res), "=&a" (__d0) + : "0" (__s), "1" (__c), + "m" ( *(struct { char __x[0xfffffff]; } *)__s) + : "cc"); + return __res; +} + + +/* Find the first occurrence of C in S or the final NUL byte. */ +# define _HAVE_STRING_ARCH_strchrnul 1 +# define __strchrnul(s, c) \ + (__extension__ (__builtin_constant_p (c) \ + ? ((c) == '\0' \ + ? 
(char *) __rawmemchr ((s), c) \ + : __strchrnul_c ((s), ((c) & 0xff) << 8)) \ + : __strchrnul_g ((s), c))) + +__STRING_INLINE char *__strchrnul_c (const char *__s, int __c); + +__STRING_INLINE char * +__strchrnul_c (const char *__s, int __c) +{ + register unsigned long int __d0; + register char *__res; + __asm__ __volatile__ + ("1:\n\t" + "movb (%0),%%al\n\t" + "cmpb %%ah,%%al\n\t" + "je 2f\n\t" + "leal 1(%0),%0\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n\t" + "decl %0\n" + "2:" + : "=r" (__res), "=&a" (__d0) + : "0" (__s), "1" (__c), + "m" ( *(struct { char __x[0xfffffff]; } *)__s) + : "cc"); + return __res; +} + +__STRING_INLINE char *__strchrnul_g (const char *__s, int __c); + +__STRING_INLINE char * +__strchrnul_g (const char *__s, int __c) +{ + register unsigned long int __d0; + register char *__res; + __asm__ __volatile__ + ("movb %%al,%%ah\n" + "1:\n\t" + "movb (%0),%%al\n\t" + "cmpb %%ah,%%al\n\t" + "je 2f\n\t" + "leal 1(%0),%0\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n\t" + "decl %0\n" + "2:" + : "=r" (__res), "=&a" (__d0) + : "0" (__s), "1" (__c), + "m" ( *(struct { char __x[0xfffffff]; } *)__s) + : "cc"); + return __res; +} +# ifdef __USE_GNU +# define strchrnul(s, c) __strchrnul ((s), (c)) +# endif +# 1374 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + + +# if defined __USE_BSD || defined __USE_XOPEN_EXTENDED +/* Find the first occurrence of C in S. This is the BSD name. */ +# define _HAVE_STRING_ARCH_index 1 +# define index(s, c) \ + (__extension__ (__builtin_constant_p (c) \ + ? __strchr_c ((s), ((c) & 0xff) << 8) \ + : __strchr_g ((s), (c)))) +# endif +# 1384 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + + +/* Find the last occurrence of C in S. */ +# define _HAVE_STRING_ARCH_strrchr 1 +# define strrchr(s, c) \ + (__extension__ (__builtin_constant_p (c) \ + ? 
__strrchr_c ((s), ((c) & 0xff) << 8) \ + : __strrchr_g ((s), (c)))) + +# ifdef __i686__ +__STRING_INLINE char *__strrchr_c (const char *__s, int __c); + +__STRING_INLINE char * +__strrchr_c (const char *__s, int __c) +{ + register unsigned long int __d0, __d1; + register char *__res; + __asm__ __volatile__ + ("cld\n" + "1:\n\t" + "lodsb\n\t" + "cmpb %h2,%b2\n\t" + "cmove %1,%0\n\t" + "testb %b2,%b2\n\t" + "jne 1b" + : "=d" (__res), "=&S" (__d0), "=&a" (__d1) + : "0" (1), "1" (__s), "2" (__c), + "m" ( *(struct { char __x[0xfffffff]; } *)__s) + : "cc"); + return __res - 1; +} + +__STRING_INLINE char *__strrchr_g (const char *__s, int __c); + +__STRING_INLINE char * +__strrchr_g (const char *__s, int __c) +{ + register unsigned long int __d0, __d1; + register char *__res; + __asm__ __volatile__ + ("movb %b2,%h2\n" + "cld\n\t" + "1:\n\t" + "lodsb\n\t" + "cmpb %h2,%b2\n\t" + "cmove %1,%0\n\t" + "testb %b2,%b2\n\t" + "jne 1b" + : "=d" (__res), "=&S" (__d0), "=&a" (__d1) + : "0" (1), "1" (__s), "2" (__c), + "m" ( *(struct { char __x[0xfffffff]; } *)__s) + : "cc"); + return __res - 1; +} +# else +# 1439 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 +__STRING_INLINE char *__strrchr_c (const char *__s, int __c); + +__STRING_INLINE char * +__strrchr_c (const char *__s, int __c) +{ + register unsigned long int __d0, __d1; + register char *__res; + __asm__ __volatile__ + ("cld\n" + "1:\n\t" + "lodsb\n\t" + "cmpb %%ah,%%al\n\t" + "jne 2f\n\t" + "leal -1(%%esi),%0\n" + "2:\n\t" + "testb %%al,%%al\n\t" + "jne 1b" + : "=d" (__res), "=&S" (__d0), "=&a" (__d1) + : "0" (0), "1" (__s), "2" (__c), + "m" ( *(struct { char __x[0xfffffff]; } *)__s) + : "cc"); + return __res; +} + +__STRING_INLINE char *__strrchr_g (const char *__s, int __c); + +__STRING_INLINE char * +__strrchr_g (const char *__s, int __c) +{ + register unsigned long int __d0, __d1; + register char *__res; + __asm__ __volatile__ + ("movb %%al,%%ah\n" + "cld\n\t" + "1:\n\t" + "lodsb\n\t" + "cmpb %%ah,%%al\n\t" + "jne 
2f\n\t" + "leal -1(%%esi),%0\n" + "2:\n\t" + "testb %%al,%%al\n\t" + "jne 1b" + : "=r" (__res), "=&S" (__d0), "=&a" (__d1) + : "0" (0), "1" (__s), "2" (__c), + "m" ( *(struct { char __x[0xfffffff]; } *)__s) + : "cc"); + return __res; +} +# endif +# 1488 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + + +# if defined __USE_BSD || defined __USE_XOPEN_EXTENDED +/* Find the last occurrence of C in S. This is the BSD name. */ +# define _HAVE_STRING_ARCH_rindex 1 +# define rindex(s, c) \ + (__extension__ (__builtin_constant_p (c) \ + ? __strrchr_c ((s), ((c) & 0xff) << 8) \ + : __strrchr_g ((s), (c)))) +# endif +# 1498 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + + +/* Return the length of the initial segment of S which + consists entirely of characters not in REJECT. */ +# define _HAVE_STRING_ARCH_strcspn 1 +# define strcspn(s, reject) \ + (__extension__ (__builtin_constant_p (reject) && sizeof ((reject)[0]) == 1 \ + ? ((reject)[0] == '\0' \ + ? strlen (s) \ + : ((reject)[1] == '\0' \ + ? 
__strcspn_c1 ((s), (((reject)[0] << 8) & 0xff00)) \ + : __strcspn_cg ((s), (reject), strlen (reject)))) \ + : __strcspn_g ((s), (reject)))) + +__STRING_INLINE size_t __strcspn_c1 (const char *__s, int __reject); + +# ifndef _FORCE_INLINES +__STRING_INLINE size_t +__strcspn_c1 (const char *__s, int __reject) +{ + register unsigned long int __d0; + register char *__res; + __asm__ __volatile__ + ("1:\n\t" + "movb (%0),%%al\n\t" + "leal 1(%0),%0\n\t" + "cmpb %%ah,%%al\n\t" + "je 2f\n\t" + "testb %%al,%%al\n\t" + "jne 1b\n" + "2:" + : "=r" (__res), "=&a" (__d0) + : "0" (__s), "1" (__reject), + "m" ( *(struct { char __x[0xfffffff]; } *)__s) + : "cc"); + return (__res - 1) - __s; +} +# endif +# 1536 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + +__STRING_INLINE size_t __strcspn_cg (const char *__s, const char __reject[], + size_t __reject_len); + +__STRING_INLINE size_t +__strcspn_cg (const char *__s, const char __reject[], size_t __reject_len) +{ + register unsigned long int __d0, __d1, __d2; + register const char *__res; + __asm__ __volatile__ + ("cld\n" + "1:\n\t" + "lodsb\n\t" + "testb %%al,%%al\n\t" + "je 2f\n\t" + "movl %5,%%edi\n\t" + "movl %6,%%ecx\n\t" + "repne; scasb\n\t" + "jne 1b\n" + "2:" + : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&D" (__d2) + : "0" (__s), "d" (__reject), "g" (__reject_len) + : "memory", "cc"); + return (__res - 1) - __s; +} + +__STRING_INLINE size_t __strcspn_g (const char *__s, const char *__reject); +# ifdef __PIC__ + +__STRING_INLINE size_t +__strcspn_g (const char *__s, const char *__reject) +{ + register unsigned long int __d0, __d1, __d2; + register const char *__res; + __asm__ __volatile__ + ("pushl %%ebx\n\t" + "movl %4,%%edi\n\t" + "cld\n\t" + "repne; scasb\n\t" + "notl %%ecx\n\t" + "leal -1(%%ecx),%%ebx\n" + "1:\n\t" + "lodsb\n\t" + "testb %%al,%%al\n\t" + "je 2f\n\t" + "movl %4,%%edi\n\t" + "movl %%ebx,%%ecx\n\t" + "repne; scasb\n\t" + "jne 1b\n" + "2:\n\t" + "popl %%ebx" + : "=S" (__res), "=&a" (__d0), "=&c" (__d1), 
"=&D" (__d2) + : "r" (__reject), "0" (__s), "1" (0), "2" (0xffffffff) + : "memory", "cc"); + return (__res - 1) - __s; +} +# else +# 1593 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 +__STRING_INLINE size_t +__strcspn_g (const char *__s, const char *__reject) +{ + register unsigned long int __d0, __d1, __d2, __d3; + register const char *__res; + __asm__ __volatile__ + ("cld\n\t" + "repne; scasb\n\t" + "notl %%ecx\n\t" + "leal -1(%%ecx),%%edx\n" + "1:\n\t" + "lodsb\n\t" + "testb %%al,%%al\n\t" + "je 2f\n\t" + "movl %%ebx,%%edi\n\t" + "movl %%edx,%%ecx\n\t" + "repne; scasb\n\t" + "jne 1b\n" + "2:" + : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&D" (__d2), "=&d" (__d3) + : "0" (__s), "1" (0), "2" (0xffffffff), "3" (__reject), "b" (__reject) + /* Clobber memory, otherwise GCC cannot handle this. */ + : "memory", "cc"); + return (__res - 1) - __s; +} +# endif +# 1619 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + + +/* Return the length of the initial segment of S which + consists entirely of characters in ACCEPT. */ +# define _HAVE_STRING_ARCH_strspn 1 +# define strspn(s, accept) \ + (__extension__ (__builtin_constant_p (accept) && sizeof ((accept)[0]) == 1 \ + ? ((accept)[0] == '\0' \ + ? ((void) (s), 0) \ + : ((accept)[1] == '\0' \ + ? __strspn_c1 ((s), (((accept)[0] << 8 ) & 0xff00)) \ + : __strspn_cg ((s), (accept), strlen (accept)))) \ + : __strspn_g ((s), (accept)))) + +# ifndef _FORCE_INLINES +__STRING_INLINE size_t __strspn_c1 (const char *__s, int __accept); + +__STRING_INLINE size_t +__strspn_c1 (const char *__s, int __accept) +{ + register unsigned long int __d0; + register char *__res; + /* Please note that __accept never can be '\0'. 
*/ + __asm__ __volatile__ + ("1:\n\t" + "movb (%0),%b1\n\t" + "leal 1(%0),%0\n\t" + "cmpb %h1,%b1\n\t" + "je 1b" + : "=r" (__res), "=&q" (__d0) + : "0" (__s), "1" (__accept), + "m" ( *(struct { char __x[0xfffffff]; } *)__s) + : "cc"); + return (__res - 1) - __s; +} +# endif +# 1655 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + +__STRING_INLINE size_t __strspn_cg (const char *__s, const char __accept[], + size_t __accept_len); + +__STRING_INLINE size_t +__strspn_cg (const char *__s, const char __accept[], size_t __accept_len) +{ + register unsigned long int __d0, __d1, __d2; + register const char *__res; + __asm__ __volatile__ + ("cld\n" + "1:\n\t" + "lodsb\n\t" + "testb %%al,%%al\n\t" + "je 2f\n\t" + "movl %5,%%edi\n\t" + "movl %6,%%ecx\n\t" + "repne; scasb\n\t" + "je 1b\n" + "2:" + : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&D" (__d2) + : "0" (__s), "g" (__accept), "g" (__accept_len), + /* Since we do not know how large the memory we access it, use a + really large amount. */ + "m" ( *(struct { char __x[0xfffffff]; } *)__s), + "m" ( *(struct { __extension__ char __x[__accept_len]; } *)__accept) + : "cc"); + return (__res - 1) - __s; +} + +__STRING_INLINE size_t __strspn_g (const char *__s, const char *__accept); +# ifdef __PIC__ + +__STRING_INLINE size_t +__strspn_g (const char *__s, const char *__accept) +{ + register unsigned long int __d0, __d1, __d2; + register const char *__res; + __asm__ __volatile__ + ("pushl %%ebx\n\t" + "cld\n\t" + "repne; scasb\n\t" + "notl %%ecx\n\t" + "leal -1(%%ecx),%%ebx\n" + "1:\n\t" + "lodsb\n\t" + "testb %%al,%%al\n\t" + "je 2f\n\t" + "movl %%edx,%%edi\n\t" + "movl %%ebx,%%ecx\n\t" + "repne; scasb\n\t" + "je 1b\n" + "2:\n\t" + "popl %%ebx" + : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&D" (__d2) + : "d" (__accept), "0" (__s), "1" (0), "2" (0xffffffff), "3" (__accept) + : "memory", "cc"); + return (__res - 1) - __s; +} +# else +# 1715 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 +__STRING_INLINE size_t +__strspn_g 
(const char *__s, const char *__accept) +{ + register unsigned long int __d0, __d1, __d2, __d3; + register const char *__res; + __asm__ __volatile__ + ("cld\n\t" + "repne; scasb\n\t" + "notl %%ecx\n\t" + "leal -1(%%ecx),%%edx\n" + "1:\n\t" + "lodsb\n\t" + "testb %%al,%%al\n\t" + "je 2f\n\t" + "movl %%ebx,%%edi\n\t" + "movl %%edx,%%ecx\n\t" + "repne; scasb\n\t" + "je 1b\n" + "2:" + : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&D" (__d2), "=&d" (__d3) + : "0" (__s), "1" (0), "2" (0xffffffff), "3" (__accept), "b" (__accept) + : "memory", "cc"); + return (__res - 1) - __s; +} +# endif +# 1740 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + + +/* Find the first occurrence in S of any character in ACCEPT. */ +# define _HAVE_STRING_ARCH_strpbrk 1 +# define strpbrk(s, accept) \ + (__extension__ (__builtin_constant_p (accept) && sizeof ((accept)[0]) == 1 \ + ? ((accept)[0] == '\0' \ + ? ((void) (s), (char *) 0) \ + : ((accept)[1] == '\0' \ + ? strchr ((s), (accept)[0]) \ + : __strpbrk_cg ((s), (accept), strlen (accept)))) \ + : __strpbrk_g ((s), (accept)))) + +__STRING_INLINE char *__strpbrk_cg (const char *__s, const char __accept[], + size_t __accept_len); + +__STRING_INLINE char * +__strpbrk_cg (const char *__s, const char __accept[], size_t __accept_len) +{ + register unsigned long int __d0, __d1, __d2; + register char *__res; + __asm__ __volatile__ + ("cld\n" + "1:\n\t" + "lodsb\n\t" + "testb %%al,%%al\n\t" + "je 2f\n\t" + "movl %5,%%edi\n\t" + "movl %6,%%ecx\n\t" + "repne; scasb\n\t" + "jne 1b\n\t" + "decl %0\n\t" + "jmp 3f\n" + "2:\n\t" + "xorl %0,%0\n" + "3:" + : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&D" (__d2) + : "0" (__s), "d" (__accept), "g" (__accept_len) + : "memory", "cc"); + return __res; +} + +__STRING_INLINE char *__strpbrk_g (const char *__s, const char *__accept); +# ifdef __PIC__ + +__STRING_INLINE char * +__strpbrk_g (const char *__s, const char *__accept) +{ + register unsigned long int __d0, __d1, __d2; + register char *__res; + __asm__ 
__volatile__ + ("pushl %%ebx\n\t" + "movl %%edx,%%edi\n\t" + "cld\n\t" + "repne; scasb\n\t" + "notl %%ecx\n\t" + "leal -1(%%ecx),%%ebx\n" + "1:\n\t" + "lodsb\n\t" + "testb %%al,%%al\n\t" + "je 2f\n\t" + "movl %%edx,%%edi\n\t" + "movl %%ebx,%%ecx\n\t" + "repne; scasb\n\t" + "jne 1b\n\t" + "decl %0\n\t" + "jmp 3f\n" + "2:\n\t" + "xorl %0,%0\n" + "3:\n\t" + "popl %%ebx" + : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&D" (__d2) + : "d" (__accept), "0" (__s), "1" (0), "2" (0xffffffff) + : "memory", "cc"); + return __res; +} +# else +# 1817 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 +__STRING_INLINE char * +__strpbrk_g (const char *__s, const char *__accept) +{ + register unsigned long int __d0, __d1, __d2, __d3; + register char *__res; + __asm__ __volatile__ + ("movl %%ebx,%%edi\n\t" + "cld\n\t" + "repne; scasb\n\t" + "notl %%ecx\n\t" + "leal -1(%%ecx),%%edx\n" + "1:\n\t" + "lodsb\n\t" + "testb %%al,%%al\n\t" + "je 2f\n\t" + "movl %%ebx,%%edi\n\t" + "movl %%edx,%%ecx\n\t" + "repne; scasb\n\t" + "jne 1b\n\t" + "decl %0\n\t" + "jmp 3f\n" + "2:\n\t" + "xorl %0,%0\n" + "3:" + : "=S" (__res), "=&a" (__d0), "=&c" (__d1), "=&d" (__d2), "=&D" (__d3) + : "0" (__s), "1" (0), "2" (0xffffffff), "b" (__accept) + : "memory", "cc"); + return __res; +} +# endif +# 1847 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + + +/* Find the first occurrence of NEEDLE in HAYSTACK. */ +# define _HAVE_STRING_ARCH_strstr 1 +# define strstr(haystack, needle) \ + (__extension__ (__builtin_constant_p (needle) && sizeof ((needle)[0]) == 1 \ + ? ((needle)[0] == '\0' \ + ? (haystack) \ + : ((needle)[1] == '\0' \ + ? strchr ((haystack), (needle)[0]) \ + : __strstr_cg ((haystack), (needle), \ + strlen (needle)))) \ + : __strstr_g ((haystack), (needle)))) + +/* Please note that this function need not handle NEEDLEs with a + length shorter than two. 
*/ +__STRING_INLINE char *__strstr_cg (const char *__haystack, + const char __needle[], + size_t __needle_len); + +__STRING_INLINE char * +__strstr_cg (const char *__haystack, const char __needle[], + size_t __needle_len) +{ + register unsigned long int __d0, __d1, __d2; + register char *__res; + __asm__ __volatile__ + ("cld\n" \ + "1:\n\t" + "movl %6,%%edi\n\t" + "movl %5,%%eax\n\t" + "movl %4,%%ecx\n\t" + "repe; cmpsb\n\t" + "je 2f\n\t" + "cmpb $0,-1(%%esi)\n\t" + "leal 1(%%eax),%5\n\t" + "jne 1b\n\t" + "xorl %%eax,%%eax\n" + "2:" + : "=&a" (__res), "=&S" (__d0), "=&D" (__d1), "=&c" (__d2) + : "g" (__needle_len), "1" (__haystack), "d" (__needle) + : "memory", "cc"); + return __res; +} + +__STRING_INLINE char *__strstr_g (const char *__haystack, + const char *__needle); +# ifdef __PIC__ + +__STRING_INLINE char * +__strstr_g (const char *__haystack, const char *__needle) +{ + register unsigned long int __d0, __d1, __d2; + register char *__res; + __asm__ __volatile__ + ("cld\n\t" + "repne; scasb\n\t" + "notl %%ecx\n\t" + "pushl %%ebx\n\t" + "decl %%ecx\n\t" /* NOTE! This also sets Z if searchstring='' */ + "movl %%ecx,%%ebx\n" + "1:\n\t" + "movl %%edx,%%edi\n\t" + "movl %%esi,%%eax\n\t" + "movl %%ebx,%%ecx\n\t" + "repe; cmpsb\n\t" + "je 2f\n\t" /* also works for empty string, see above */ + "cmpb $0,-1(%%esi)\n\t" + "leal 1(%%eax),%%esi\n\t" + "jne 1b\n\t" + "xorl %%eax,%%eax\n" + "2:\n\t" + "popl %%ebx" + : "=&a" (__res), "=&c" (__d0), "=&S" (__d1), "=&D" (__d2) + : "0" (0), "1" (0xffffffff), "2" (__haystack), "3" (__needle), + "d" (__needle) + : "memory", "cc"); + return __res; +} +# else +# 1927 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 +__STRING_INLINE char * +__strstr_g (const char *__haystack, const char *__needle) +{ + register unsigned long int __d0, __d1, __d2, __d3; + register char *__res; + __asm__ __volatile__ + ("cld\n\t" + "repne; scasb\n\t" + "notl %%ecx\n\t" + "decl %%ecx\n\t" /* NOTE! 
This also sets Z if searchstring='' */ + "movl %%ecx,%%edx\n" + "1:\n\t" + "movl %%ebx,%%edi\n\t" + "movl %%esi,%%eax\n\t" + "movl %%edx,%%ecx\n\t" + "repe; cmpsb\n\t" + "je 2f\n\t" /* also works for empty string, see above */ + "cmpb $0,-1(%%esi)\n\t" + "leal 1(%%eax),%%esi\n\t" + "jne 1b\n\t" + "xorl %%eax,%%eax\n" + "2:" + : "=&a" (__res), "=&c" (__d0), "=&S" (__d1), "=&D" (__d2), "=&d" (__d3) + : "0" (0), "1" (0xffffffff), "2" (__haystack), "3" (__needle), + "b" (__needle) + : "memory", "cc"); + return __res; +} +# endif +# 1956 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + + +/* Bit find functions. We define only the i686 version since for the other + processors gcc generates good code. */ +# if defined __USE_BSD || defined __USE_XOPEN_EXTENDED +# ifdef __i686__ +# define _HAVE_STRING_ARCH_ffs 1 +# define ffs(word) (__builtin_constant_p (word) \ + ? __builtin_ffs (word) \ + : ({ int __cnt, __tmp; \ + __asm__ __volatile__ \ + ("bsfl %2,%0\n\t" \ + "cmovel %1,%0" \ + : "=&r" (__cnt), "=r" (__tmp) \ + : "rm" (word), "1" (-1)); \ + __cnt + 1; })) + +# ifndef ffsl +# define ffsl(word) ffs(word) +# endif +# 1976 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 +# endif /* i686 */ +# 1977 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 +# endif /* BSD || X/Open */ +# 1978 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + +# ifndef _FORCE_INLINES +# undef __STRING_INLINE +# endif +# 1982 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + +# endif /* use string inlines && GNU CC */ +# 1984 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 + +#endif +# 1986 "/usr/include/x86_64-linux-gnu/bits/string.h" 3 4 +# 633 "/usr/include/string.h" 2 3 4 + +/* These are generic optimizations which do not add too much inline code. 
*/ +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 635 "/usr/include/string.h" 3 4 +# 1 "/usr/include/x86_64-linux-gnu/bits/string2.h" 1 3 4 +/* Machine-independant string function optimizations. + Copyright (C) 1997-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper , 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _STRING_H +# error "Never use directly; include instead." +#endif +# 23 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + +#ifndef __NO_STRING_INLINES + +/* Unlike the definitions in the header the + definitions contained here are not optimized down to assembler + level. Those optimizations are not always a good idea since this + means the code size increases a lot. Instead the definitions here + optimize some functions in a way which do not dramatically + increase the code size and which do not use assembler. The main + trick is to use GCC's `__builtin_constant_p' function. + + Every function XXX which has a defined version in + must be accompanied by a symbol _HAVE_STRING_ARCH_XXX + to make sure we don't get redefinitions. + + We must use here macros instead of inline functions since the + trick won't work with the latter. 
*/ + +#ifndef __STRING_INLINE +# ifdef __cplusplus +# define __STRING_INLINE inline +# else +# 45 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# define __STRING_INLINE __extern_inline +# endif +# 47 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +#endif +# 48 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + +#if _STRING_ARCH_unaligned +/* If we can do unaligned memory accesses we must know the endianess. */ +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 51 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# 1 "/usr/include/endian.h" 1 3 4 +/* Copyright (C) 1992-2014 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + . */ + +#ifndef _ENDIAN_H +#define _ENDIAN_H 1 + +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 21 "/usr/include/endian.h" 3 4 +# 22 "/usr/include/endian.h" 3 4 + +/* Definitions for byte order, according to significance of bytes, + from low addresses to high addresses. 
The value is what you get by + putting '4' in the most significant byte, '3' in the second most + significant byte, '2' in the second least significant byte, and '1' + in the least significant byte, and then writing down one digit for + each byte, starting with the byte at the lowest address at the left, + and proceeding to the byte with the highest address at the right. */ + +#define __LITTLE_ENDIAN 1234 +#define __BIG_ENDIAN 4321 +#define __PDP_ENDIAN 3412 + +/* This file defines `__BYTE_ORDER' for the particular machine. */ +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 36 "/usr/include/endian.h" 3 4 +# 1 "/usr/include/x86_64-linux-gnu/bits/endian.h" 1 3 4 +/* i386/x86_64 are little-endian. */ + +#ifndef _ENDIAN_H +# error "Never use directly; include instead." +#endif +# 6 "/usr/include/x86_64-linux-gnu/bits/endian.h" 3 4 + +#define __BYTE_ORDER __LITTLE_ENDIAN +# 37 "/usr/include/endian.h" 2 3 4 + +/* Some machines may need to use a different endianness for floating point + values. */ +#ifndef __FLOAT_WORD_ORDER +# define __FLOAT_WORD_ORDER __BYTE_ORDER +#endif +# 43 "/usr/include/endian.h" 3 4 + +#ifdef __USE_BSD +# define LITTLE_ENDIAN __LITTLE_ENDIAN +# define BIG_ENDIAN __BIG_ENDIAN +# define PDP_ENDIAN __PDP_ENDIAN +# define BYTE_ORDER __BYTE_ORDER +#endif +# 50 "/usr/include/endian.h" 3 4 + +#if __BYTE_ORDER == __LITTLE_ENDIAN +# define __LONG_LONG_PAIR(HI, LO) LO, HI +#elif __BYTE_ORDER == __BIG_ENDIAN +# 54 "/usr/include/endian.h" 3 4 +# define __LONG_LONG_PAIR(HI, LO) HI, LO +#endif +# 56 "/usr/include/endian.h" 3 4 + + +#if defined __USE_BSD && !defined __ASSEMBLER__ +/* Conversion interfaces. 
*/ +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 60 "/usr/include/endian.h" 3 4 +# 61 "/usr/include/endian.h" 3 4 + +# if __BYTE_ORDER == __LITTLE_ENDIAN +# define htobe16(x) __bswap_16 (x) +# define htole16(x) (x) +# define be16toh(x) __bswap_16 (x) +# define le16toh(x) (x) + +# define htobe32(x) __bswap_32 (x) +# define htole32(x) (x) +# define be32toh(x) __bswap_32 (x) +# define le32toh(x) (x) + +# define htobe64(x) __bswap_64 (x) +# define htole64(x) (x) +# define be64toh(x) __bswap_64 (x) +# define le64toh(x) (x) + +# else +# 79 "/usr/include/endian.h" 3 4 +# define htobe16(x) (x) +# define htole16(x) __bswap_16 (x) +# define be16toh(x) (x) +# define le16toh(x) __bswap_16 (x) + +# define htobe32(x) (x) +# define htole32(x) __bswap_32 (x) +# define be32toh(x) (x) +# define le32toh(x) __bswap_32 (x) + +# define htobe64(x) (x) +# define htole64(x) __bswap_64 (x) +# define be64toh(x) (x) +# define le64toh(x) __bswap_64 (x) +# endif +# 94 "/usr/include/endian.h" 3 4 +#endif +# 95 "/usr/include/endian.h" 3 4 + +#endif /* endian.h */ +# 97 "/usr/include/endian.h" 3 4 +# 52 "/usr/include/x86_64-linux-gnu/bits/string2.h" 2 3 4 +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 52 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# 53 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + +# if __BYTE_ORDER == __LITTLE_ENDIAN +# define __STRING2_SMALL_GET16(src, idx) \ + (((const unsigned char *) (const char *) (src))[idx + 1] << 8 \ + | ((const unsigned char *) (const char *) (src))[idx]) +# define __STRING2_SMALL_GET32(src, idx) \ + (((((const unsigned char *) (const char *) (src))[idx + 3] << 8 \ + | ((const unsigned char *) (const char *) (src))[idx + 2]) << 8 \ + | ((const unsigned char *) (const char *) (src))[idx + 1]) << 8 \ + | ((const unsigned char *) (const char *) (src))[idx]) +# else +# 64 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# define 
__STRING2_SMALL_GET16(src, idx) \ + (((const unsigned char *) (const char *) (src))[idx] << 8 \ + | ((const unsigned char *) (const char *) (src))[idx + 1]) +# define __STRING2_SMALL_GET32(src, idx) \ + (((((const unsigned char *) (const char *) (src))[idx] << 8 \ + | ((const unsigned char *) (const char *) (src))[idx + 1]) << 8 \ + | ((const unsigned char *) (const char *) (src))[idx + 2]) << 8 \ + | ((const unsigned char *) (const char *) (src))[idx + 3]) +# endif +# 73 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +#else +# 74 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +/* These are a few types we need for the optimizations if we cannot + use unaligned memory accesses. */ +# define __STRING2_COPY_TYPE(N) \ + typedef struct { unsigned char __arr[N]; } \ + __attribute__ ((__packed__)) __STRING2_COPY_ARR##N +__STRING2_COPY_TYPE (2); +__STRING2_COPY_TYPE (3); +__STRING2_COPY_TYPE (4); +__STRING2_COPY_TYPE (5); +__STRING2_COPY_TYPE (6); +__STRING2_COPY_TYPE (7); +__STRING2_COPY_TYPE (8); +# undef __STRING2_COPY_TYPE +#endif +# 88 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + +/* Dereferencing a pointer arg to run sizeof on it fails for the void + pointer case, so we use this instead. + Note that __x is evaluated twice. */ +#define __string2_1bptr_p(__x) \ + ((size_t)(const void *)((__x) + 1) - (size_t)(const void *)(__x) == 1) + +/* Set N bytes of S to C. */ +#if !defined _HAVE_STRING_ARCH_memset +# if !__GNUC_PREREQ (3, 0) +# if _STRING_ARCH_unaligned +# define memset(s, c, n) \ + (__extension__ (__builtin_constant_p (n) && (n) <= 16 \ + ? ((n) == 1 \ + ? __memset_1 (s, c) \ + : __memset_gc (s, c, n)) \ + : (__builtin_constant_p (c) && (c) == '\0' \ + ? 
({ void *__s = (s); __bzero (__s, n); __s; }) \ + : memset (s, c, n)))) + +# define __memset_1(s, c) ({ void *__s = (s); \ + *((__uint8_t *) __s) = (__uint8_t) c; __s; }) + +# define __memset_gc(s, c, n) \ + ({ void *__s = (s); \ + union { \ + unsigned int __ui; \ + unsigned short int __usi; \ + unsigned char __uc; \ + } *__u = __s; \ + __uint8_t __c = (__uint8_t) (c); \ + \ + /* This `switch' statement will be removed at compile-time. */ \ + switch ((unsigned int) (n)) \ + { \ + case 15: \ + __u->__ui = __c * 0x01010101; \ + __u = __extension__ ((void *) __u + 4); \ + case 11: \ + __u->__ui = __c * 0x01010101; \ + __u = __extension__ ((void *) __u + 4); \ + case 7: \ + __u->__ui = __c * 0x01010101; \ + __u = __extension__ ((void *) __u + 4); \ + case 3: \ + __u->__usi = (unsigned short int) __c * 0x0101; \ + __u = __extension__ ((void *) __u + 2); \ + __u->__uc = (unsigned char) __c; \ + break; \ + \ + case 14: \ + __u->__ui = __c * 0x01010101; \ + __u = __extension__ ((void *) __u + 4); \ + case 10: \ + __u->__ui = __c * 0x01010101; \ + __u = __extension__ ((void *) __u + 4); \ + case 6: \ + __u->__ui = __c * 0x01010101; \ + __u = __extension__ ((void *) __u + 4); \ + case 2: \ + __u->__usi = (unsigned short int) __c * 0x0101; \ + break; \ + \ + case 13: \ + __u->__ui = __c * 0x01010101; \ + __u = __extension__ ((void *) __u + 4); \ + case 9: \ + __u->__ui = __c * 0x01010101; \ + __u = __extension__ ((void *) __u + 4); \ + case 5: \ + __u->__ui = __c * 0x01010101; \ + __u = __extension__ ((void *) __u + 4); \ + case 1: \ + __u->__uc = (unsigned char) __c; \ + break; \ + \ + case 16: \ + __u->__ui = __c * 0x01010101; \ + __u = __extension__ ((void *) __u + 4); \ + case 12: \ + __u->__ui = __c * 0x01010101; \ + __u = __extension__ ((void *) __u + 4); \ + case 8: \ + __u->__ui = __c * 0x01010101; \ + __u = __extension__ ((void *) __u + 4); \ + case 4: \ + __u->__ui = __c * 0x01010101; \ + case 0: \ + break; \ + } \ + \ + __s; }) +# else +# 181 
"/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# define memset(s, c, n) \ + (__extension__ (__builtin_constant_p (c) && (c) == '\0' \ + ? ({ void *__s = (s); __bzero (__s, n); __s; }) \ + : memset (s, c, n))) +# endif +# 186 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# endif +# 187 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + +/* GCC < 3.0 optimizes memset(s, 0, n) but not bzero(s, n). + The optimization is broken before EGCS 1.1. + GCC 3.0+ has __builtin_bzero as well, but at least till GCC 3.4 + if it decides to call the library function, it calls memset + and not bzero. */ +# if __GNUC_PREREQ (2, 91) +# define __bzero(s, n) __builtin_memset (s, '\0', n) +# endif +# 196 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + +#endif +# 198 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + + +/* Copy N bytes from SRC to DEST, returning pointer to byte following the + last copied. */ +#ifdef __USE_GNU +# if !defined _HAVE_STRING_ARCH_mempcpy || defined _FORCE_INLINES +# ifndef _HAVE_STRING_ARCH_mempcpy +# if __GNUC_PREREQ (3, 4) +# define __mempcpy(dest, src, n) __builtin_mempcpy (dest, src, n) +# elif __GNUC_PREREQ (3, 0) +# 208 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# define __mempcpy(dest, src, n) \ + (__extension__ (__builtin_constant_p (src) && __builtin_constant_p (n) \ + && __string2_1bptr_p (src) && n <= 8 \ + ? __builtin_memcpy (dest, src, n) + (n) \ + : __mempcpy (dest, src, n))) +# else +# 214 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# define __mempcpy(dest, src, n) \ + (__extension__ (__builtin_constant_p (src) && __builtin_constant_p (n) \ + && __string2_1bptr_p (src) && n <= 8 \ + ? __mempcpy_small (dest, __mempcpy_args (src), n) \ + : __mempcpy (dest, src, n))) +# endif +# 220 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +/* In glibc we use this function frequently but for namespace reasons + we have to use the name `__mempcpy'. 
*/ +# define mempcpy(dest, src, n) __mempcpy (dest, src, n) +# endif +# 224 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + +# if !__GNUC_PREREQ (3, 0) || defined _FORCE_INLINES +# if _STRING_ARCH_unaligned +# ifndef _FORCE_INLINES +# define __mempcpy_args(src) \ + ((const char *) (src))[0], ((const char *) (src))[2], \ + ((const char *) (src))[4], ((const char *) (src))[6], \ + __extension__ __STRING2_SMALL_GET16 (src, 0), \ + __extension__ __STRING2_SMALL_GET16 (src, 4), \ + __extension__ __STRING2_SMALL_GET32 (src, 0), \ + __extension__ __STRING2_SMALL_GET32 (src, 4) +# endif +# 236 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +__STRING_INLINE void *__mempcpy_small (void *, char, char, char, char, + __uint16_t, __uint16_t, __uint32_t, + __uint32_t, size_t); +__STRING_INLINE void * +__mempcpy_small (void *__dest1, + char __src0_1, char __src2_1, char __src4_1, char __src6_1, + __uint16_t __src0_2, __uint16_t __src4_2, + __uint32_t __src0_4, __uint32_t __src4_4, + size_t __srclen) +{ + union { + __uint32_t __ui; + __uint16_t __usi; + unsigned char __uc; + unsigned char __c; + } *__u = __dest1; + switch ((unsigned int) __srclen) + { + case 1: + __u->__c = __src0_1; + __u = __extension__ ((void *) __u + 1); + break; + case 2: + __u->__usi = __src0_2; + __u = __extension__ ((void *) __u + 2); + break; + case 3: + __u->__usi = __src0_2; + __u = __extension__ ((void *) __u + 2); + __u->__c = __src2_1; + __u = __extension__ ((void *) __u + 1); + break; + case 4: + __u->__ui = __src0_4; + __u = __extension__ ((void *) __u + 4); + break; + case 5: + __u->__ui = __src0_4; + __u = __extension__ ((void *) __u + 4); + __u->__c = __src4_1; + __u = __extension__ ((void *) __u + 1); + break; + case 6: + __u->__ui = __src0_4; + __u = __extension__ ((void *) __u + 4); + __u->__usi = __src4_2; + __u = __extension__ ((void *) __u + 2); + break; + case 7: + __u->__ui = __src0_4; + __u = __extension__ ((void *) __u + 4); + __u->__usi = __src4_2; + __u = __extension__ ((void 
*) __u + 2); + __u->__c = __src6_1; + __u = __extension__ ((void *) __u + 1); + break; + case 8: + __u->__ui = __src0_4; + __u = __extension__ ((void *) __u + 4); + __u->__ui = __src4_4; + __u = __extension__ ((void *) __u + 4); + break; + } + return (void *) __u; +} +# else +# 302 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# ifndef _FORCE_INLINES +# define __mempcpy_args(src) \ + ((const char *) (src))[0], \ + __extension__ ((__STRING2_COPY_ARR2) \ + { { ((const char *) (src))[0], ((const char *) (src))[1] } }), \ + __extension__ ((__STRING2_COPY_ARR3) \ + { { ((const char *) (src))[0], ((const char *) (src))[1], \ + ((const char *) (src))[2] } }), \ + __extension__ ((__STRING2_COPY_ARR4) \ + { { ((const char *) (src))[0], ((const char *) (src))[1], \ + ((const char *) (src))[2], ((const char *) (src))[3] } }), \ + __extension__ ((__STRING2_COPY_ARR5) \ + { { ((const char *) (src))[0], ((const char *) (src))[1], \ + ((const char *) (src))[2], ((const char *) (src))[3], \ + ((const char *) (src))[4] } }), \ + __extension__ ((__STRING2_COPY_ARR6) \ + { { ((const char *) (src))[0], ((const char *) (src))[1], \ + ((const char *) (src))[2], ((const char *) (src))[3], \ + ((const char *) (src))[4], ((const char *) (src))[5] } }), \ + __extension__ ((__STRING2_COPY_ARR7) \ + { { ((const char *) (src))[0], ((const char *) (src))[1], \ + ((const char *) (src))[2], ((const char *) (src))[3], \ + ((const char *) (src))[4], ((const char *) (src))[5], \ + ((const char *) (src))[6] } }), \ + __extension__ ((__STRING2_COPY_ARR8) \ + { { ((const char *) (src))[0], ((const char *) (src))[1], \ + ((const char *) (src))[2], ((const char *) (src))[3], \ + ((const char *) (src))[4], ((const char *) (src))[5], \ + ((const char *) (src))[6], ((const char *) (src))[7] } }) +# endif +# 332 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +__STRING_INLINE void *__mempcpy_small (void *, char, __STRING2_COPY_ARR2, + __STRING2_COPY_ARR3, + __STRING2_COPY_ARR4, + 
__STRING2_COPY_ARR5, + __STRING2_COPY_ARR6, + __STRING2_COPY_ARR7, + __STRING2_COPY_ARR8, size_t); +__STRING_INLINE void * +__mempcpy_small (void *__dest, char __src1, + __STRING2_COPY_ARR2 __src2, __STRING2_COPY_ARR3 __src3, + __STRING2_COPY_ARR4 __src4, __STRING2_COPY_ARR5 __src5, + __STRING2_COPY_ARR6 __src6, __STRING2_COPY_ARR7 __src7, + __STRING2_COPY_ARR8 __src8, size_t __srclen) +{ + union { + char __c; + __STRING2_COPY_ARR2 __sca2; + __STRING2_COPY_ARR3 __sca3; + __STRING2_COPY_ARR4 __sca4; + __STRING2_COPY_ARR5 __sca5; + __STRING2_COPY_ARR6 __sca6; + __STRING2_COPY_ARR7 __sca7; + __STRING2_COPY_ARR8 __sca8; + } *__u = __dest; + switch ((unsigned int) __srclen) + { + case 1: + __u->__c = __src1; + break; + case 2: + __extension__ __u->__sca2 = __src2; + break; + case 3: + __extension__ __u->__sca3 = __src3; + break; + case 4: + __extension__ __u->__sca4 = __src4; + break; + case 5: + __extension__ __u->__sca5 = __src5; + break; + case 6: + __extension__ __u->__sca6 = __src6; + break; + case 7: + __extension__ __u->__sca7 = __src7; + break; + case 8: + __extension__ __u->__sca8 = __src8; + break; + } + return __extension__ ((void *) __u + __srclen); +} +# endif +# 386 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# endif +# 387 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# endif +# 388 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +#endif +# 389 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + + +/* Return pointer to C in S. */ +#ifndef _HAVE_STRING_ARCH_strchr +extern void *__rawmemchr (const void *__s, int __c); +# if __GNUC_PREREQ (3, 2) +# define strchr(s, c) \ + (__extension__ (__builtin_constant_p (c) && !__builtin_constant_p (s) \ + && (c) == '\0' \ + ? (char *) __rawmemchr (s, c) \ + : __builtin_strchr (s, c))) +# else +# 401 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# define strchr(s, c) \ + (__extension__ (__builtin_constant_p (c) && (c) == '\0' \ + ? 
(char *) __rawmemchr (s, c) \ + : strchr (s, c))) +# endif +# 406 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +#endif +# 407 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + + +/* Copy SRC to DEST. */ +#if (!defined _HAVE_STRING_ARCH_strcpy && !__GNUC_PREREQ (3, 0)) \ + || defined _FORCE_INLINES +# if !defined _HAVE_STRING_ARCH_strcpy && !__GNUC_PREREQ (3, 0) +# define strcpy(dest, src) \ + (__extension__ (__builtin_constant_p (src) \ + ? (__string2_1bptr_p (src) && strlen (src) + 1 <= 8 \ + ? __strcpy_small (dest, __strcpy_args (src), \ + strlen (src) + 1) \ + : (char *) memcpy (dest, src, strlen (src) + 1)) \ + : strcpy (dest, src))) +# endif +# 421 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + +# if _STRING_ARCH_unaligned +# ifndef _FORCE_INLINES +# define __strcpy_args(src) \ + __extension__ __STRING2_SMALL_GET16 (src, 0), \ + __extension__ __STRING2_SMALL_GET16 (src, 4), \ + __extension__ __STRING2_SMALL_GET32 (src, 0), \ + __extension__ __STRING2_SMALL_GET32 (src, 4) +# endif +# 430 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +__STRING_INLINE char *__strcpy_small (char *, __uint16_t, __uint16_t, + __uint32_t, __uint32_t, size_t); +__STRING_INLINE char * +__strcpy_small (char *__dest, + __uint16_t __src0_2, __uint16_t __src4_2, + __uint32_t __src0_4, __uint32_t __src4_4, + size_t __srclen) +{ + union { + __uint32_t __ui; + __uint16_t __usi; + unsigned char __uc; + } *__u = (void *) __dest; + switch ((unsigned int) __srclen) + { + case 1: + __u->__uc = '\0'; + break; + case 2: + __u->__usi = __src0_2; + break; + case 3: + __u->__usi = __src0_2; + __u = __extension__ ((void *) __u + 2); + __u->__uc = '\0'; + break; + case 4: + __u->__ui = __src0_4; + break; + case 5: + __u->__ui = __src0_4; + __u = __extension__ ((void *) __u + 4); + __u->__uc = '\0'; + break; + case 6: + __u->__ui = __src0_4; + __u = __extension__ ((void *) __u + 4); + __u->__usi = __src4_2; + break; + case 7: + __u->__ui = __src0_4; + __u = __extension__ ((void *) __u 
+ 4); + __u->__usi = __src4_2; + __u = __extension__ ((void *) __u + 2); + __u->__uc = '\0'; + break; + case 8: + __u->__ui = __src0_4; + __u = __extension__ ((void *) __u + 4); + __u->__ui = __src4_4; + break; + } + return __dest; +} +# else +# 485 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# ifndef _FORCE_INLINES +# define __strcpy_args(src) \ + __extension__ ((__STRING2_COPY_ARR2) \ + { { ((const char *) (src))[0], '\0' } }), \ + __extension__ ((__STRING2_COPY_ARR3) \ + { { ((const char *) (src))[0], ((const char *) (src))[1], \ + '\0' } }), \ + __extension__ ((__STRING2_COPY_ARR4) \ + { { ((const char *) (src))[0], ((const char *) (src))[1], \ + ((const char *) (src))[2], '\0' } }), \ + __extension__ ((__STRING2_COPY_ARR5) \ + { { ((const char *) (src))[0], ((const char *) (src))[1], \ + ((const char *) (src))[2], ((const char *) (src))[3], \ + '\0' } }), \ + __extension__ ((__STRING2_COPY_ARR6) \ + { { ((const char *) (src))[0], ((const char *) (src))[1], \ + ((const char *) (src))[2], ((const char *) (src))[3], \ + ((const char *) (src))[4], '\0' } }), \ + __extension__ ((__STRING2_COPY_ARR7) \ + { { ((const char *) (src))[0], ((const char *) (src))[1], \ + ((const char *) (src))[2], ((const char *) (src))[3], \ + ((const char *) (src))[4], ((const char *) (src))[5], \ + '\0' } }), \ + __extension__ ((__STRING2_COPY_ARR8) \ + { { ((const char *) (src))[0], ((const char *) (src))[1], \ + ((const char *) (src))[2], ((const char *) (src))[3], \ + ((const char *) (src))[4], ((const char *) (src))[5], \ + ((const char *) (src))[6], '\0' } }) +# endif +# 514 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +__STRING_INLINE char *__strcpy_small (char *, __STRING2_COPY_ARR2, + __STRING2_COPY_ARR3, + __STRING2_COPY_ARR4, + __STRING2_COPY_ARR5, + __STRING2_COPY_ARR6, + __STRING2_COPY_ARR7, + __STRING2_COPY_ARR8, size_t); +__STRING_INLINE char * +__strcpy_small (char *__dest, + __STRING2_COPY_ARR2 __src2, __STRING2_COPY_ARR3 __src3, + __STRING2_COPY_ARR4 
__src4, __STRING2_COPY_ARR5 __src5, + __STRING2_COPY_ARR6 __src6, __STRING2_COPY_ARR7 __src7, + __STRING2_COPY_ARR8 __src8, size_t __srclen) +{ + union { + char __c; + __STRING2_COPY_ARR2 __sca2; + __STRING2_COPY_ARR3 __sca3; + __STRING2_COPY_ARR4 __sca4; + __STRING2_COPY_ARR5 __sca5; + __STRING2_COPY_ARR6 __sca6; + __STRING2_COPY_ARR7 __sca7; + __STRING2_COPY_ARR8 __sca8; + } *__u = (void *) __dest; + switch ((unsigned int) __srclen) + { + case 1: + __u->__c = '\0'; + break; + case 2: + __extension__ __u->__sca2 = __src2; + break; + case 3: + __extension__ __u->__sca3 = __src3; + break; + case 4: + __extension__ __u->__sca4 = __src4; + break; + case 5: + __extension__ __u->__sca5 = __src5; + break; + case 6: + __extension__ __u->__sca6 = __src6; + break; + case 7: + __extension__ __u->__sca7 = __src7; + break; + case 8: + __extension__ __u->__sca8 = __src8; + break; + } + return __dest; +} +# endif +# 568 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +#endif +# 569 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + + +/* Copy SRC to DEST, returning pointer to final NUL byte. */ +#ifdef __USE_GNU +# if !defined _HAVE_STRING_ARCH_stpcpy || defined _FORCE_INLINES +# ifndef _HAVE_STRING_ARCH_stpcpy +# if __GNUC_PREREQ (3, 4) +# define __stpcpy(dest, src) __builtin_stpcpy (dest, src) +# elif __GNUC_PREREQ (3, 0) +# 578 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# define __stpcpy(dest, src) \ + (__extension__ (__builtin_constant_p (src) \ + ? (__string2_1bptr_p (src) && strlen (src) + 1 <= 8 \ + ? __builtin_strcpy (dest, src) + strlen (src) \ + : ((char *) (__mempcpy) (dest, src, strlen (src) + 1) \ + - 1)) \ + : __stpcpy (dest, src))) +# else +# 586 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# define __stpcpy(dest, src) \ + (__extension__ (__builtin_constant_p (src) \ + ? (__string2_1bptr_p (src) && strlen (src) + 1 <= 8 \ + ? 
__stpcpy_small (dest, __stpcpy_args (src), \ + strlen (src) + 1) \ + : ((char *) (__mempcpy) (dest, src, strlen (src) + 1) \ + - 1)) \ + : __stpcpy (dest, src))) +# endif +# 595 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +/* In glibc we use this function frequently but for namespace reasons + we have to use the name `__stpcpy'. */ +# define stpcpy(dest, src) __stpcpy (dest, src) +# endif +# 599 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + +# if !__GNUC_PREREQ (3, 0) || defined _FORCE_INLINES +# if _STRING_ARCH_unaligned +# ifndef _FORCE_INLINES +# define __stpcpy_args(src) \ + __extension__ __STRING2_SMALL_GET16 (src, 0), \ + __extension__ __STRING2_SMALL_GET16 (src, 4), \ + __extension__ __STRING2_SMALL_GET32 (src, 0), \ + __extension__ __STRING2_SMALL_GET32 (src, 4) +# endif +# 609 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +__STRING_INLINE char *__stpcpy_small (char *, __uint16_t, __uint16_t, + __uint32_t, __uint32_t, size_t); +__STRING_INLINE char * +__stpcpy_small (char *__dest, + __uint16_t __src0_2, __uint16_t __src4_2, + __uint32_t __src0_4, __uint32_t __src4_4, + size_t __srclen) +{ + union { + unsigned int __ui; + unsigned short int __usi; + unsigned char __uc; + char __c; + } *__u = (void *) __dest; + switch ((unsigned int) __srclen) + { + case 1: + __u->__uc = '\0'; + break; + case 2: + __u->__usi = __src0_2; + __u = __extension__ ((void *) __u + 1); + break; + case 3: + __u->__usi = __src0_2; + __u = __extension__ ((void *) __u + 2); + __u->__uc = '\0'; + break; + case 4: + __u->__ui = __src0_4; + __u = __extension__ ((void *) __u + 3); + break; + case 5: + __u->__ui = __src0_4; + __u = __extension__ ((void *) __u + 4); + __u->__uc = '\0'; + break; + case 6: + __u->__ui = __src0_4; + __u = __extension__ ((void *) __u + 4); + __u->__usi = __src4_2; + __u = __extension__ ((void *) __u + 1); + break; + case 7: + __u->__ui = __src0_4; + __u = __extension__ ((void *) __u + 4); + __u->__usi = __src4_2; + __u = __extension__ ((void *) 
__u + 2); + __u->__uc = '\0'; + break; + case 8: + __u->__ui = __src0_4; + __u = __extension__ ((void *) __u + 4); + __u->__ui = __src4_4; + __u = __extension__ ((void *) __u + 3); + break; + } + return &__u->__c; +} +# else +# 669 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# ifndef _FORCE_INLINES +# define __stpcpy_args(src) \ + __extension__ ((__STRING2_COPY_ARR2) \ + { { ((const char *) (src))[0], '\0' } }), \ + __extension__ ((__STRING2_COPY_ARR3) \ + { { ((const char *) (src))[0], ((const char *) (src))[1], \ + '\0' } }), \ + __extension__ ((__STRING2_COPY_ARR4) \ + { { ((const char *) (src))[0], ((const char *) (src))[1], \ + ((const char *) (src))[2], '\0' } }), \ + __extension__ ((__STRING2_COPY_ARR5) \ + { { ((const char *) (src))[0], ((const char *) (src))[1], \ + ((const char *) (src))[2], ((const char *) (src))[3], \ + '\0' } }), \ + __extension__ ((__STRING2_COPY_ARR6) \ + { { ((const char *) (src))[0], ((const char *) (src))[1], \ + ((const char *) (src))[2], ((const char *) (src))[3], \ + ((const char *) (src))[4], '\0' } }), \ + __extension__ ((__STRING2_COPY_ARR7) \ + { { ((const char *) (src))[0], ((const char *) (src))[1], \ + ((const char *) (src))[2], ((const char *) (src))[3], \ + ((const char *) (src))[4], ((const char *) (src))[5], \ + '\0' } }), \ + __extension__ ((__STRING2_COPY_ARR8) \ + { { ((const char *) (src))[0], ((const char *) (src))[1], \ + ((const char *) (src))[2], ((const char *) (src))[3], \ + ((const char *) (src))[4], ((const char *) (src))[5], \ + ((const char *) (src))[6], '\0' } }) +# endif +# 698 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +__STRING_INLINE char *__stpcpy_small (char *, __STRING2_COPY_ARR2, + __STRING2_COPY_ARR3, + __STRING2_COPY_ARR4, + __STRING2_COPY_ARR5, + __STRING2_COPY_ARR6, + __STRING2_COPY_ARR7, + __STRING2_COPY_ARR8, size_t); +__STRING_INLINE char * +__stpcpy_small (char *__dest, + __STRING2_COPY_ARR2 __src2, __STRING2_COPY_ARR3 __src3, + __STRING2_COPY_ARR4 __src4, 
__STRING2_COPY_ARR5 __src5, + __STRING2_COPY_ARR6 __src6, __STRING2_COPY_ARR7 __src7, + __STRING2_COPY_ARR8 __src8, size_t __srclen) +{ + union { + char __c; + __STRING2_COPY_ARR2 __sca2; + __STRING2_COPY_ARR3 __sca3; + __STRING2_COPY_ARR4 __sca4; + __STRING2_COPY_ARR5 __sca5; + __STRING2_COPY_ARR6 __sca6; + __STRING2_COPY_ARR7 __sca7; + __STRING2_COPY_ARR8 __sca8; + } *__u = (void *) __dest; + switch ((unsigned int) __srclen) + { + case 1: + __u->__c = '\0'; + break; + case 2: + __extension__ __u->__sca2 = __src2; + break; + case 3: + __extension__ __u->__sca3 = __src3; + break; + case 4: + __extension__ __u->__sca4 = __src4; + break; + case 5: + __extension__ __u->__sca5 = __src5; + break; + case 6: + __extension__ __u->__sca6 = __src6; + break; + case 7: + __extension__ __u->__sca7 = __src7; + break; + case 8: + __extension__ __u->__sca8 = __src8; + break; + } + return __dest + __srclen - 1; +} +# endif +# 752 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# endif +# 753 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# endif +# 754 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +#endif +# 755 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + + +/* Copy no more than N characters of SRC to DEST. */ +#ifndef _HAVE_STRING_ARCH_strncpy +# if __GNUC_PREREQ (3, 2) +# define strncpy(dest, src, n) __builtin_strncpy (dest, src, n) +# else +# 762 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# define strncpy(dest, src, n) \ + (__extension__ (__builtin_constant_p (src) && __builtin_constant_p (n) \ + ? (strlen (src) + 1 >= ((size_t) (n)) \ + ? (char *) memcpy (dest, src, n) \ + : strncpy (dest, src, n)) \ + : strncpy (dest, src, n))) +# endif +# 769 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +#endif +# 770 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + + +/* Append no more than N characters from SRC onto DEST. 
*/ +#ifndef _HAVE_STRING_ARCH_strncat +# ifdef _USE_STRING_ARCH_strchr +# define strncat(dest, src, n) \ + (__extension__ ({ char *__dest = (dest); \ + __builtin_constant_p (src) && __builtin_constant_p (n) \ + ? (strlen (src) < ((size_t) (n)) \ + ? strcat (__dest, src) \ + : (*((char *) __mempcpy (strchr (__dest, '\0'), \ + src, n)) = '\0', __dest)) \ + : strncat (dest, src, n); })) +# elif __GNUC_PREREQ (3, 2) +# 784 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# define strncat(dest, src, n) __builtin_strncat (dest, src, n) +# else +# 786 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# define strncat(dest, src, n) \ + (__extension__ (__builtin_constant_p (src) && __builtin_constant_p (n) \ + ? (strlen (src) < ((size_t) (n)) \ + ? strcat (dest, src) \ + : strncat (dest, src, n)) \ + : strncat (dest, src, n))) +# endif +# 793 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +#endif +# 794 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + + +/* Compare characters of S1 and S2. */ +#ifndef _HAVE_STRING_ARCH_strcmp +# if __GNUC_PREREQ (3, 2) +# define strcmp(s1, s2) \ + __extension__ \ + ({ size_t __s1_len, __s2_len; \ + (__builtin_constant_p (s1) && __builtin_constant_p (s2) \ + && (__s1_len = __builtin_strlen (s1), __s2_len = __builtin_strlen (s2), \ + (!__string2_1bptr_p (s1) || __s1_len >= 4) \ + && (!__string2_1bptr_p (s2) || __s2_len >= 4)) \ + ? __builtin_strcmp (s1, s2) \ + : (__builtin_constant_p (s1) && __string2_1bptr_p (s1) \ + && (__s1_len = __builtin_strlen (s1), __s1_len < 4) \ + ? (__builtin_constant_p (s2) && __string2_1bptr_p (s2) \ + ? __builtin_strcmp (s1, s2) \ + : __strcmp_cg (s1, s2, __s1_len)) \ + : (__builtin_constant_p (s2) && __string2_1bptr_p (s2) \ + && (__s2_len = __builtin_strlen (s2), __s2_len < 4) \ + ? (__builtin_constant_p (s1) && __string2_1bptr_p (s1) \ + ? 
__builtin_strcmp (s1, s2) \ + : __strcmp_gc (s1, s2, __s2_len)) \ + : __builtin_strcmp (s1, s2)))); }) +# else +# 819 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# define strcmp(s1, s2) \ + __extension__ \ + ({ size_t __s1_len, __s2_len; \ + (__builtin_constant_p (s1) && __builtin_constant_p (s2) \ + && (__s1_len = strlen (s1), __s2_len = strlen (s2), \ + (!__string2_1bptr_p (s1) || __s1_len >= 4) \ + && (!__string2_1bptr_p (s2) || __s2_len >= 4)) \ + ? memcmp ((const char *) (s1), (const char *) (s2), \ + (__s1_len < __s2_len ? __s1_len : __s2_len) + 1) \ + : (__builtin_constant_p (s1) && __string2_1bptr_p (s1) \ + && (__s1_len = strlen (s1), __s1_len < 4) \ + ? (__builtin_constant_p (s2) && __string2_1bptr_p (s2) \ + ? __strcmp_cc (s1, s2, __s1_len) \ + : __strcmp_cg (s1, s2, __s1_len)) \ + : (__builtin_constant_p (s2) && __string2_1bptr_p (s2) \ + && (__s2_len = strlen (s2), __s2_len < 4) \ + ? (__builtin_constant_p (s1) && __string2_1bptr_p (s1) \ + ? __strcmp_cc (s1, s2, __s2_len) \ + : __strcmp_gc (s1, s2, __s2_len)) \ + : strcmp (s1, s2)))); }) +# endif +# 840 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + +# define __strcmp_cc(s1, s2, l) \ + (__extension__ ({ int __result = \ + (((const unsigned char *) (const char *) (s1))[0] \ + - ((const unsigned char *) (const char *)(s2))[0]); \ + if (l > 0 && __result == 0) \ + { \ + __result = (((const unsigned char *) \ + (const char *) (s1))[1] \ + - ((const unsigned char *) \ + (const char *) (s2))[1]); \ + if (l > 1 && __result == 0) \ + { \ + __result = \ + (((const unsigned char *) \ + (const char *) (s1))[2] \ + - ((const unsigned char *) \ + (const char *) (s2))[2]); \ + if (l > 2 && __result == 0) \ + __result = \ + (((const unsigned char *) \ + (const char *) (s1))[3] \ + - ((const unsigned char *) \ + (const char *) (s2))[3]); \ + } \ + } \ + __result; })) + +# define __strcmp_cg(s1, s2, l1) \ + (__extension__ ({ const unsigned char *__s2 = \ + (const unsigned char *) (const char *) (s2); \ + 
int __result = \ + (((const unsigned char *) (const char *) (s1))[0] \ + - __s2[0]); \ + if (l1 > 0 && __result == 0) \ + { \ + __result = (((const unsigned char *) \ + (const char *) (s1))[1] - __s2[1]); \ + if (l1 > 1 && __result == 0) \ + { \ + __result = (((const unsigned char *) \ + (const char *) (s1))[2] - __s2[2]); \ + if (l1 > 2 && __result == 0) \ + __result = (((const unsigned char *) \ + (const char *) (s1))[3] \ + - __s2[3]); \ + } \ + } \ + __result; })) + +# define __strcmp_gc(s1, s2, l2) (- __strcmp_cg (s2, s1, l2)) +#endif +# 892 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + + +/* Compare N characters of S1 and S2. */ +#ifndef _HAVE_STRING_ARCH_strncmp +# define strncmp(s1, s2, n) \ + (__extension__ (__builtin_constant_p (n) \ + && ((__builtin_constant_p (s1) \ + && strlen (s1) < ((size_t) (n))) \ + || (__builtin_constant_p (s2) \ + && strlen (s2) < ((size_t) (n)))) \ + ? strcmp (s1, s2) : strncmp (s1, s2, n))) +#endif +# 904 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + + +/* Return the length of the initial segment of S which + consists entirely of characters not in REJECT. */ +#if !defined _HAVE_STRING_ARCH_strcspn || defined _FORCE_INLINES +# ifndef _HAVE_STRING_ARCH_strcspn +# if __GNUC_PREREQ (3, 2) +# define strcspn(s, reject) \ + __extension__ \ + ({ char __r0, __r1, __r2; \ + (__builtin_constant_p (reject) && __string2_1bptr_p (reject) \ + ? ((__builtin_constant_p (s) && __string2_1bptr_p (s)) \ + ? __builtin_strcspn (s, reject) \ + : ((__r0 = ((const char *) (reject))[0], __r0 == '\0') \ + ? strlen (s) \ + : ((__r1 = ((const char *) (reject))[1], __r1 == '\0') \ + ? __strcspn_c1 (s, __r0) \ + : ((__r2 = ((const char *) (reject))[2], __r2 == '\0') \ + ? __strcspn_c2 (s, __r0, __r1) \ + : (((const char *) (reject))[3] == '\0' \ + ? 
__strcspn_c3 (s, __r0, __r1, __r2) \ + : __builtin_strcspn (s, reject)))))) \ + : __builtin_strcspn (s, reject)); }) +# else +# 928 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# define strcspn(s, reject) \ + __extension__ \ + ({ char __r0, __r1, __r2; \ + (__builtin_constant_p (reject) && __string2_1bptr_p (reject) \ + ? ((__r0 = ((const char *) (reject))[0], __r0 == '\0') \ + ? strlen (s) \ + : ((__r1 = ((const char *) (reject))[1], __r1 == '\0') \ + ? __strcspn_c1 (s, __r0) \ + : ((__r2 = ((const char *) (reject))[2], __r2 == '\0') \ + ? __strcspn_c2 (s, __r0, __r1) \ + : (((const char *) (reject))[3] == '\0' \ + ? __strcspn_c3 (s, __r0, __r1, __r2) \ + : strcspn (s, reject))))) \ + : strcspn (s, reject)); }) +# endif +# 943 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# endif +# 944 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + +__STRING_INLINE size_t __strcspn_c1 (const char *__s, int __reject); +__STRING_INLINE size_t +__strcspn_c1 (const char *__s, int __reject) +{ + size_t __result = 0; + while (__s[__result] != '\0' && __s[__result] != __reject) + ++__result; + return __result; +} + +__STRING_INLINE size_t __strcspn_c2 (const char *__s, int __reject1, + int __reject2); +__STRING_INLINE size_t +__strcspn_c2 (const char *__s, int __reject1, int __reject2) +{ + size_t __result = 0; + while (__s[__result] != '\0' && __s[__result] != __reject1 + && __s[__result] != __reject2) + ++__result; + return __result; +} + +__STRING_INLINE size_t __strcspn_c3 (const char *__s, int __reject1, + int __reject2, int __reject3); +__STRING_INLINE size_t +__strcspn_c3 (const char *__s, int __reject1, int __reject2, + int __reject3) +{ + size_t __result = 0; + while (__s[__result] != '\0' && __s[__result] != __reject1 + && __s[__result] != __reject2 && __s[__result] != __reject3) + ++__result; + return __result; +} +#endif +# 980 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + + +/* Return the length of the initial segment of S which + consists entirely 
of characters in ACCEPT. */ +#if !defined _HAVE_STRING_ARCH_strspn || defined _FORCE_INLINES +# ifndef _HAVE_STRING_ARCH_strspn +# if __GNUC_PREREQ (3, 2) +# define strspn(s, accept) \ + __extension__ \ + ({ char __a0, __a1, __a2; \ + (__builtin_constant_p (accept) && __string2_1bptr_p (accept) \ + ? ((__builtin_constant_p (s) && __string2_1bptr_p (s)) \ + ? __builtin_strspn (s, accept) \ + : ((__a0 = ((const char *) (accept))[0], __a0 == '\0') \ + ? ((void) (s), (size_t) 0) \ + : ((__a1 = ((const char *) (accept))[1], __a1 == '\0') \ + ? __strspn_c1 (s, __a0) \ + : ((__a2 = ((const char *) (accept))[2], __a2 == '\0') \ + ? __strspn_c2 (s, __a0, __a1) \ + : (((const char *) (accept))[3] == '\0' \ + ? __strspn_c3 (s, __a0, __a1, __a2) \ + : __builtin_strspn (s, accept)))))) \ + : __builtin_strspn (s, accept)); }) +# else +# 1004 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# define strspn(s, accept) \ + __extension__ \ + ({ char __a0, __a1, __a2; \ + (__builtin_constant_p (accept) && __string2_1bptr_p (accept) \ + ? ((__a0 = ((const char *) (accept))[0], __a0 == '\0') \ + ? ((void) (s), (size_t) 0) \ + : ((__a1 = ((const char *) (accept))[1], __a1 == '\0') \ + ? __strspn_c1 (s, __a0) \ + : ((__a2 = ((const char *) (accept))[2], __a2 == '\0') \ + ? __strspn_c2 (s, __a0, __a1) \ + : (((const char *) (accept))[3] == '\0' \ + ? __strspn_c3 (s, __a0, __a1, __a2) \ + : strspn (s, accept))))) \ + : strspn (s, accept)); }) +# endif +# 1019 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# endif +# 1020 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + +__STRING_INLINE size_t __strspn_c1 (const char *__s, int __accept); +__STRING_INLINE size_t +__strspn_c1 (const char *__s, int __accept) +{ + size_t __result = 0; + /* Please note that __accept never can be '\0'. 
*/ + while (__s[__result] == __accept) + ++__result; + return __result; +} + +__STRING_INLINE size_t __strspn_c2 (const char *__s, int __accept1, + int __accept2); +__STRING_INLINE size_t +__strspn_c2 (const char *__s, int __accept1, int __accept2) +{ + size_t __result = 0; + /* Please note that __accept1 and __accept2 never can be '\0'. */ + while (__s[__result] == __accept1 || __s[__result] == __accept2) + ++__result; + return __result; +} + +__STRING_INLINE size_t __strspn_c3 (const char *__s, int __accept1, + int __accept2, int __accept3); +__STRING_INLINE size_t +__strspn_c3 (const char *__s, int __accept1, int __accept2, int __accept3) +{ + size_t __result = 0; + /* Please note that __accept1 to __accept3 never can be '\0'. */ + while (__s[__result] == __accept1 || __s[__result] == __accept2 + || __s[__result] == __accept3) + ++__result; + return __result; +} +#endif +# 1057 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + + +/* Find the first occurrence in S of any character in ACCEPT. */ +#if !defined _HAVE_STRING_ARCH_strpbrk || defined _FORCE_INLINES +# ifndef _HAVE_STRING_ARCH_strpbrk +# if __GNUC_PREREQ (3, 2) +# define strpbrk(s, accept) \ + __extension__ \ + ({ char __a0, __a1, __a2; \ + (__builtin_constant_p (accept) && __string2_1bptr_p (accept) \ + ? ((__builtin_constant_p (s) && __string2_1bptr_p (s)) \ + ? __builtin_strpbrk (s, accept) \ + : ((__a0 = ((const char *) (accept))[0], __a0 == '\0') \ + ? ((void) (s), (char *) NULL) \ + : ((__a1 = ((const char *) (accept))[1], __a1 == '\0') \ + ? __builtin_strchr (s, __a0) \ + : ((__a2 = ((const char *) (accept))[2], __a2 == '\0') \ + ? __strpbrk_c2 (s, __a0, __a1) \ + : (((const char *) (accept))[3] == '\0' \ + ? 
__strpbrk_c3 (s, __a0, __a1, __a2) \ + : __builtin_strpbrk (s, accept)))))) \ + : __builtin_strpbrk (s, accept)); }) +# else +# 1080 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# define strpbrk(s, accept) \ + __extension__ \ + ({ char __a0, __a1, __a2; \ + (__builtin_constant_p (accept) && __string2_1bptr_p (accept) \ + ? ((__a0 = ((const char *) (accept))[0], __a0 == '\0') \ + ? ((void) (s), (char *) NULL) \ + : ((__a1 = ((const char *) (accept))[1], __a1 == '\0') \ + ? strchr (s, __a0) \ + : ((__a2 = ((const char *) (accept))[2], __a2 == '\0') \ + ? __strpbrk_c2 (s, __a0, __a1) \ + : (((const char *) (accept))[3] == '\0' \ + ? __strpbrk_c3 (s, __a0, __a1, __a2) \ + : strpbrk (s, accept))))) \ + : strpbrk (s, accept)); }) +# endif +# 1095 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# endif +# 1096 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + +__STRING_INLINE char *__strpbrk_c2 (const char *__s, int __accept1, + int __accept2); +__STRING_INLINE char * +__strpbrk_c2 (const char *__s, int __accept1, int __accept2) +{ + /* Please note that __accept1 and __accept2 never can be '\0'. */ + while (*__s != '\0' && *__s != __accept1 && *__s != __accept2) + ++__s; + return *__s == '\0' ? NULL : (char *) (size_t) __s; +} + +__STRING_INLINE char *__strpbrk_c3 (const char *__s, int __accept1, + int __accept2, int __accept3); +__STRING_INLINE char * +__strpbrk_c3 (const char *__s, int __accept1, int __accept2, int __accept3) +{ + /* Please note that __accept1 to __accept3 never can be '\0'. */ + while (*__s != '\0' && *__s != __accept1 && *__s != __accept2 + && *__s != __accept3) + ++__s; + return *__s == '\0' ? NULL : (char *) (size_t) __s; +} +#endif +# 1120 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + + +/* Find the first occurrence of NEEDLE in HAYSTACK. Newer gcc versions + do this itself. 
*/ +#if !defined _HAVE_STRING_ARCH_strstr && !__GNUC_PREREQ (2, 97) +# define strstr(haystack, needle) \ + (__extension__ (__builtin_constant_p (needle) && __string2_1bptr_p (needle) \ + ? (((const char *) (needle))[0] == '\0' \ + ? (char *) (size_t) (haystack) \ + : (((const char *) (needle))[1] == '\0' \ + ? strchr (haystack, \ + ((const char *) (needle))[0]) \ + : strstr (haystack, needle))) \ + : strstr (haystack, needle))) +#endif +# 1135 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + + +#if !defined _HAVE_STRING_ARCH_strtok_r || defined _FORCE_INLINES +# ifndef _HAVE_STRING_ARCH_strtok_r +# define __strtok_r(s, sep, nextp) \ + (__extension__ (__builtin_constant_p (sep) && __string2_1bptr_p (sep) \ + && ((const char *) (sep))[0] != '\0' \ + && ((const char *) (sep))[1] == '\0' \ + ? __strtok_r_1c (s, ((const char *) (sep))[0], nextp) \ + : __strtok_r (s, sep, nextp))) +# endif +# 1146 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + +__STRING_INLINE char *__strtok_r_1c (char *__s, char __sep, char **__nextp); +__STRING_INLINE char * +__strtok_r_1c (char *__s, char __sep, char **__nextp) +{ + char *__result; + if (__s == NULL) + __s = *__nextp; + while (*__s == __sep) + ++__s; + __result = NULL; + if (*__s != '\0') + { + __result = __s++; + while (*__s != '\0') + if (*__s++ == __sep) + { + __s[-1] = '\0'; + break; + } + } + *__nextp = __s; + return __result; +} +# if defined __USE_POSIX || defined __USE_MISC +# define strtok_r(s, sep, nextp) __strtok_r (s, sep, nextp) +# endif +# 1173 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +#endif +# 1174 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + + +#if !defined _HAVE_STRING_ARCH_strsep || defined _FORCE_INLINES +# ifndef _HAVE_STRING_ARCH_strsep + +extern char *__strsep_g (char **__stringp, const char *__delim); +# define __strsep(s, reject) \ + __extension__ \ + ({ char __r0, __r1, __r2; \ + (__builtin_constant_p (reject) && __string2_1bptr_p (reject) \ + && (__r0 = ((const char *) 
(reject))[0], \ + ((const char *) (reject))[0] != '\0') \ + ? ((__r1 = ((const char *) (reject))[1], \ + ((const char *) (reject))[1] == '\0') \ + ? __strsep_1c (s, __r0) \ + : ((__r2 = ((const char *) (reject))[2], __r2 == '\0') \ + ? __strsep_2c (s, __r0, __r1) \ + : (((const char *) (reject))[3] == '\0' \ + ? __strsep_3c (s, __r0, __r1, __r2) \ + : __strsep_g (s, reject)))) \ + : __strsep_g (s, reject)); }) +# endif +# 1196 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + +__STRING_INLINE char *__strsep_1c (char **__s, char __reject); +__STRING_INLINE char * +__strsep_1c (char **__s, char __reject) +{ + char *__retval = *__s; + if (__retval != NULL && (*__s = strchr (__retval, __reject)) != NULL) + *(*__s)++ = '\0'; + return __retval; +} + +__STRING_INLINE char *__strsep_2c (char **__s, char __reject1, char __reject2); +__STRING_INLINE char * +__strsep_2c (char **__s, char __reject1, char __reject2) +{ + char *__retval = *__s; + if (__retval != NULL) + { + char *__cp = __retval; + while (1) + { + if (*__cp == '\0') + { + __cp = NULL; + break; + } + if (*__cp == __reject1 || *__cp == __reject2) + { + *__cp++ = '\0'; + break; + } + ++__cp; + } + *__s = __cp; + } + return __retval; +} + +__STRING_INLINE char *__strsep_3c (char **__s, char __reject1, char __reject2, + char __reject3); +__STRING_INLINE char * +__strsep_3c (char **__s, char __reject1, char __reject2, char __reject3) +{ + char *__retval = *__s; + if (__retval != NULL) + { + char *__cp = __retval; + while (1) + { + if (*__cp == '\0') + { + __cp = NULL; + break; + } + if (*__cp == __reject1 || *__cp == __reject2 || *__cp == __reject3) + { + *__cp++ = '\0'; + break; + } + ++__cp; + } + *__s = __cp; + } + return __retval; +} +# ifdef __USE_BSD +# define strsep(s, reject) __strsep (s, reject) +# endif +# 1264 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +#endif +# 1265 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + +/* We need the memory allocation functions for inline strdup(). 
+ Referring to stdlib.h (even minimally) is not allowed + in any of the tight standards compliant modes. */ +#ifdef __USE_MISC + +# if !defined _HAVE_STRING_ARCH_strdup || !defined _HAVE_STRING_ARCH_strndup +# define __need_malloc_and_calloc +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 1273 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# 1274 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# endif +# 1275 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + +# ifndef _HAVE_STRING_ARCH_strdup + +extern char *__strdup (const char *__string) __THROW __attribute_malloc__; +# define __strdup(s) \ + (__extension__ (__builtin_constant_p (s) && __string2_1bptr_p (s) \ + ? (((const char *) (s))[0] == '\0' \ + ? (char *) calloc ((size_t) 1, (size_t) 1) \ + : ({ size_t __len = strlen (s) + 1; \ + char *__retval = (char *) malloc (__len); \ + if (__retval != NULL) \ + __retval = (char *) memcpy (__retval, s, __len); \ + __retval; })) \ + : __strdup (s))) + +# if defined __USE_SVID || defined __USE_BSD || defined __USE_XOPEN_EXTENDED +# define strdup(s) __strdup (s) +# endif +# 1293 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# endif +# 1294 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + +# ifndef _HAVE_STRING_ARCH_strndup + +extern char *__strndup (const char *__string, size_t __n) + __THROW __attribute_malloc__; +# define __strndup(s, n) \ + (__extension__ (__builtin_constant_p (s) && __string2_1bptr_p (s) \ + ? (((const char *) (s))[0] == '\0' \ + ? 
(char *) calloc ((size_t) 1, (size_t) 1) \ + : ({ size_t __len = strlen (s) + 1; \ + size_t __n = (n); \ + char *__retval; \ + if (__n < __len) \ + __len = __n + 1; \ + __retval = (char *) malloc (__len); \ + if (__retval != NULL) \ + { \ + __retval[__len - 1] = '\0'; \ + __retval = (char *) memcpy (__retval, s, \ + __len - 1); \ + } \ + __retval; })) \ + : __strndup (s, n))) + +# ifdef __USE_GNU +# define strndup(s, n) __strndup (s, n) +# endif +# 1321 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# endif +# 1322 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + +#endif /* Use misc. or use GNU. */ +# 1324 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + +#ifndef _FORCE_INLINES +# undef __STRING_INLINE +#endif +# 1328 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 + +#endif /* No string inlines. */ +# 1330 "/usr/include/x86_64-linux-gnu/bits/string2.h" 3 4 +# 636 "/usr/include/string.h" 2 3 4 +# endif +# 637 "/usr/include/string.h" 3 4 + +# if __USE_FORTIFY_LEVEL > 0 && defined __fortify_function +/* Functions with security checks. */ +#if 0 /* expanded by -frewrite-includes */ +# include +#endif /* expanded by -frewrite-includes */ +# 640 "/usr/include/string.h" 3 4 +# 641 "/usr/include/string.h" 3 4 +# endif +# 642 "/usr/include/string.h" 3 4 +#endif +# 643 "/usr/include/string.h" 3 4 + +__END_DECLS + +#endif /* string.h */ +# 647 "/usr/include/string.h" 3 4 +# 5 "oski.c" 2 +#if 0 /* expanded by -frewrite-includes */ +#include "util.h" +#endif /* expanded by -frewrite-includes */ +# 5 "oski.c" +# 1 "./util.h" 1 +/** + * BSD 3-Clause License + * + * Copyright (c) 2017, Peter Ahrens All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef UTIL_H +#define UTIL_H +//lo is inclusive, hi is exclusive + +static inline int max(int a, int b) {return a > b ? a : b;}; +static inline int min(int a, int b) {return a < b ? 
a : b;}; +void random_seed (unsigned long seed); +int random_range (int *stuff, int n, int lo, int hi); +double random_uniform (); +void sort (int *stuff, int n); +int search (const int *stuff, int lo, int hi, int key); +int search_strict (const int *stuff, int lo, int hi, int key); +#endif +# 46 "./util.h" +# 6 "oski.c" 2 + +#if 0 /* expanded by -frewrite-includes */ +#include +#endif /* expanded by -frewrite-includes */ +# 7 "oski.c" +# 1 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/cilk/cilk.h" 1 3 +/* cilk.h -*-C++-*- + * + * @copyright + * Copyright (C) 2010-2013, Intel Corporation + * All rights reserved. + * + * @copyright + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * @copyright + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY + * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/** @file cilk.h + * + * @brief Provides convenient aliases for the Cilk language keywords. + * + * @details + * Since Cilk is a nonstandard extension to both C and C++, the Cilk + * language keywords all begin with “`_Cilk_`”, which guarantees that they + * will not conflict with user-defined identifiers in properly written + * programs, so that “standard” C and C++ programs can safely be + * compiled a Cilk-enabled C or C++ compiler. + * + * However, this means that the keywords _look_ like something grafted on to + * the base language. Therefore, you can include this header: + * + * #include "cilk/cilk.h" + * + * and then write the Cilk keywords with a “`cilk_`” prefix instead of + * “`_Cilk_`”. + * + * @ingroup language + */ + + +/** @defgroup language Language Keywords + * Definitions having to do with the Cilk language. + * @{ + */ + +#ifndef cilk_spawn +# define cilk_spawn _Cilk_spawn ///< Spawn a task that can execute in parallel. +# define cilk_sync _Cilk_sync ///< Wait for spawned tasks to complete. +# define cilk_for _Cilk_for ///< Execute iterations of a for loop in parallel. +#endif +# 70 "/data/scratch/hjxu/tapir/src/build/lib/clang/6.0.0/include/cilk/cilk.h" 3 + +/// @} +# 8 "oski.c" 2 + +char * name() { + return "oski"; +} + +/** + * Given an m by n CSR matrix A, estimates the fill ratio if the matrix were + * converted into b_r by b_c BCSR format. 
The fill ratio is b_r times b_c times + * the number of nonzero blocks in the BCSR format divided by the number of + * nonzeros. For each setting of b_r, block rows are completely examined with + * probability sigma. + * + * The caller supplies this routine with a maximum row and column block size B, + * and this routine returns the estimated fill ratios for all + * 1 <= b_r, b_c <= B. + * + * This routine assumes the CSR matrix uses full storage, and assumes that + * column indicies are sorted. + * + * \param[in] m Logical number of matrix rows + * \param[in] n Logical number of matrix columns + * \param[in] nnz Logical number of matrix nonzeros + * \param[in] *ptr CSR row pointers. + * \param[in] *ind CSR column indices. + * \param[in] B Maximum desired block size + * \param[in] epsilon Epsilon + * \param[in] delta Delta + * \param[in] sigma Sigma + * \param[out] *fill Fill ratios for all specified b_r, b_c in order + * \param[in] verbose 0 if you should be quiet + * + * Note that the fill ratios should be stored according to the following order: + * int fill_index = 0; + * for (int b_r = 1; b_r <= B; b_r++) { + * for (int b_c = 1; b_c <= B; b_c++) { + * fill[fill_index] = fill for b_r, b_c + * fill_index++; + * } + * } + * + * \returns On success, returns 0. On error, returns an error code. + */ +int estimate_fill (int m, + int n, + int nnz, + const int *ptr, + const int *ind, + int B, + double epsilon, + double delta, + double sigma, + double *fill, + int verbose){ + assert(n >= 1); + assert(m >= 1); + + /* blocks + (c - 1) * n stores previously seen column block indicies in the + * current block row when b_c = c. + */ + int *blocks = (int*)malloc(sizeof(int) * B * n); + assert(blocks != NULL); + memset(blocks, 0, sizeof(int) * B * n); + + /* K[(c - 1)] counts distinct column block indicies in the current block row + * when b_c = c. 
+ */ + int K[B]; + + /* see above note about fill order */ + int fill_index = 0; + + for (int r = 1; r <= B; r++) { + + /* M is the number of block rows */ + int M = m / r; + + /* stores the number of examined nonzeros */ + int S = 0; + + for (int c = 1; c <= B; c++){ + K[c - 1] = 0; + } + + // CHECK: %[[SYNCREGION:.+]] = {{.*}}call token @llvm.syncregion.start() + /* loop over block rows */ + cilk_for (int I = 0; I < M; I++) { + // CHECK: detach within %[[SYNCREGION]], label %[[PFORBODY:.+]], label %[[PFORINC:.+]] + // CHECK: [[PFORBODY]]: + // CHECK: br i1 %{{.+}}, label %[[PFORPREATTACH:.+]], label + + /* examine the block row with probability sigma */ + if (random_uniform() > sigma) { + continue; + } else { + + /* Count the blocks in block row I, using "blocks" to remember the + * blocks that have been seen so far for each block column width "c". + */ + for (int i = I * r; i < (I + 1) * r; i++) { + for (int t = ptr[i]; t < ptr[i + 1]; t++) { + int j = ind[t]; + + for (int c = 1; c <= B; c++) { + /* "J" is the block column index */ + int J = j / c; + + /* if the block has not yet been seen, count it */ + if (blocks[(c - 1) * n + J] == 0) { + blocks[(c - 1) * n + J] = 1; + K[c - 1]++; + } + } + } + } + } + S += ptr[(I + 1) * r] - ptr[I * r]; + + /* + * Reset "blocks" for the next block row. We loop over the nonzeros + * instead of calling "memset" in order to keep the complexity to O(nnz). + */ + for (int i = I * r; i < (I + 1) * r; i++) { + for (int t = ptr[i]; t < ptr[i + 1]; t++) { + int j = ind[t]; + + for (int c = 1; c <= B; c++) { + /* "J" is the block column index */ + int J = j / c; + blocks[(c - 1) * n + J] = 0; + } + } + } + } + // CHECK: [[PFORPREATTACH]]: + // CHECK: reattach within %[[SYNCREGION]] + + // CHECK: [[PFORINC]]: + // CHECK: br i1 + + // CHECK-NOT: label %{{[[PFORPREATTACH]]\b}} + // CHECK: return + + /* + * Compute the fill from the number of blocks and nonzeros that have been + * seen in the sample. 
+ */ + for (int c = 1; c <= B; c++) { + if (!S) + fill[fill_index] = K[c - 1] ? (1.0 / 0.0) : 1.0; + else + fill[fill_index] = ((double)K[c - 1] * r * c) / S; + fill_index++; + } + } + + free(blocks); + return 0; +} diff --git a/clang/test/Cilk/cilkfor-detach-unwind-rewrite.cpp b/clang/test/Cilk/cilkfor-detach-unwind-rewrite.cpp new file mode 100644 index 00000000000000..f2d71f0bc196df --- /dev/null +++ b/clang/test/Cilk/cilkfor-detach-unwind-rewrite.cpp @@ -0,0 +1,50 @@ +// Check that detaches with unwind destinations are properly inserted +// during CodeGen. +// +// RUN: %clang_cc1 %s -fopencilk -fsanitize=signed-integer-overflow,unsigned-integer-overflow -fcxx-exceptions -fexceptions -emit-llvm -fsanitize-recover=signed-integer-overflow,unsigned-integer-overflow -disable-llvm-passes -o - | FileCheck %s + +unsigned __cilkrts_get_nworkers(void); + +template class Reducer_sum { +public: + Reducer_sum(long num_workers); + ~Reducer_sum(); + void add(T new_value); + T get() const; +}; + +int main() { + Reducer_sum total_red(__cilkrts_get_nworkers()); + _Cilk_for(long i = 0; i < 100; i++) { total_red.add(5); } + return total_red.get(); +} + +// CHECK: define {{.*}}i32 @main() +// CHECK: br i1 %{{.*}}, label %[[PFOR_PH:.+]], label %[[PFOR_END:[a-z0-9._]+]] + +// CHECK: [[PFOR_PH]]: +// CHECK: call void @__ubsan_handle_sub_overflow +// CHECK: call void @__ubsan_handle_sub_overflow +// CHECK: call void @__ubsan_handle_divrem_overflow +// CHECK: call void @__ubsan_handle_add_overflow + +// Check contents of the detach block +// CHECK: load i64, ptr %[[INIT:.+]] +// CHECK: load i64, ptr %[[BEGIN:.+]] +// CHECK: call { i64, i1 } @llvm.smul.with.overflow.i64( +// CHECK: br i1 %{{.*}}, label %[[CONT5:.+]], label %[[HANDLE_MUL_OVERFLOW:[a-z0-9._]+]], + +// CHECK: [[HANDLE_MUL_OVERFLOW]]: +// CHECK: call void @__ubsan_handle_mul_overflow + +// CHECK: [[CONT5]]: +// CHECK: call { i64, i1 } @llvm.sadd.with.overflow.i64( +// CHECK: br i1 %{{.*}}, label %[[CONT7:.+]], label 
%[[HANDLE_ADD_OVERFLOW:[a-z0-9._]+]], + +// CHECK: [[HANDLE_ADD_OVERFLOW]]: +// CHECK: call void @__ubsan_handle_add_overflow + +// Check that the detach ends up after the loop-variable init expression. + +// CHECK: [[CONT7]]: +// CHECK-NEXT: detach within %[[SYNCREG:.+]], label %[[PFOR_BODY_ENTRY:.+]], label %[[PFOR_INC:.+]] unwind label %[[LPAD9:.+]] diff --git a/clang/test/Cilk/cilkfor-pgo.cpp b/clang/test/Cilk/cilkfor-pgo.cpp new file mode 100644 index 00000000000000..3321f8fee5d5b6 --- /dev/null +++ b/clang/test/Cilk/cilkfor-pgo.cpp @@ -0,0 +1,24 @@ +// Check that -fprofile-instrument generates atomic +// instrumentation instructions inside of _Cilk_for loops. +// +// Credit to Brian Wheatman for the original source of this test. +// +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fprofile-instrument=clang -fprofile-update=atomic %s -S -emit-llvm -fopencilk -ftapir=none -o - 2>&1 | FileCheck %s +// expected-no-diagnostics + +int main() { + int sum = 0; + _Cilk_for(int i = 0; i < 1000000; i++) { sum += i; } + + return sum; +} + +// CHECK: @__profc_main = {{.*}}global [2 x i64] zeroinitializer, section "__llvm_prf_cnts" + +// CHECK-LABEL: define {{.*}}i32 @main() + +// CHECK: detach within %{{.+}}, label %[[PFOR_BODY:.+]], label %[[PFOR_INC:.+]] + +// CHECK: [[PFOR_BODY]]: +// CHECK: atomicrmw add ptr getelementptr inbounds ([2 x i64], ptr @__profc_main, i32 0, i32 1), i64 1 monotonic +// CHECK: reattach within %{{.+}}, label %[[PFOR_INC]] diff --git a/clang/test/Cilk/cilkfor-pointer.c b/clang/test/Cilk/cilkfor-pointer.c new file mode 100644 index 00000000000000..e5e6f22fadf0a2 --- /dev/null +++ b/clang/test/Cilk/cilkfor-pointer.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 %s -fopencilk -verify -fsyntax-only + +// Make sure the front end accepts pointer loop variables. 
+long cilk_for_pointer_type(const long *begin, const long *end) +{ + _Cilk_for (const long *p = begin; p != end; ++p) + ; // expected-warning@-1{{Cilk for loop has empty body}} + return 0; +} diff --git a/clang/test/Cilk/cilksan-O0.c b/clang/test/Cilk/cilksan-O0.c new file mode 100644 index 00000000000000..e4d96fe4607646 --- /dev/null +++ b/clang/test/Cilk/cilksan-O0.c @@ -0,0 +1,38 @@ +// Verify that proper Cilksan instrumentation is inserted when a Cilk code is +// compiled at -O0. +// +// Thanks to I-Ting Angelina Lee for contributing this test case. +// +// RUN: %clang_cc1 %s -std=c99 -triple x86_64-unknown-linux-gnu -O0 -fopencilk -fsanitize=cilk -ftapir=none -verify -S -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics + +int a = 0; +int b = 0; +int c = 2; + +void addA() { + // CHECK: define {{.*}}void @addA() + // CHECK: __csan_func_entry(i64 {{.+}}, ptr {{.+}}, ptr {{.+}}, i64 0) + a = c; +} + +void addB() { + // CHECK: define {{.*}}void @addB() + // CHECK: __csan_func_entry(i64 {{.+}}, ptr {{.+}}, ptr {{.+}}, i64 0) + b = a; +} + +void foo() { + // CHECK: define {{.*}}void @foo() + // CHECK: __csan_func_entry(i64 {{.+}}, ptr {{.+}}, ptr {{.+}}, i64 257) + _Cilk_spawn addA(); + addB(); + _Cilk_sync; +} + +int main() { + // CHECK: define {{.*}}i32 @main() + // CHECK: __csan_func_entry(i64 {{.+}}, ptr {{.+}}, ptr {{.+}}, i64 0) + foo(); + return 0; +} diff --git a/clang/test/Cilk/cilkscope-checks.c b/clang/test/Cilk/cilkscope-checks.c new file mode 100644 index 00000000000000..ad7c0a385b25f0 --- /dev/null +++ b/clang/test/Cilk/cilkscope-checks.c @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -fopencilk -fsyntax-only -verify %s + +void bar(int x); + +void foo(int x) { + goto lbl1; // expected-error{{cannot jump from this goto statement to its label}} + _Cilk_scope { // expected-note{{jump bypasses '_Cilk_scope'}} + _Cilk_spawn bar(x); + lbl1: + bar(x-1); + } + bar(x+1); +} diff --git a/clang/test/Cilk/cilkscope.c b/clang/test/Cilk/cilkscope.c new file mode 
100644 index 00000000000000..dd4f30b9108a91 --- /dev/null +++ b/clang/test/Cilk/cilkscope.c @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -O0 -fopencilk -verify -S -emit-llvm -ftapir=none -o - | FileCheck %s +// expected-no-diagnostics + +void bar(int x); + +void foo(int x) { + _Cilk_scope { + _Cilk_spawn bar(x); + if (x < 1) + return; + bar(x-1); + } + bar(x+1); +} + +// CHECK: define {{.*}}void @foo( + +// CHECK: %[[TAPIR_RT_START:.+]] = call token @llvm.tapir.runtime.start() +// CHECK: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[CONTINUE:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: call void @bar( +// CHECK: reattach within %[[SYNCREG]], label %[[CONTINUE]] + +// CHECK: [[CONTINUE]]: +// CHECK: %[[CMP:.+]] = icmp slt i32 %{{.+}}, 1 +// CHECK-NEXT: br i1 %[[CMP]], label %[[IF_THEN:.+]], label %[[IF_END:.+]] + +// CHECK: [[IF_THEN]]: +// CHECK-NEXT: sync within %[[SYNCREG]], label %[[SYNC_CONT:.+]] + +// CHECK: [[IF_END]]: +// CHECK: call void @bar( +// CHECK: br label %[[CLEANUP:.+]] + +// CHECK: [[CLEANUP]]: +// CHECK-NEXT: sync within %[[SYNCREG]], label %[[SYNC_CONT2:.+]] + +// CHECK: call void @llvm.tapir.runtime.end(token %[[TAPIR_RT_START]]) + +// CHECK: call void @bar( + +// CHECK: ret void + diff --git a/clang/test/Cilk/clangchecks.cpp b/clang/test/Cilk/clangchecks.cpp new file mode 100644 index 00000000000000..b1046c0bbc907b --- /dev/null +++ b/clang/test/Cilk/clangchecks.cpp @@ -0,0 +1,70 @@ +// RUN: %clang_cc1 -std=c++1z -fopencilk -fsyntax-only -verify %s + +class Bar { + int val[4] = {0,0,0,0}; +public: + Bar(); + ~Bar(); + Bar(const Bar &that); + Bar(Bar &&that); + Bar &operator=(Bar that); + friend void swap(Bar &left, Bar &right); + + const int &getVal(int i) const { return val[i]; } + void incVal(int i) { val[i]++; } +}; + +int bar(int n); + +int x = _Cilk_spawn 0; // expected-error{{'_Cilk_spawn' cannot be used outside a function}} + +int illegal_spawn_uses(int n) { + // FIXME: Fails an assertion 
during codegen. + // int x = _Cilk_spawn 0; + + Bar Arrb[4] = _Cilk_spawn { Bar(), Bar(), Bar(), Bar() }; // expected-error{{expected expression}} + + if (int i = _Cilk_spawn bar(n)) // expected-error{{'_Cilk_spawn' not allowed in this scope}} + bar(i); + + if ((_Cilk_spawn bar(n))) // expected-error{{'_Cilk_spawn' not allowed in this scope}} + bar(n); + + for (int i = _Cilk_spawn bar(n); i < n; ++i) { // expected-error{{'_Cilk_spawn' not allowed in this scope}} + bar(i); + } + + for (int i = 0; i < n; ++i) + _Cilk_spawn break; // expected-error{{'break' statement not in loop or switch statement}} + + for (int i = 0; i < n; ++i) { + _Cilk_spawn break; // expected-error{{'break' statement not in loop or switch statement}} + } + + return _Cilk_spawn bar(n); // expected-warning{{no parallelism from a '_Cilk_spawn' in a return statement}} +} + +void bad_jumps_spawn(int n) { + label3: bar(n); + goto label2; // expected-error{{cannot jump from this goto statement to its label}} + + _Cilk_spawn { // expected-note{{jump bypasses '_Cilk_spawn'}} + label1: bar(n); + label2: bar(n); + goto label1; + goto label3; // expected-error{{cannot jump out of '_Cilk_spawn' statement}} + }; + + _Cilk_spawn goto label1; // expected-error{{use of undeclared label}} +} + + +void bad_jumps_cilk_for(int n) { + label3: bar(n); + goto label2; // expected-error{{cannot jump from this goto statement to its label}} + + _Cilk_for(int i = 0; i < n; ++i) { // expected-note{{jump bypasses '_Cilk_for'}} expected-note{{jump bypasses variable initialization}} + label2: bar(i); + goto label3; // expected-error{{cannot jump out of '_Cilk_for' statement}} + } +} diff --git a/clang/test/Cilk/constructors.cpp b/clang/test/Cilk/constructors.cpp new file mode 100644 index 00000000000000..995c1d1ff427a7 --- /dev/null +++ b/clang/test/Cilk/constructors.cpp @@ -0,0 +1,574 @@ +// RUN: %clang_cc1 -fcxx-exceptions -fexceptions -fopencilk -ftapir=none -triple x86_64-unknown-linux-gnu -std=c++11 -emit-llvm %s -o - | 
FileCheck %s --check-prefixes CHECK,CHECK-O0 +// RUN: %clang_cc1 -fcxx-exceptions -fexceptions -fopencilk -ftapir=none -triple x86_64-unknown-linux-gnu -std=c++11 -O1 -mllvm -simplify-taskframes=false -emit-llvm %s -o - | FileCheck %s --check-prefixes CHECK,CHECK-O1 +// expected-no-diagnostics + +class Baz { +public: + Baz(); + ~Baz(); + Baz(const Baz &that); + Baz(Baz &&that); + Baz &operator=(Baz that); + friend void swap(Baz &left, Baz &right); +}; + +class Bar { + int val[4] = {0,0,0,0}; +public: + Bar(); + ~Bar(); + Bar(const Bar &that); + Bar(Bar &&that); + Bar &operator=(Bar that); + friend void swap(Bar &left, Bar &right); + + Bar(const Baz &that); + + const int &getVal(int i) const { return val[i]; } + void incVal(int i) { val[i]++; } +}; + +class DBar : public Bar { +public: + DBar(); + ~DBar(); + DBar(const DBar &that); + DBar(DBar &&that); + DBar &operator=(DBar that); + friend void swap(DBar &left, DBar &right); +}; + +int foo(const Bar &b); + +Bar makeBar(); +void useBar(Bar b); + +DBar makeDBar(); +DBar makeDBarFromBar(Bar b); + +Baz makeBaz(); +Baz makeBazFromBar(Bar b); + +void rule_of_four() { + // CHECK-LABEL: define {{.*}}void @_Z12rule_of_fourv() + Bar b0; + Bar b5(_Cilk_spawn makeBar()); + // CHECK: %[[TASKFRAME:.+]] = call token @llvm.taskframe.create() + // CHECK: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[CONTINUE:.+]] unwind label %[[TFLPAD:.+]] + // CHECK: [[DETACHED]]: + // CHECK: invoke void @_Z7makeBarv(ptr {{.*}}sret(%class.Bar) {{.*}}%[[b5:.+]]) + // CHECK-NEXT: to label %[[REATTACH:.+]] unwind label %[[DETLPAD:.+]] + // CHECK: [[REATTACH]]: + // CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE]] + // CHECK: [[CONTINUE]]: + Bar b4 = _Cilk_spawn makeBar(); + // CHECK: %[[TASKFRAME2:.+]] = call token @llvm.taskframe.create() + // CHECK: detach within %[[SYNCREG]], label %[[DETACHED2:.+]], label %[[CONTINUE2:.+]] unwind label %[[TFLPAD2:.+]] + // CHECK: [[DETACHED2]]: + // CHECK: invoke void 
@_Z7makeBarv(ptr {{.*}}sret(%class.Bar) {{.*}}%[[b4:.+]]) + // CHECK-NEXT: to label %[[REATTACH2:.+]] unwind label %[[DETLPAD2:.+]] + // CHECK: [[REATTACH2]]: + // CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE2]] + // CHECK: [[CONTINUE2]]: + b0 = _Cilk_spawn makeBar(); + // CHECK: %[[TASKFRAME3:.+]] = call token @llvm.taskframe.create() + // CHECK: %[[AGGTMP:.+]] = alloca %class.Bar + // CHECK: detach within %[[SYNCREG]], label %[[DETACHED3:.+]], label %[[CONTINUE3:.+]] unwind label %[[TFLPAD3:.+]] + // CHECK: [[DETACHED3]]: + // CHECK: invoke void @_Z7makeBarv(ptr {{.*}}sret(%class.Bar) {{.*}}%[[AGGTMP]]) + // CHECK-NEXT: to label %[[INVOKECONT:.+]] unwind label %[[DETLPAD3:.+]] + // CHECK: [[INVOKECONT]]: + // CHECK-NEXT: %[[CALL:.+]] = invoke {{.*}}dereferenceable(16) ptr @_ZN3BaraSES_(ptr {{.*}}dereferenceable(16) %[[b0:.+]], ptr {{.*}}%[[AGGTMP]]) + // CHECK-NEXT: to label %[[INVOKECONT2:.+]] unwind label %[[DETLPAD3_2:.+]] + // CHECK: [[INVOKECONT2]]: + // CHECK-NEXT: call void @_ZN3BarD1Ev(ptr {{.*}}dereferenceable(16) %[[AGGTMP]]) + // CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE3]] + // CHECK: [[CONTINUE3]]: + _Cilk_spawn useBar(b0); + // CHECK-O0: %[[TASKFRAME4:.+]] = call token @llvm.taskframe.create() + // CHECK: %[[AGGTMP2:.+]] = alloca %class.Bar + // CHECK: invoke void @_ZN3BarC1ERKS_(ptr {{.*}}dereferenceable(16) %[[AGGTMP2]], ptr {{.*}}dereferenceable(16) %[[b0:.+]]) + // CHECK-NEXT: to label %[[INVOKECONT3:.+]] unwind label %[[TFLPAD4:.+]] + // CHECK: [[INVOKECONT3]]: + // CHECK-O0: detach within %[[SYNCREG]], label %[[DETACHED4:.+]], label %[[CONTINUE4:.+]] unwind label %[[TFLPAD4:.+]] + // CHECK-O0: [[DETACHED4]]: + // CHECK: invoke void @_Z6useBar3Bar(ptr {{.*}}%[[AGGTMP2]]) + // CHECK-NEXT: to label %[[INVOKECONT4:.+]] unwind label %[[DETLPAD4:.+]] + // CHECK: [[INVOKECONT4]]: + // CHECK-NEXT: call void @_ZN3BarD1Ev(ptr {{.*}}dereferenceable(16) %[[AGGTMP2]]) + // CHECK-O0-NEXT: reattach within %[[SYNCREG]], label 
%[[CONTINUE4]] + // CHECK-O0: [[CONTINUE4]]: + + // CHECK: [[DETLPAD]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + // CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], + // CHECK-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[TFLPAD]] + + // CHECK: [[TFLPAD]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + // CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME]], + // CHECK-NEXT: to label %[[UNREACHABLE]] unwind + + // CHECK: [[DETLPAD2]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + // CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], + // CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[TFLPAD2]] + + // CHECK: [[TFLPAD2]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + // CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME2]], + // CHECK-NEXT: to label %[[UNREACHABLE]] unwind + + // CHECK: [[DETLPAD3]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + // CHECK: [[DETLPAD3_2]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + // CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], + // CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[TFLPAD3]] + + // CHECK: [[TFLPAD3]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + // CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME3]], + // CHECK-NEXT: to label %[[UNREACHABLE]] unwind + + // CHECK: [[TFLPAD4]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + // CHECK-O0: br label %[[EHCLEANUP:.+]] + + // CHECK-O0: [[DETLPAD4]]: + // CHECK-O0-NEXT: landingpad + // CHECK-O0-NEXT: cleanup + // CHECK-O0: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], + // CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[TFLPAD4]] + + // CHECK-O0: [[EHCLEANUP]]: + // CHECK-O0: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME4]], + // CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind +} + +void derived_class() { + 
// CHECK-LABEL: define {{.*}}void @_Z13derived_classv() + Bar b0, b6, b7; + Bar b8 = _Cilk_spawn makeDBar(), b2 = _Cilk_spawn makeDBarFromBar(b0); + // CHECK: %[[TASKFRAME:.+]] = call token @llvm.taskframe.create() + // CHECK: %[[REFTMP:.+]] = alloca %class.DBar + // CHECK: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[CONTINUE:.+]] unwind label %[[TFLPAD:.+]] + // CHECK: [[DETACHED]]: + // CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME]]) + // CHECK-O1-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %[[REFTMP]]) + // CHECK: invoke void @_Z8makeDBarv(ptr {{.*}}sret(%class.DBar) {{.*}}%[[REFTMP]]) + // CHECK-NEXT: to label %[[INVOKECONT:.+]] unwind label %[[DETLPAD:.+]] + // CHECK: [[INVOKECONT]]: + // CHECK-NEXT: invoke void @_ZN3BarC1EOS_(ptr {{.*}}dereferenceable(16) %[[b8:.+]], ptr {{.*}}dereferenceable(16) %[[REFTMP]]) + // CHECK-NEXT: to label %[[INVOKECONT2:.+]] unwind label %[[DETLPAD_2:.+]] + // CHECK: [[INVOKECONT2]]: + // CHECK-NEXT: call void @_ZN4DBarD1Ev(ptr {{.*}}dereferenceable(16) %[[REFTMP]]) + // CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %[[REFTMP]]) + // CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE]] + // CHECK: [[CONTINUE]]: + // CHECK: %[[TASKFRAME2:.+]] = call token @llvm.taskframe.create() + // CHECK: %[[REFTMP2:.+]] = alloca %class.DBar + // CHECK: %[[AGGTMP:.+]] = alloca %class.Bar + // CHECK: invoke void @_ZN3BarC1ERKS_(ptr {{.*}}dereferenceable(16) %[[AGGTMP]], ptr {{.*}}dereferenceable(16) %[[b0:.+]]) + // CHECK-NEXT: to label %[[INVOKECONT3:.+]] unwind label %[[TFLPAD2:.+]] + // CHECK: [[INVOKECONT3]]: + // CHECK: detach within %[[SYNCREG]], label %[[DETACHED2:.+]], label %[[CONTINUE2:.+]] unwind label %[[TFLPAD2]] + // CHECK: [[DETACHED2]]: + // CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME2]]) + // CHECK-O1-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %[[REFTMP2]]) + // CHECK: invoke void @_Z15makeDBarFromBar3Bar(ptr 
{{.*}}sret(%class.DBar) {{.*}}%[[REFTMP2]], ptr {{.*}}%[[AGGTMP]]) + // CHECK-NEXT: to label %[[INVOKECONT4:.+]] unwind label %[[DETLPAD2:.+]] + // CHECK: [[INVOKECONT4]]: + // CHECK-NEXT: invoke void @_ZN3BarC1EOS_(ptr {{.*}}dereferenceable(16) %[[b2:.+]], ptr {{.*}}dereferenceable(16) %[[REFTMP2]]) + // CHECK-NEXT: to label %[[INVOKECONT5:.+]] unwind label %[[DETLPAD2_2:.+]] + // CHECK: [[INVOKECONT5]]: + // CHECK-NEXT: call void @_ZN4DBarD1Ev(ptr {{.*}}dereferenceable(16) %[[REFTMP2]]) + // CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %[[REFTMP2]]) + // CHECK-NEXT: call void @_ZN3BarD1Ev(ptr {{.*}}dereferenceable(16) %[[AGGTMP]]) + // CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE2]] + // CHECK: [[CONTINUE2]]: + b6 = _Cilk_spawn makeDBarFromBar(b7); + // CHECK-O0: %[[TASKFRAME3:.+]] = call token @llvm.taskframe.create() + // CHECK: %[[AGGTMP3:.+]] = alloca %class.Bar + // CHECK: %[[REFTMP3:.+]] = alloca %class.DBar + // CHECK: %[[AGGTMP2:.+]] = alloca %class.Bar + // CHECK: invoke void @_ZN3BarC1ERKS_(ptr {{.*}}dereferenceable(16) %[[AGGTMP2]], ptr {{.*}}dereferenceable(16) %[[b7:.+]]) + // CHECK-NEXT: to label %[[INVOKECONT6:.+]] unwind label %[[TFLPAD3:.+]] + // CHECK: [[INVOKECONT6]]: + // CHECK-O0: detach within %[[SYNCREG]], label %[[DETACHED3:.+]], label %[[CONTINUE3:.+]] unwind label %[[TFLPAD3]] + // CHECK-O0: [[DETACHED3]]: + // CHECK-O0: call void @llvm.taskframe.use(token %[[TASKFRAME3]]) + // CHECK-O1-NEXT: call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %[[REFTMP3]]) + // CHECK: invoke void @_Z15makeDBarFromBar3Bar(ptr {{.*}}sret(%class.DBar) {{.*}}%[[REFTMP3]], ptr {{.*}}%[[AGGTMP2]]) + // CHECK-NEXT: to label %[[INVOKECONT7:.+]] unwind label %[[DETLPAD3:.+]] + // CHECK: [[INVOKECONT7]]: + // CHECK-NEXT: invoke {{.*}}void @_ZN3BarC1EOS_(ptr {{.*}}dereferenceable(16) %[[AGGTMP3]], ptr {{.*}}dereferenceable(16) %[[REFTMP3]]) + // CHECK-NEXT: to label %[[INVOKECONT8:.+]] unwind label %[[DETLPAD3_2:.+]] + // 
CHECK: [[INVOKECONT8]]: + // CHECK-NEXT: %[[CALL:.+]] = invoke {{.*}}dereferenceable(16) ptr @_ZN3BaraSES_(ptr {{.*}}dereferenceable(16) %[[b6:.+]], ptr {{.*}}%[[AGGTMP3]]) + // CHECK-NEXT: to label %[[INVOKECONT9:.+]] unwind label %[[DETLPAD3_3:.+]] + // CHECK: [[INVOKECONT9]]: + // CHECK-NEXT: call void @_ZN3BarD1Ev(ptr {{.*}}dereferenceable(16) %[[AGGTMP3]]) + // CHECK-NEXT: call void @_ZN4DBarD1Ev(ptr {{.*}}dereferenceable(16) %[[REFTMP3]]) + // CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %[[REFTMP3]]) + // CHECK-NEXT: call void @_ZN3BarD1Ev(ptr {{.*}}dereferenceable(16) %[[AGGTMP2]]) + // CHECK-O0-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE3]] + // CHECK-O0: [[CONTINUE3]]: + + // CHECK: [[DETLPAD]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK: [[DETLPAD_2]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], + // CHECK-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[TFLPAD]] + + // CHECK: [[TFLPAD]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME]], + // CHECK-NEXT: to label %[[UNREACHABLE]] unwind + + // CHECK: [[TFLPAD2]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + // CHECK-O1-NEXT: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME2]], + // CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind + + // CHECK: [[DETLPAD2]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK: [[DETLPAD2_2]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], + // CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[TFLPAD2]] + + // CHECK-O0: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME2]], + // CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind + + // CHECK: [[TFLPAD3]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // 
CHECK: [[DETLPAD3]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK: [[DETLPAD3_2]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK: [[DETLPAD3_3]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK-O0: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], + // CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[TFLPAD3]] + + // CHECK-O0: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME3]], + // CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind +} + +void two_classes() { + // CHECK-LABEL: define {{.*}}void @_Z11two_classesv() + Bar b9, b11; + Bar b12 = _Cilk_spawn makeBaz(); + // CHECK: %[[TASKFRAME:.+]] = call token @llvm.taskframe.create() + // CHECK: %[[REFTMP:.+]] = alloca %class.Baz + // CHECK: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[CONTINUE:.+]] unwind label %[[TFLPAD:.+]] + // CHECK: [[DETACHED]]: + // CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME]]) + // CHECK-O1-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %[[REFTMP]]) + // CHECK: invoke void @_Z7makeBazv(ptr {{.*}}sret(%class.Baz) {{.*}}%[[REFTMP]]) + // CHECK-NEXT: to label %[[INVOKECONT:.+]] unwind label %[[DETLPAD:.+]] + // CHECK: [[INVOKECONT]]: + // CHECK-NEXT: invoke void @_ZN3BarC1ERK3Baz(ptr {{.*}}dereferenceable(16) %[[b12:.+]], ptr {{.*}}dereferenceable(1) %[[REFTMP]]) + // CHECK-NEXT: to label %[[INVOKECONT2:.+]] unwind label %[[DETLPAD_2:.+]] + // CHECK: [[INVOKECONT2]]: + // CHECK-NEXT: call void @_ZN3BazD1Ev(ptr {{.*}}dereferenceable(1) %[[REFTMP]]) + // CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %[[REFTMP]]) + // CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE]] + // CHECK: [[CONTINUE]]: + Bar b13 = _Cilk_spawn makeBazFromBar(b9); + // CHECK: %[[TASKFRAME2:.+]] = call token @llvm.taskframe.create() + // CHECK: %[[REFTMP2:.+]] = alloca %class.Baz + // CHECK: %[[AGGTMP:.+]] = alloca %class.Bar + // CHECK: invoke void 
@_ZN3BarC1ERKS_(ptr {{.*}}dereferenceable(16) %[[AGGTMP]], ptr {{.*}}dereferenceable(16) %[[b9:.+]]) + // CHECK-NEXT: to label %[[INVOKECONT3:.+]] unwind label %[[TFLPAD2:.+]] + // CHECK: [[INVOKECONT3]]: + // CHECK-NEXT: detach within %[[SYNCREG]], label %[[DETACHED2:.+]], label %[[CONTINUE2:.+]] unwind label %[[TFLPAD2]] + // CHECK: [[DETACHED2]]: + // CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME2]]) + // CHECK-O1-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %[[REFTMP2]]) + // CHECK: invoke void @_Z14makeBazFromBar3Bar(ptr {{.*}}sret(%class.Baz) {{.*}}%[[REFTMP2]], ptr {{.*}}%[[AGGTMP]]) + // CHECK-NEXT: to label %[[INVOKECONT4:.+]] unwind label %[[DETLPAD2:.+]] + // CHECK: [[INVOKECONT4]]: + // CHECK-NEXT: invoke void @_ZN3BarC1ERK3Baz(ptr {{.*}}dereferenceable(16) %[[b13:.+]], ptr {{.*}}dereferenceable(1) %[[REFTMP2]]) + // CHECK-NEXT: to label %[[INVOKECONT5:.+]] unwind label %[[DETLPAD2_2:.+]] + // CHECK: [[INVOKECONT5]]: + // CHECK-NEXT: call void @_ZN3BazD1Ev(ptr {{.*}}dereferenceable(1) %[[REFTMP2]]) + // CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %[[REFTMP2]]) + // CHECK-NEXT: call void @_ZN3BarD1Ev(ptr {{.*}}dereferenceable(16) %[[AGGTMP]]) + // CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE2]] + // CHECK: [[CONTINUE2]]: + b9 = _Cilk_spawn makeBazFromBar(b11); + // CHECK-O0: %[[TASKFRAME3:.+]] = call token @llvm.taskframe.create() + // CHECK: %[[AGGTMP3:.+]] = alloca %class.Bar + // CHECK: %[[REFTMP3:.+]] = alloca %class.Baz + // CHECK: %[[AGGTMP2:.+]] = alloca %class.Bar + // CHECK: invoke void @_ZN3BarC1ERKS_(ptr {{.*}}dereferenceable(16) %[[AGGTMP2]], ptr {{.*}}dereferenceable(16) %[[b11:.+]]) + // CHECK-NEXT: to label %[[INVOKECONT6:.+]] unwind label %[[TFLPAD3:.+]] + // CHECK: [[INVOKECONT6]]: + // CHECK-O0-NEXT: detach within %[[SYNCREG]], label %[[DETACHED3:.+]], label %[[CONTINUE3:.+]] unwind label %[[TFLPAD3]] + // CHECK-O0: [[DETACHED3]]: + // CHECK-O0: call void 
@llvm.taskframe.use(token %[[TASKFRAME3]]) + // CHECK-O1-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %[[REFTMP3]]) + // CHECK: invoke void @_Z14makeBazFromBar3Bar(ptr {{.*}}sret(%class.Baz) {{.*}}%[[REFTMP3]], ptr {{.*}}%[[AGGTMP2]]) + // CHECK-NEXT: to label %[[INVOKECONT7:.+]] unwind label %[[DETLPAD3:.+]] + // CHECK: [[INVOKECONT7]]: + // CHECK-NEXT: invoke void @_ZN3BarC1ERK3Baz(ptr {{.*}}dereferenceable(16) %[[AGGTMP3]], ptr {{.*}}dereferenceable(1) %[[REFTMP3]]) + // CHECK-NEXT: to label %[[INVOKECONT8:.+]] unwind label %[[DETLPAD3_2:.+]] + // CHECK: [[INVOKECONT8]]: + // CHECK-NEXT: %[[CALL:.+]] = invoke {{.*}}dereferenceable(16) ptr @_ZN3BaraSES_(ptr {{.*}}dereferenceable(16) %[[b9:.+]], ptr {{.*}}%[[AGGTMP3]]) + // CHECK-NEXT: to label %[[INVOKECONT9:.+]] unwind label %[[DETLPAD3_3:.+]] + // CHECK: [[INVOKECONT9]]: + // CHECK-NEXT: call void @_ZN3BarD1Ev(ptr {{.*}}dereferenceable(16) %[[AGGTMP3]]) + // CHECK-NEXT: call void @_ZN3BazD1Ev(ptr {{.*}}dereferenceable(1) %[[REFTMP3]]) + // CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %[[REFTMP3]]) + // CHECK-NEXT: call void @_ZN3BarD1Ev(ptr {{.*}}dereferenceable(16) %[[AGGTMP2]]) + // CHECK-O0-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE3]] + // CHECK-O0: [[CONTINUE3]]: + + // CHECK: [[DETLPAD]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK: [[DETLPAD_2]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], + // CHECK-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[TFLPAD]] + + // CHECK: [[TFLPAD]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME]], + // CHECK-NEXT: to label %[[UNREACHABLE]] unwind + + // CHECK: [[TFLPAD2]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + // CHECK-O1-NEXT: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME2]], + // CHECK-O1-NEXT: 
to label %[[UNREACHABLE]] unwind + + // CHECK: [[DETLPAD2]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK: [[DETLPAD2_2]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], + // CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[TFLPAD2]] + + // CHECK-O0: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME2]], + // CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind + + // CHECK: [[TFLPAD3]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK: [[DETLPAD3]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK: [[DETLPAD3_2]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK: [[DETLPAD3_3]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK-O0: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], + // CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[TFLPAD3]] + + // CHECK-O0: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME3]], + // CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind +} + +void array_out() { + // CHECK-LABEL: define {{.*}}void @_Z9array_outv() + // int Arri[5]; + // Example that produces a BinAssign expr. + // bool Assign0 = (Arri[0] = foo(makeBazFromBar((Bar())))); + // Pretty sure the following just isn't legal Cilk. 
+ // bool Assign1 = (Arri[1] = _Cilk_spawn foo(makeBazFromBar((Bar())))); + + Bar ArrBar[5]; + // ArrBar[0] = makeBazFromBar((Bar())); + ArrBar[1] = _Cilk_spawn makeBazFromBar((Bar())); + // CHECK: %[[TASKFRAME:.+]] = call token @llvm.taskframe.create() + // CHECK: %[[AGGTMP2:.+]] = alloca %class.Bar + // CHECK: %[[REFTMP:.+]] = alloca %class.Baz + // CHECK: %[[AGGTMP:.+]] = alloca %class.Bar + // CHECK: %[[ARRIDX:.+]] = getelementptr inbounds [5 x %class.Bar], ptr %[[ArrBar:.+]], i64 0, i64 1 + // CHECK: invoke void @_ZN3BarC1Ev(ptr {{.*}}dereferenceable(16) %[[AGGTMP]]) + // CHECK-NEXT: to label %[[INVOKECONT:.+]] unwind label %[[TFLPAD:.+]] + // CHECK: [[INVOKECONT]]: + // CHECK-NEXT: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[CONTINUE:.+]] unwind label %[[TFLPAD]] + // CHECK: [[DETACHED]]: + // CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME]]) + // CHECK-O1-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %[[REFTMP]]) + // CHECK: invoke void @_Z14makeBazFromBar3Bar(ptr {{.*}}sret(%class.Baz) {{.*}}%[[REFTMP]], ptr {{.*}}%[[AGGTMP]]) + // CHECK: to label %[[INVOKECONT2:.+]] unwind label %[[DETLPAD:.+]] + // CHECK: [[INVOKECONT2]]: + // CHECK-NEXT: invoke void @_ZN3BarC1ERK3Baz(ptr {{.*}}dereferenceable(16) %[[AGGTMP2]], ptr {{.*}}dereferenceable(1) %[[REFTMP]]) + // CHECK-NEXT: to label %[[INVOKECONT3:.+]] unwind label %[[DETLPAD_2:.+]] + // CHECK: [[INVOKECONT3]]: + // CHECK-NEXT: %[[CALL:.+]] = invoke {{.*}}dereferenceable(16) ptr @_ZN3BaraSES_(ptr {{.*}}dereferenceable(16) %[[ARRIDX]], ptr {{.*}}%[[AGGTMP2]]) + // CHECK-NEXT: to label %[[INVOKECONT4:.+]] unwind label %[[DETLPAD_3:.+]] + // CHECK: [[INVOKECONT4]]: + // CHECK-NEXT: call void @_ZN3BarD1Ev(ptr {{.*}}dereferenceable(16) %[[AGGTMP2]]) + // CHECK-NEXT: call void @_ZN3BazD1Ev(ptr {{.*}}dereferenceable(1) %[[REFTMP]]) + // CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %[[REFTMP]]) + // CHECK-NEXT: call void @_ZN3BarD1Ev(ptr 
{{.*}}dereferenceable(16) %[[AGGTMP]]) + // CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE]] + // CHECK: [[CONTINUE]]: + + // List initialization + // Bar ListBar1[3] = { Bar(), makeBar(), makeBazFromBar((Bar())) }; + Bar ListBar2[3] = { _Cilk_spawn Bar(), _Cilk_spawn makeBar(), _Cilk_spawn makeBazFromBar((Bar())) }; + // CHECK-O0: %[[ARRIDX2:.+]] = getelementptr inbounds [3 x %class.Bar], ptr %[[LISTBAR2:.+]], i64 0, i64 0 + // CHECK: %[[TASKFRAME2:.+]] = call token @llvm.taskframe.create() + // CHECK: detach within %[[SYNCREG]], label %[[DETACHED2:.+]], label %[[CONTINUE2:.+]] unwind label %[[TFLPAD2:.+]] + // CHECK: [[DETACHED2]]: + // CHECK-O0: invoke void @_ZN3BarC1Ev(ptr {{.*}}dereferenceable(16) %[[ARRIDX2]]) + // CHECK-O1: invoke void @_ZN3BarC1Ev(ptr {{.*}}dereferenceable(16) %[[LISTBAR2:.+]]) + // CHECK-NEXT: to label %[[INVOKECONT5:.+]] unwind label %[[DETLPAD2:.+]] + // CHECK: [[INVOKECONT5]]: + // CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE2]] + // CHECK: [[CONTINUE2]]: + + // CHECK-O0: %[[ARRIDX3:.+]] = getelementptr inbounds %class.Bar, ptr %[[ARRIDX2]], i64 1 + // CHECK-O1: %[[ARRIDX3:.+]] = getelementptr inbounds %class.Bar, ptr %[[LISTBAR2]], i64 1 + // CHECK: %[[TASKFRAME3:.+]] = call token @llvm.taskframe.create() + // CHECK: detach within %[[SYNCREG]], label %[[DETACHED3:.+]], label %[[CONTINUE3:.+]] unwind label %[[TFLPAD3:.+]] + // CHECK: [[DETACHED3]]: + // CHECK: invoke void @_Z7makeBarv(ptr {{.*}}sret(%class.Bar) {{.*}}%[[ARRIDX3]]) + // CHECK-NEXT: to label %[[INVOKECONT6:.+]] unwind label %[[DETLPAD3:.+]] + // CHECK: [[INVOKECONT6]]: + // CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE3]] + + // CHECK-O0: %[[ARRIDX4:.+]] = getelementptr inbounds %class.Bar, ptr %[[ARRIDX3]], i64 1 + // CHECK-O1: %[[ARRIDX4:.+]] = getelementptr inbounds %class.Bar, ptr %[[LISTBAR2]], i64 2 + // CHECK-O0: %[[TASKFRAME4:.+]] = call token @llvm.taskframe.create() + // CHECK: %[[REFTMP2:.+]] = alloca %class.Baz + // 
CHECK: %[[AGGTMP3:.+]] = alloca %class.Bar + // CHECK: invoke void @_ZN3BarC1Ev(ptr {{.*}}dereferenceable(16) %[[AGGTMP3]]) + // CHECK-NEXT: to label %[[INVOKECONT7:.+]] unwind label %[[TFLPAD4:.+]] + // CHECK: [[INVOKECONT7]]: + // CHECK-O0-NEXT: detach within %[[SYNCREG]], label %[[DETACHED4:.+]], label %[[CONTINUE4:.+]] unwind label %[[TFLPAD4]] + // CHECK-O0: [[DETACHED4]]: + // CHECK-O0: call void @llvm.taskframe.use(token %[[TASKFRAME4]]) + // CHECK-O1-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %[[REFTMP2]]) + // CHECK: invoke void @_Z14makeBazFromBar3Bar(ptr {{.*}}sret(%class.Baz) {{.*}}%[[REFTMP2]], ptr {{.*}}%[[AGGTMP3]]) + // CHECK: to label %[[INVOKECONT8:.+]] unwind label %[[DETLPAD4:.+]] + // CHECK: [[INVOKECONT8]]: + // CHECK-NEXT: invoke void @_ZN3BarC1ERK3Baz(ptr {{.*}}%[[ARRIDX4:.+]], ptr {{.*}}dereferenceable(1) %[[REFTMP2]]) + // CHECK-NEXT: to label %[[INVOKECONT9:.+]] unwind label %[[DETLPAD4_2:.+]] + // CHECK: [[INVOKECONT9]]: + // CHECK-NEXT: call void @_ZN3BazD1Ev(ptr {{.*}}dereferenceable(1) %[[REFTMP2]]) + // CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 1, ptr nonnull %[[REFTMP2]]) + // CHECK-NEXT: call void @_ZN3BarD1Ev(ptr {{.*}}dereferenceable(16) %[[AGGTMP3]]) + // CHECK-O0-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE4]] + // CHECK-O0: [[CONTINUE4]]: + + // CHECK: [[TFLPAD]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + // CHECK-O1-NEXT: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME]], + // CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind + + // CHECK: [[DETLPAD]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK: [[DETLPAD_2]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK: [[DETLPAD_3]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], + // CHECK-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[TFLPAD]] + + // CHECK-O0: invoke void 
@llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME]], + // CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind + + // CHECK: [[DETLPAD2]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], + // CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[TFLPAD2]] + + // CHECK: [[TFLPAD2]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME2]], + // CHECK-NEXT: to label %[[UNREACHABLE]] unwind + + // CHECK: [[DETLPAD3]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], + // CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[TFLPAD3]] + + // CHECK: [[TFLPAD3]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME3]], + // CHECK-NEXT: to label %[[UNREACHABLE]] unwind + + // CHECK: [[TFLPAD4]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK: [[DETLPAD4]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK: [[DETLPAD4_2]]: + // CHECK-NEXT: landingpad + // CHECK-NEXT: cleanup + + // CHECK-O0: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], + // CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[TFLPAD4]] + + // CHECK-O0: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME4]], + // CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind +} diff --git a/clang/test/Cilk/early-return-while.c b/clang/test/Cilk/early-return-while.c new file mode 100644 index 00000000000000..78d2e8742d954d --- /dev/null +++ b/clang/test/Cilk/early-return-while.c @@ -0,0 +1,43 @@ +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -fopencilk -verify -ftapir=none -S -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics + +void bar(); +int baz(int); + +void foo(int p) { + while (p) { + if (baz(p)) + return; + 
_Cilk_spawn bar(); + --p; + } + bar(); +} + +// CHECK-LABEL: define {{.*}}void @foo( + +// CHECK: br i1 %{{.+}}, label %[[WHILE_BODY:.+]], label %[[WHILE_END:.+]] + +// CHECK: [[WHILE_BODY]]: +// CHECK: br i1 %{{.+}}, label %[[THEN:.+]], label %[[END:.+]] + +// CHECK: [[THEN]]: +// CHECK-NEXT: br label %[[RETURN:.+]] + +// CHECK: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[CONTINUE:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: call void {{.*}}@bar() +// CHECK: reattach within %[[SYNCREG]], label %[[CONTINUE]] + +// CHECK: [[CONTINUE]]: +// CHECK: br + +// CHECK: [[WHILE_END]]: +// CHECK-NEXT: call void {{.*}}@bar() +// CHECK-NEXT: br label %[[RETURN]] + +// CHECK: [[RETURN]]: +// CHECK-NEXT: sync within %[[SYNCREG]] + +// CHECK: ret void diff --git a/clang/test/Cilk/early-return.c b/clang/test/Cilk/early-return.c new file mode 100644 index 00000000000000..e64e82c91acbd3 --- /dev/null +++ b/clang/test/Cilk/early-return.c @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -fopencilk -verify -ftapir=none -S -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics + +void bar(); + +void foo(int p) { + _Cilk_spawn bar(); + if (p) + return; + bar(); +} + +// CHECK-LABEL: define {{.*}}void @foo( +// CHECK: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[CONTINUE:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: call void {{.*}}@bar() +// CHECK: reattach within %[[SYNCREG]], label %[[CONTINUE]] + +// CHECK: [[CONTINUE]]: +// CHECK: br i1 %{{.+}}, label %[[THEN:.+]], label %[[END:.+]] + +// CHECK: [[THEN]]: +// CHECK-NEXT: sync within %[[SYNCREG]] + +// CHECK: [[END]]: +// CHECK-NEXT: call void {{.*}}@bar() + +// CHECK: sync within %[[SYNCREG]] + +// CHECK: ret void + diff --git a/clang/test/Cilk/early-return.cpp b/clang/test/Cilk/early-return.cpp new file mode 100644 index 00000000000000..c38569348a39c9 --- /dev/null +++ b/clang/test/Cilk/early-return.cpp @@ -0,0 +1,62 @@ +// RUN: %clang_cc1 -fcxx-exceptions -fexceptions 
-fopencilk -ftapir=none -triple x86_64-unknown-linux-gnu -std=c++11 -emit-llvm %s -o - | FileCheck %s + +class Obj { +public: + Obj(); + ~Obj(); +}; + +void bar(); + +void foo(int p) { + Obj o1; + _Cilk_spawn bar(); + if (p) + return; + Obj o2; + bar(); +} + +// CHECK-LABEL: define {{.*}}void @_Z3fooi(i32 +// CHECK: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[CONTINUE:.+]] unwind + +// CHECK: [[DETACHED]]: +// CHECK: invoke void @_Z3barv() +// CHECK: reattach within %[[SYNCREG]], label %[[CONTINUE]] + +// CHECK: [[CONTINUE]]: +// CHECK: br i1 %{{.+}}, label %[[THEN:.+]], label %[[END:.+]] + +// CHECK: [[THEN]]: +// CHECK-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONT:.+]] + +// CHECK: [[SYNCCONT]]: +// CHECK-NEXT: invoke void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK-NEXT: to label %[[SUCONT:.+]] unwind label + +// CHECK: [[SUCONT]]: +// CHECK: br label %[[CLEANUP:.+]] + +// CHECK: [[END]]: +// CHECK-NEXT: invoke void @_ZN3ObjC1Ev(ptr {{.*}}dereferenceable(1) %[[O2:.+]]) +// CHECK-NEXT: to label %[[INVOKECONT:.+]] unwind label + +// CHECK: [[INVOKECONT]]: +// CHECK-NEXT: invoke void @_Z3barv() +// CHECK-NEXT: to label %[[INVOKECONT2:.+]] unwind label + +// CHECK: [[INVOKECONT2]]: +// CHECK-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONT2:.+]] + +// CHECK: [[SYNCCONT2]]: +// CHECK-NEXT: invoke void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK-NEXT: to label %[[SUCONT2:.+]] unwind label + +// CHECK: [[SUCONT2]]: +// CHECK-NEXT: call void @_ZN3ObjD1Ev(ptr {{.*}}dereferenceable(1) %[[O2:.+]]) +// CHECK: br label %[[CLEANUP]] + +// CHECK: [[CLEANUP]]: +// CHECK-NEXT: call void @_ZN3ObjD1Ev(ptr {{.*}}dereferenceable(1) %[[O1:.+]]) + +// CHECK: ret void diff --git a/clang/test/Cilk/hyper-address.c b/clang/test/Cilk/hyper-address.c new file mode 100644 index 00000000000000..3e037997e8dde0 --- /dev/null +++ b/clang/test/Cilk/hyper-address.c @@ -0,0 +1,30 @@ +/* Test two ways to take the address of a reducer: + 1. 
__builtin_addressof returns leftmost view + 2. & returns current view +*/ +// RUN: %clang_cc1 %s -x c -triple aarch64-freebsd -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// expected-no-diagnostics +// This does not compile in C++ because function overloading requires +// an exact match for hyperobject types. C allows assigning to a +// generic hyperobject. +void identity(void * value); +void reduce(void* left, void* right); +extern void consume_view(long *); +extern void consume_hyper(long _Hyperobject *); +// CHECK-LABEL: assorted_addresses +void assorted_addresses() +{ + // CHECK: call void @llvm.reducer.register + long _Hyperobject(identity, reduce) sum = 0; + // CHECK-NOT: llvm.hyper.lookup + // CHECK: call void @[[FN1:.*consume_hyper]] + consume_hyper(__builtin_addressof(sum)); + // CHECK: call ptr @llvm.hyper.lookup + // CHECK-NOT: call ptr @llvm.hyper.lookup + // CHECK: call void @[[FN2:.*consume_view]] + consume_view(&sum); + // CHECK: call void @llvm.reducer.unregister + // CHECK-NOT: call void @llvm.reducer.unregister + // CHECK: ret void +} + diff --git a/clang/test/Cilk/hyper-alias-ctor.cpp b/clang/test/Cilk/hyper-alias-ctor.cpp new file mode 100644 index 00000000000000..c12705c9cdf7d6 --- /dev/null +++ b/clang/test/Cilk/hyper-alias-ctor.cpp @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 %s -x c++ -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// expected-no-diagnostics +template <typename T> +struct Constructed { + Constructed(); + ~Constructed(); + //static void identity(Constructed<T> *); + //static void reduce(Constructed<T> *left, Constructed<T> *right); + static void identity(void *); + static void reduce(void *left, void *right); +}; + +// Make sure hyperobjects pass through (template using) unharmed. 
+template <typename T> +using Alias = + Constructed<T> _Hyperobject(Constructed<T>::identity, Constructed<T>::reduce); + +void f() +{ + // CHECK: call {{.+}} @_ZN11ConstructedIiEC1Ev + // CHECK: call void @llvm.reducer.register + Alias<int> local; + // CHECK: call void @llvm.reducer.unregister + // CHECK: call {{.+}} @_ZN11ConstructedIiED1Ev +} diff --git a/clang/test/Cilk/hyper-array-extern-1.cpp b/clang/test/Cilk/hyper-array-extern-1.cpp new file mode 100644 index 00000000000000..a2fe40185797ff --- /dev/null +++ b/clang/test/Cilk/hyper-array-extern-1.cpp @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 %s -x c -triple aarch64-freebsd -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// RUN: %clang_cc1 %s -x c++ -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// expected-no-diagnostics + +extern int _Hyperobject x[10]; + +// One array with 10 hyperobject elements +// CHECK-LABEL: read_array_hyper +int read_array_hyper(unsigned i) +{ + return x[i]; + // CHECK: %[[ARRAYIDX:.+]] = getelementptr inbounds + // CHECK: %[[VIEWRAW:.+]] = call ptr @llvm.hyper.lookup.i64(ptr %[[ARRAYIDX]], i64 4, ptr null, ptr null) + // CHECK-NOT: call ptr @llvm.hyper.lookup + // CHECK: %[[VAL:.+]] = load i32, ptr %[[VIEWRAW]] + // CHECK: ret i32 %[[VAL]] +} diff --git a/clang/test/Cilk/hyper-array-extern-2.cpp b/clang/test/Cilk/hyper-array-extern-2.cpp new file mode 100644 index 00000000000000..c23f550e132b46 --- /dev/null +++ b/clang/test/Cilk/hyper-array-extern-2.cpp @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 %s -x c -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// RUN: %clang_cc1 %s -x c++ -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// expected-no-diagnostics + +// One hyperobject array with 10 integer elements +typedef int I10[10]; +extern I10 _Hyperobject y; +// CHECK-LABEL: read_hyper_array +int read_hyper_array(unsigned i) +{ + return y[i]; + // CHECK: call ptr @llvm.hyper.lookup + // CHECK-NOT: call ptr 
@llvm.hyper.lookup + // Make sure the array is not copied to the stack. + // CHECK-NOT: call void @llvm.memcpy + // CHECK: getelementptr + // CHECK: load i32 + // CHECK: ret i32 +} diff --git a/clang/test/Cilk/hyper-array-global.cpp b/clang/test/Cilk/hyper-array-global.cpp new file mode 100644 index 00000000000000..0108df94f79a1e --- /dev/null +++ b/clang/test/Cilk/hyper-array-global.cpp @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 %s -triple aarch64-freebsd -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// expected-no-diagnostics + +struct S { S() noexcept; int val; ~S() noexcept; }; +typedef S S10[10]; +S10 _Hyperobject s; + +// CHECK-NOT: call void @llvm.reducer.register +// CHECK-NOT: call void @llvm.reducer.unregister +// CHECK: call void @_ZN1SC1Ev +// CHECK-NOT: call void @llvm.reducer.register +// CHECK-NOT: call void @llvm.reducer.unregister +// CHECK: call void @_ZN1SD1Ev +// CHECK-NOT: call void @llvm.reducer.register +// CHECK-NOT: call void @llvm.reducer.unregister diff --git a/clang/test/Cilk/hyper-array-local.cpp b/clang/test/Cilk/hyper-array-local.cpp new file mode 100644 index 00000000000000..e1ecc2d2f2fe30 --- /dev/null +++ b/clang/test/Cilk/hyper-array-local.cpp @@ -0,0 +1,43 @@ +// RUN: %clang_cc1 %s -x c -triple aarch64-freebsd -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// RUN: %clang_cc1 %s -x c++ -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +extern void identity_long(void *); +extern void reduce_long(void *, void *); + +typedef long _Hyperobject(identity_long, reduce_long) rlong; + +// CHECK-LABEL: local_array_of_hyper +long local_array_of_hyper(unsigned int x) +{ + // CHECK: %x.addr = alloca + // CHECK: %[[ARRAY:.+]] = alloca [10 x i64] + // The outermost variable is not a hyperobject, and registration of + // hyperobject array elements is not implemented. 
+ // CHECK-NOT: call void @llvm.reducer.register + rlong array[10]; // expected-warning{{array of reducer not implemented}} + // CHECK: getelementptr inbounds [[JUNK:.+]] %[[ARRAY]] + // CHECK: %[[RAW:.+]] = call ptr @llvm.hyper.lookup + // CHECK-NOT: call ptr @llvm.hyper.lookup + // CHECK: %[[VAL:.+]] = load i64, ptr %[[RAW]] + return array[x]; + // CHECK-NOT: call void @llvm.reducer.unregister + // CHECK: ret i64 %[[VAL]] +} + +// CHECK-LABEL: local_hyper_of_array +long local_hyper_of_array(unsigned int x) +{ + // CHECK: %x.addr = alloca + // CHECK: %[[ARRAY:.+]] = alloca [10 x i64] + // A hyperobject without reducer attribute should not be registered. + // CHECK-NOT: call void @llvm.reducer.register + typedef long Array[10]; + Array _Hyperobject array; + // CHECK: %[[RAW:.+]] = call ptr @llvm.hyper.lookup + // CHECK-NOT: call ptr @llvm.hyper.lookup + // CHECK: %[[ELEMENT:.+]] = getelementptr inbounds [[JUNK:.+]] %[[RAW]] + // CHECK: %[[VAL:.+]] = load i64, ptr %[[ELEMENT]] + return array[x]; + // CHECK-NOT: call void @llvm.reducer.unregister + // CHECK: ret i64 %[[VAL]] +} + diff --git a/clang/test/Cilk/hyper-assign.c b/clang/test/Cilk/hyper-assign.c new file mode 100644 index 00000000000000..2c7fe46fd6b5c1 --- /dev/null +++ b/clang/test/Cilk/hyper-assign.c @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 %s -x c -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// RUN: %clang_cc1 %s -x c++ -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// expected-no-diagnostics + +extern long _Hyperobject x, _Hyperobject y; + +long chain_assign() +{ + // CHECK: %[[Y1RAW:.+]] = call ptr @llvm.hyper.lookup.i64(ptr @y, i64 8, ptr null, ptr null) + // CHECK: %[[Y1VAL:.+]] = load i64, ptr %[[Y1RAW]] + // CHECK: call ptr @llvm.hyper.lookup.i64(ptr @x, i64 8, ptr null, ptr null) + // CHECK: store i64 %[[Y1VAL]] + // CHECK: call ptr @llvm.hyper.lookup.i64(ptr @y, i64 8, ptr null, ptr null) + // CHECK: call ptr @llvm.hyper.lookup.i64(ptr @x, i64 8, 
ptr null, ptr null) + return x = y = x = y; +} + +long simple_assign(long val) +{ + // CHECK: call ptr @llvm.hyper.lookup.i64(ptr @x, i64 8, ptr null, ptr null) + // CHECK-NOT: call ptr @llvm.hyper.lookup + // CHECK: store i64 + return x = val; +} + +long subtract() +{ + // The order is not fixed here. + // CHECK: call ptr @llvm.hyper.lookup.i64(ptr @y, i64 8, ptr null, ptr null) + // CHECK: load i64 + // CHECK: add nsw i64 %[[Y:.+]], 1 + // CHECK: store i64 + // CHECK: call ptr @llvm.hyper.lookup.i64(ptr @x, i64 8, ptr null, ptr null) + // CHECK: load i64 + // CHECK: sub nsw + // CHECK: store i64 + return x -= y++; +} diff --git a/clang/test/Cilk/hyper-autoincr.c b/clang/test/Cilk/hyper-autoincr.c new file mode 100644 index 00000000000000..9461da1c12d61d --- /dev/null +++ b/clang/test/Cilk/hyper-autoincr.c @@ -0,0 +1,119 @@ +// Test autoincrement operations on hyperobjects. +// RUN: %clang_cc1 %s -x c -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// RUN: %clang_cc1 %s -x c++ -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// expected-no-diagnostics +extern void identity(void * value); +extern void reduce(void* left, void* right); + +typedef long _Hyperobject *long_hp; +typedef long _Hyperobject long_h; +extern int _Hyperobject x, _Hyperobject y; +// CHECK-LABEL: extern1 +void extern1() +{ + // CHECK: call ptr @llvm.hyper.lookup + // CHECK: load i32, + // Only one call for a read-modify-write operation. + // CHECK-NOT: call ptr @llvm.hyper.lookup + // CHECK: store i32 + // CHECK: ret void + ++x; +} + +// CHECK-LABEL: extern2 +int extern2() +{ + // CHECK: call ptr @llvm.hyper.lookup + // CHECK: load i32, + // Only one call for a read-modify-write operation. 
+ // CHECK-NOT: call ptr @llvm.hyper.lookup + // CHECK: store i32 + // CHECK: ret i32 + return 1 + --x; +} + +// CHECK-LABEL: ptr_with_direct_typedef +long ptr_with_direct_typedef(long_hp ptr) +{ + // CHECK-NOT: ret i64 + // CHECK: call ptr @llvm.hyper.lookup + // CHECK: ret i64 + return ++*ptr; +} + +// CHECK-LABEL: ptr_with_indirect_typedef_1 +long ptr_with_indirect_typedef_1(long_h *ptr) +{ + // CHECK-NOT: ret i64 + // CHECK: call ptr @llvm.hyper.lookup + // CHECK-NOT: store i64 + // CHECK: ret i64 + return *ptr++; // this increments the pointer, a dead store +} + +// CHECK-LABEL: ptr_with_indirect_typedef_2 +long ptr_with_indirect_typedef_2(long_h *ptr) +{ + // CHECK-NOT: ret i64 + // CHECK: call ptr @llvm.hyper.lookup + // CHECK-NOT: store i64 + // CHECK: ret i64 + return *++ptr; // again, the increment is dead +} + +// CHECK-LABEL: ptr_with_indirect_typedef_3 +long ptr_with_indirect_typedef_3(long_h *ptr) +{ + // CHECK-NOT: ret i64 + // CHECK: call ptr @llvm.hyper.lookup + // CHECK: load i64 + // CHECK-NOT: call ptr @llvm.hyper.lookup + // CHECK: store i64 + // CHECK: ret i64 + return ptr[0]++; +} + +// CHECK-LABEL: direct_typedef_1 +long direct_typedef_1() +{ + extern long_h z; + // CHECK: call ptr @llvm.hyper.lookup + // CHECK: load i64, + // CHECK: store i64 + // CHECK: ret i64 + return ++z; +} + +// CHECK-LABEL: local_reducer_1 +double local_reducer_1() +{ + // Initialization precedes registration + // CHECK: store double 0.0 + // CHECK: call void @llvm.reducer.register + double _Hyperobject(identity, reduce) x = 0.0; + // CHECK: call ptr @llvm.hyper.lookup + // CHECK: load double + // CHECK: fadd double + // CHECK: store double + x += 1.0f; + // CHECK: call ptr @llvm.hyper.lookup + // CHECK: load double + // CHECK: call void @llvm.reducer.unregister + // CHECK: ret double + return x; +} + +// CHECK-LABEL: two_increments +long two_increments() +{ + // It would also be correct for evaluation of operands of + to be interleaved. 
+ // CHECK: call ptr @llvm.hyper.lookup + // CHECK: load i32 + // CHECK: store i32 + // CHECK: call ptr @llvm.hyper.lookup + // CHECK: load i32 + // CHECK: store i32 + // CHECK: ret i64 + return ++x + y++; +} + diff --git a/clang/test/Cilk/hyper-bare.cpp b/clang/test/Cilk/hyper-bare.cpp new file mode 100644 index 00000000000000..317cb42b1e5185 --- /dev/null +++ b/clang/test/Cilk/hyper-bare.cpp @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 %s -triple aarch64-freebsd -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// expected-no-diagnostics + +struct S { S(); ~S(); int x; }; +// Should be constructed and destructed like a regular variable. +struct S _Hyperobject shyper; + +// CHECK-NOT: call void @llvm.reducer.register +// CHECK: call void @_ZN1SC1Ev +// CHECK-NOT: call void @llvm.reducer.register +// CHECK-NOT: call void @llvm.reducer.unregister +// CHECK: call void @_ZN1SD1Ev(ptr noundef nonnull align 4 dereferenceable(4) @shyper) +// CHECK-NOT: call void @llvm.reducer.register +// CHECK-NOT: call void @llvm.reducer.unregister diff --git a/clang/test/Cilk/hyper-cast-bad.c b/clang/test/Cilk/hyper-cast-bad.c new file mode 100644 index 00000000000000..d3751efb3c2953 --- /dev/null +++ b/clang/test/Cilk/hyper-cast-bad.c @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 %s -xc -fopencilk -verify -fsyntax-only +// RUN: %clang_cc1 %s -xc++ -fopencilk -verify -fsyntax-only + +void f() +{ + (void)(int)1; + (void)(int _Hyperobject)1; // expected-error{{cast to hyperobject}} + // TODO: It would be nicer to have only one error here. 
+ (void)(struct S _Hyperobject)1; // expected-error{{incomplete type 'struct S' may not be a hyperobject}} expected-error{{cast to hyperobject}} expected-note{{forward declaration}} +} diff --git a/clang/test/Cilk/hyper-complex.c b/clang/test/Cilk/hyper-complex.c new file mode 100644 index 00000000000000..6a76ed70480b23 --- /dev/null +++ b/clang/test/Cilk/hyper-complex.c @@ -0,0 +1,35 @@ +// RUN: %clang_cc1 %s -x c -triple aarch64-freebsd -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// RUN: %clang_cc1 %s -x c++ -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// expected-no-diagnostics + +extern __complex__ float _Hyperobject c; + +// CHECK-LABEL: get_real +float get_real() +{ + // CHECK: %[[RAW1:.+]] = call ptr @llvm.hyper.lookup.i64(ptr @c, i64 8, ptr null, ptr null) + // CHECK: %[[FIELD1:.+]] = getelementptr inbounds { float, float }, ptr %[[RAW1]], i32 0, i32 0 + // CHECK: %[[RET1:.+]] = load float, ptr %[[FIELD1]] + // CHECK: ret float %[[RET1]] + return __real__(c); +} +// CHECK-LABEL: get_imag +float get_imag() +{ + // CHECK: %[[RAW2:.+]] = call ptr @llvm.hyper.lookup.i64(ptr @c, i64 8, ptr null, ptr null) + // CHECK: %[[FIELD2:.+]] = getelementptr inbounds { float, float }, ptr %[[RAW2]], i32 0, i32 1 + // CHECK: load float, ptr %[[FIELD2]] + // CHECK: ret float + return __imag__(c); +} + +// CHECK-LABEL: get_abs +float get_abs() +{ + // Only one call to llvm.hyper.lookup. 
+ // CHECK: @llvm.hyper.lookup.i64(ptr @c, i64 8, ptr null, ptr null) + // CHECK-NOT: @llvm.hyper.lookup + // CHECK: call float @cabsf + // CHECK: ret float + return __builtin_cabsf(c); +} diff --git a/clang/test/Cilk/hyper-copy.c b/clang/test/Cilk/hyper-copy.c new file mode 100644 index 00000000000000..73eac0728bbd2d --- /dev/null +++ b/clang/test/Cilk/hyper-copy.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 %s -x c -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// RUN: %clang_cc1 %s -x c++ -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// expected-no-diagnostics +struct S { int first, second; }; + +extern struct S _Hyperobject a __attribute__((aligned(8))); +extern struct S b __attribute__((aligned(8))); + +// CHECK-LABEL: scopy +void scopy() +{ + // CHECK: call ptr @llvm.hyper.lookup.i64(ptr @a, i64 8, ptr null, ptr null) + // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 @b, + // CHECK: call ptr @llvm.hyper.lookup.i64(ptr @a, i64 8, ptr null, ptr null) + // CHECK: call void @llvm.memcpy.p0.p0.i64 + // CHECK: ret void + b = a; + a = b; +} diff --git a/clang/test/Cilk/hyper-destruct.cpp b/clang/test/Cilk/hyper-destruct.cpp new file mode 100644 index 00000000000000..bc260e5c4023ab --- /dev/null +++ b/clang/test/Cilk/hyper-destruct.cpp @@ -0,0 +1,49 @@ +// RUN: %clang_cc1 %s -x c++ -fopencilk -verify -disable-llvm-passes -S -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +template <typename T> class Bag; + +template <typename T> +class Pennant +{ +public: + Pennant(); + Pennant(T*); + ~Pennant(); + + friend class Bag<T>; +}; + +template <typename T> +class Bag +{ + unsigned int field1; + Pennant<T>* *bag; + + Pennant<T>* filling; + + unsigned int field2; + +public: + Bag(); + Bag(Bag<T>*); + + ~Bag(); + + static void identity(void *value); + static void reduce(void *left, void *right); +}; + +template <typename T> +Bag<T>::~Bag() +{ +} + +template <typename T> +using Bag_red = Bag<T> _Hyperobject(Bag<T>::identity, Bag<T>::reduce); + +void f() +{ + Bag_red<int> b1; + // Ensure that the 
destructor is emitted. + // CHECK: define linkonce_odr {{.+}} @_ZN3BagIiED2Ev +} diff --git a/clang/test/Cilk/hyper-errors.c b/clang/test/Cilk/hyper-errors.c new file mode 100644 index 00000000000000..01e269add34ef9 --- /dev/null +++ b/clang/test/Cilk/hyper-errors.c @@ -0,0 +1,40 @@ +// RUN: %clang_cc1 %s -fopencilk -verify -fsyntax-only -Werror=incompatible-function-pointer-types -Werror=int-conversion +// RUN: %clang_cc1 %s -x c++ -fopencilk -verify -fsyntax-only +struct C { int _Hyperobject c; }; +struct C _Hyperobject c; // expected-error{{type 'struct C', which contains a hyperobject, may not be a hyperobject}} +long _Hyperobject d; // expected-note{{previous definition}} +void f() { + extern int _Hyperobject d; + // expected-error@-1{{redeclaration of 'd' with a different type: 'int _Hyperobject' vs 'long _Hyperobject'}} +} +char _Hyperobject e; // expected-note{{previous definition}} +typedef long _Hyperobject long_h; +void g() { + extern long_h e; // expected-error{{redeclaration of 'e'}} +} + +extern void reduce(void *, void *), identity(void *); + +struct D { + int _Hyperobject(identity, reduce) field; + // expected-warning@-1{{reducer callbacks not implemented for structure members}} +}; + +int _Hyperobject(reduce, identity) h; + // expected-error@-1{{incompatible function pointer types passing 'void (*)(void *, void *)' to parameter of type 'void (*)(void *)'}} + // expected-error@-2{{incompatible function pointer types passing 'void (*)(void *)' to parameter of type 'void (*)(void *, void *)'}} + +int _Hyperobject(x) i; // expected-error{{use of undeclared identifier 'x'}} +int _Hyperobject(0) j; // expected-error{{hyperobject must have 0 or 2 callbacks}} +int _Hyperobject(0,0,0,0) k; // expected-error{{hyperobject must have 0 or 2 callbacks}} +int _Hyperobject(0, 1) x; // expected-error{{incompatible integer to pointer conversion passing 'int' to parameter of type 'void (*)(void *, void *)'}} + +void function() { + int _Hyperobject(typo1, reduce) var1 
= 0; + // expected-error@-1{{use of undeclared identifier 'typo1'}} + int _Hyperobject(typo2, typo3) var2 = 0; + // expected-error@-1{{use of undeclared identifier 'typo2'}} + // expected-error@-2{{use of undeclared identifier 'typo3'}} + int _Hyperobject(0, typo4) var3 = 0; + // expected-error@-1{{use of undeclared identifier 'typo4'}} +} diff --git a/clang/test/Cilk/hyper-expand1.cpp b/clang/test/Cilk/hyper-expand1.cpp new file mode 100644 index 00000000000000..de4b37ed631448 --- /dev/null +++ b/clang/test/Cilk/hyper-expand1.cpp @@ -0,0 +1,47 @@ +// RUN: %clang_cc1 %s -x c++ -O1 -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// expected-no-diagnostics + +template static void zero(void *v); + +template static void zero(void *v) { + *static_cast(v) = static_cast(0); +} + +template static void plus(void *l, void *r) { + *static_cast(l) += *static_cast(r); +} + +template +using reducer_opadd = T _Hyperobject(zero, plus); + +extern double X[], Y[]; + +// CHECK-LABEL: mult_direct +template +T mult_direct(T *x, T *y) { + reducer_opadd * a = nullptr; + reducer_opadd * b = a; + + reducer_opadd result_reducer = 0; + // CHECK: call void @llvm.reducer.register + // CHECK-NOT: call ptr @llvm.hyper.lookup + // CHECK: getelementptr + // CHECK-NEXT: load double, + // CHECK-NOT: call ptr @llvm.hyper.lookup + // CHECK: getelementptr + // CHECK-NEXT: load double, + // CHECK: call ptr @llvm.hyper.lookup + // CHECK-NOT: call ptr @llvm.hyper.lookup + // CHECK: load double + // CHECK-NOT: call ptr @llvm.hyper.lookup + // CHECK: store double + result_reducer += x[0]*y[0]; + // CHECK: call ptr @llvm.hyper.lookup + // CHECK-NOT: call ptr @llvm.hyper.lookup + // CHECK: ret double + return result_reducer; +} + +double f() { + return mult_direct(X, Y); +} diff --git a/clang/test/Cilk/hyper-expand2.cpp b/clang/test/Cilk/hyper-expand2.cpp new file mode 100644 index 00000000000000..ee81dc49156435 --- /dev/null +++ b/clang/test/Cilk/hyper-expand2.cpp @@ -0,0 +1,32 @@ 
+// RUN: %clang_cc1 %s -x c++ -O1 -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// expected-no-diagnostics + +extern double X[], Y[]; + +template +struct Box { T value; }; + +template +// CHECK-LABEL: mult_indirect +void mult_indirect(Box _Hyperobject *H, T *x, T *y) { + // CHECK-NOT: call void @llvm.reducer.register + // CHECK-NOT: call ptr @llvm.hyper.lookup + // CHECK: getelementptr + // CHECK-NEXT: load double, + // CHECK-NOT: call ptr @llvm.hyper.lookup + // CHECK: getelementptr + // CHECK-NEXT: load double, + // CHECK: call ptr @llvm.hyper.lookup + // CHECK-NOT: call ptr @llvm.hyper.lookup + // CHECK: load double + // CHECK-NOT: call ptr @llvm.hyper.lookup + // CHECK: store double + H->value += x[0]*y[0]; + // CHECK: ret void +} + +typedef void (*Fn)(Box _Hyperobject*, double *, double *); + +Fn g() { + return &mult_indirect; +} diff --git a/clang/test/Cilk/hyper-expand3.cpp b/clang/test/Cilk/hyper-expand3.cpp new file mode 100644 index 00000000000000..a1b7c07a14d91f --- /dev/null +++ b/clang/test/Cilk/hyper-expand3.cpp @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 %s -x c++ -O1 -fopencilk -verify -S -fsyntax-only +// RUN: %clang_cc1 --std=c++17 %s -x c++ -O1 -fopencilk -verify -S -fsyntax-only +// expected-no-diagnostics +// See opencilk-project issue 132. The reference to hyperobject red +// does not have a dependent type but arises in a dependent context. + +void zero(void *v) { *(int *)v = 0; } + +void plus(void *l, void *r) { *(int *)l += *(int *)r; } + +template int f() { + int _Hyperobject(zero, plus) red; + red += 5; + return red; +} + +int main() { return f(); } diff --git a/clang/test/Cilk/hyper-generic.c b/clang/test/Cilk/hyper-generic.c new file mode 100644 index 00000000000000..133980c5c58eb6 --- /dev/null +++ b/clang/test/Cilk/hyper-generic.c @@ -0,0 +1,66 @@ +// A comment line. 
+// RUN: %clang_cc1 %s -std=c11 -O1 -fopencilk -ftapir=none -S -emit-llvm -o - | FileCheck %s + +#define ADD(SFX, T) \ + static void add_##SFX(void *l, void *r) { *(T *)l += *(T *)r; } +#define ZERO(SFX, T) \ + static void zero_##SFX(void *v) { *(T *)v = (T)0; } + +ADD(sc, signed char) ZERO(sc, signed char) +ADD(uc, unsigned char) ZERO(uc, unsigned char) +ADD(ss, short) ZERO(ss, short) +ADD(us, unsigned short) ZERO(us, unsigned short) +ADD(si, int) ZERO(si, int) +ADD(ui, unsigned int) ZERO(ui, unsigned int) +ADD(sl, long) ZERO(sl, long) +ADD(ul, unsigned long) ZERO(ul, unsigned long) + +ADD(f, float) ZERO(f, float) +ADD(d, double) ZERO(d, double) +ADD(ld, long double) ZERO(ld, long double) + +#define SELECT(PFX, T) \ + _Generic((T)0, \ + signed char : PFX##sc, \ + unsigned char : PFX##uc, \ + short : PFX##ss, \ + unsigned short : PFX##us, \ + int : PFX##si, \ + unsigned int : PFX##ui, \ + long : PFX##sl, \ + unsigned long : PFX##ul, \ + float : PFX##f, \ + double : PFX##d, \ + long double : PFX##ld \ +) + +#define ADD_REDUCER(T) \ + T _Hyperobject(SELECT(zero_, T), SELECT(add_, T)) + +// CHECK-LABEL: define_int_reducer +void define_int_reducer(long *out) +{ + // CHECK: call void @llvm.reducer.register.i64 + // CHECK: ptr nonnull @zero_sl + // CHECK: ptr nonnull @add_sl + ADD_REDUCER(long) sum; + _Cilk_for (int i = 0; i < 3900; ++i) + sum += i; + *out = sum; + // CHECK: call void @llvm.reducer.unregister + // CHECK-NOT: call void @llvm.reducer.unregister +} + +// CHECK-LABEL: define_float_reducer +void define_float_reducer(float *out) +{ + // CHECK: call void @llvm.reducer.register.i64 + // CHECK: ptr nonnull @zero_f + // CHECK: ptr nonnull @add_f + ADD_REDUCER(float) sum; + _Cilk_for (int i = 0; i < 3900; ++i) + sum += i; + *out = sum; + // CHECK: call void @llvm.reducer.unregister + // CHECK-NOT: call void @llvm.reducer.unregister +} diff --git a/clang/test/Cilk/hyper-global-c.c b/clang/test/Cilk/hyper-global-c.c new file mode 100644 index 
00000000000000..4c7f776cea02d1 --- /dev/null +++ b/clang/test/Cilk/hyper-global-c.c @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 %s -triple amd64-freebsd -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// RUN: %clang_cc1 %s -x c++ -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// expected-no-diagnostics + +void identity_long(void *v); +void reduce_long(void *l, void *r); + +// CHECK-LABEL: cxx_global_var_init +// CHECK: store i64 1, ptr @global +// CHECK: call void @llvm.reducer.register.i64 +long _Hyperobject(identity_long, reduce_long) global = 1; + +// CHECK: call void @llvm.reducer.unregister diff --git a/clang/test/Cilk/hyper-global-ctor-dtor.cpp b/clang/test/Cilk/hyper-global-ctor-dtor.cpp new file mode 100644 index 00000000000000..0ca64e849a472a --- /dev/null +++ b/clang/test/Cilk/hyper-global-ctor-dtor.cpp @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 %s -triple amd64-freebsd -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// expected-no-diagnostics + +struct S { S(int); ~S(); int x; }; + +void identity_S(void *v); +void reduce_S(void *l, void *r); + +// CHECK-LABEL: cxx_global_var_init +// CHECK: call void @_ZN1SC1Ei(ptr noundef nonnull align 4 dereferenceable(4) @global, i32 noundef 1) +// CHECK: call void @llvm.reducer.register.i64 +S _Hyperobject(identity_S, reduce_S) global(1); + +// CHECK: call void @llvm.reducer.unregister +// CHECK: call void @_ZN1SD1Ev diff --git a/clang/test/Cilk/hyper-global-ctor-only.cpp b/clang/test/Cilk/hyper-global-ctor-only.cpp new file mode 100644 index 00000000000000..a477925de5ab74 --- /dev/null +++ b/clang/test/Cilk/hyper-global-ctor-only.cpp @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 %s -triple aarch64-freebsd -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// expected-no-diagnostics + +struct S { S(int); int x; }; + +void identity_S(void *v); +void reduce_S(void *l, void *r); + +// CHECK-LABEL: cxx_global_var_init +// CHECK: call void 
@_ZN1SC1Ei(ptr noundef nonnull align 4 dereferenceable(4) @global, i32 noundef 1) +// CHECK: call void @llvm.reducer.register.i64 +S _Hyperobject(identity_S, reduce_S) global = 1; + +// CHECK: call void @llvm.reducer.unregister +// CHECK-NOT: _ZN1SD1Ev diff --git a/clang/test/Cilk/hyper-global-dtor-only.cpp b/clang/test/Cilk/hyper-global-dtor-only.cpp new file mode 100644 index 00000000000000..c8f03b09beead5 --- /dev/null +++ b/clang/test/Cilk/hyper-global-dtor-only.cpp @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 %s -triple aarch64-freebsd -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// expected-no-diagnostics + +struct S { ~S(); int x; }; + +void identity_S(void *v); +void reduce_S(void *l, void *r); + +// CHECK-LABEL: __cxx_global_var_init +// CHECK: call void @llvm.reducer.register.i64 +S _Hyperobject(identity_S, reduce_S) global; + +// CHECK: call void @llvm.reducer.unregister +// CHECK: call void @_ZN1SD1Ev diff --git a/clang/test/Cilk/hyper-new-final.cpp b/clang/test/Cilk/hyper-new-final.cpp new file mode 100644 index 00000000000000..075642cbbcae2a --- /dev/null +++ b/clang/test/Cilk/hyper-new-final.cpp @@ -0,0 +1,48 @@ +// RUN: %clang_cc1 %s -x c++ -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// expected-no-diagnostics + +template class base { +protected: + base(); + virtual ~base() noexcept; +}; + +extern base var; + +template +class derived final : public base +{ +public: + void reduce(derived* other); + + static void reduce(void *left_v, void *right_v); + static void identity(void *view); + ~derived() noexcept; + derived(); + derived(const base& os); +}; + + +extern void use(derived&, const char *); + +template + using reducer = derived + _Hyperobject(&derived::identity, &derived::reduce); + +// CHECK-LABEL: testfn +int testfn(int argc, char *argv[]) { + // Should call derived::derived(const base &) + // CHECK: ptr @_Znwm + // CHECK: {{void|ptr}} @_ZN7derivedIcEC1ERK4baseIcE + reducer *r = new 
reducer(var); + // Should lookup view + // CHECK: @llvm.hyper.lookup + // CHECK: call void @_Z3useR7derivedIcEPKc + use(*r, "Hello\n"); + // CHECK-LABEL: delete.notnull + // The destructor should be called directly as derived::~derived. + // CHECK: call {{void|.*ptr}} @_ZN7derivedIcED1Ev + // CHECK: call void @_ZdlPv + delete r; + return 0; +} diff --git a/clang/test/Cilk/hyper-new.cpp b/clang/test/Cilk/hyper-new.cpp new file mode 100644 index 00000000000000..df53341c5aab23 --- /dev/null +++ b/clang/test/Cilk/hyper-new.cpp @@ -0,0 +1,50 @@ +// RUN: %clang_cc1 %s -x c++ -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// expected-no-diagnostics + +template class base { +protected: + base(); + virtual ~base() noexcept; +}; + +extern base var; + +template +class derived : public base +{ +public: + void reduce(derived* other); + + static void reduce(void *left_v, void *right_v); + static void identity(void *view); + ~derived() noexcept; + derived(); + derived(const base& os); +}; + + +extern void use(derived&, const char *); + +template + using reducer = derived + _Hyperobject(&derived::identity, &derived::reduce); + +// CHECK-LABEL: testfn +int testfn(int argc, char *argv[]) { + // Should call derived::derived(const base &) + // CHECK: ptr @_Znwm + // CHECK: {{void|ptr}} @_ZN7derivedIcEC1ERK4baseIcE + reducer *r = new reducer(var); + // Should lookup view + // CHECK: @llvm.hyper.lookup + // CHECK: call void @_Z3useR7derivedIcEPKc + use(*r, "Hello\n"); + // CHECK-LABEL: delete.notnull + // Class derived is not final so the vtable should be used. 
+ // CHECK: %vtable = load + // CHECK: %[[VFN:[a-z0-9]+]] = getelementptr + // CHECK: %[[DTOR:[0-9]+]] = load ptr, ptr %[[VFN]] + // CHECK: call void %[[DTOR]] + delete r; + return 0; +} diff --git a/clang/test/Cilk/hyper-no-dtor.cpp b/clang/test/Cilk/hyper-no-dtor.cpp new file mode 100644 index 00000000000000..439d4d0e6828db --- /dev/null +++ b/clang/test/Cilk/hyper-no-dtor.cpp @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 %s -x c++ -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// expected-no-diagnostics +// Check for compiler crash trying to call non-existent destructor. +struct S { S(); }; +extern void identity(void *), reduce(void *, void *); + +// CHECK-LABEL: function +void function() +{ + // call {{.+}} @_ZN1SC1Ev + // CHECK: call void @llvm.reducer.register.i64 + S _Hyperobject(identity, reduce) s; + // CHECK: call void @llvm.reducer.unregister +} diff --git a/clang/test/Cilk/hyper-nocilk.c b/clang/test/Cilk/hyper-nocilk.c new file mode 100644 index 00000000000000..148af902513778 --- /dev/null +++ b/clang/test/Cilk/hyper-nocilk.c @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 %s -verify -fsyntax-only +// RUN: %clang_cc1 -x c++ %s -verify -fsyntax-only + +int _Hyperobject x; // expected-warning{{_Hyperobject ignored}} + +extern void f(int _Hyperobject *); // expected-warning{{_Hyperobject ignored}} +extern void g(int *); +void h() { + f(&x); + g(&x); +} diff --git a/clang/test/Cilk/hyper-overload.cpp b/clang/test/Cilk/hyper-overload.cpp new file mode 100644 index 00000000000000..6b4cafefa49f77 --- /dev/null +++ b/clang/test/Cilk/hyper-overload.cpp @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 %s -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// expected-no-diagnostics +struct S { int operator&(); }; +extern int operator+(S &, int); + +// Behavior without hyperobjects +// CHECK-LABEL: f1 +// CHECK-NOT: @llvm.hyper.lookup +// CHECK: @_ZN1SadEv +int f1(struct S*sp) { return &*sp; } +// Lookup view then call S::operator &(). 
+// CHECK-LABEL: f2 +// CHECK: @llvm.hyper.lookup +// CHECK: @_ZN1SadEv +int f2(struct S _Hyperobject*sp) { return &*sp; } +// Lookup view then call operator+(S &, int); +// CHECK-LABEL: f3 +// CHECK: @llvm.hyper.lookup +// CHECK: @_ZplR1Si +int f3(struct S _Hyperobject*sp) { return *sp + 1; } diff --git a/clang/test/Cilk/hyper-param-bad.c b/clang/test/Cilk/hyper-param-bad.c new file mode 100644 index 00000000000000..51edd9dc577893 --- /dev/null +++ b/clang/test/Cilk/hyper-param-bad.c @@ -0,0 +1,5 @@ +// RUN: %clang_cc1 %s -xc -fopencilk -verify -fsyntax-only +int f(int _Hyperobject x) // expected-error{{parameter is hyperobject}} +{ + return x; +} diff --git a/clang/test/Cilk/hyper-param-bad.cpp b/clang/test/Cilk/hyper-param-bad.cpp new file mode 100644 index 00000000000000..f27a6b5de3b194 --- /dev/null +++ b/clang/test/Cilk/hyper-param-bad.cpp @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 %s -xc++ -fopencilk -verify -fsyntax-only +struct S +{ + void fn(int _Hyperobject); // expected-error{{parameter is hyperobject}} +}; + +extern int f(int _Hyperobject x); // expected-error{{parameter is hyperobject}} expected-note{{candidate function not viable}} + +void g() +{ + f(1); // expected-error{{no matching function for call to 'f'}} +} diff --git a/clang/test/Cilk/hyper-pointer.c b/clang/test/Cilk/hyper-pointer.c new file mode 100644 index 00000000000000..73b60d729b29fb --- /dev/null +++ b/clang/test/Cilk/hyper-pointer.c @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 %s -x c -triple aarch64-freebsd -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// RUN: %clang_cc1 %s -x c++ -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// expected-no-diagnostics +extern double array[]; +extern const int size; + +// CHECK-LABEL: g +void g(double _Hyperobject *sum) { // hyperobject-aware function + // CHECK-LABEL: pfor.body + _Cilk_for (int i = 0; i < size; ++i) + // CHECK: call ptr @llvm.hyper.lookup + *sum += array[i]; +} diff --git 
a/clang/test/Cilk/hyper-reference.cpp b/clang/test/Cilk/hyper-reference.cpp new file mode 100644 index 00000000000000..9b62b7a156fb2c --- /dev/null +++ b/clang/test/Cilk/hyper-reference.cpp @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 %s -fopencilk -verify -fsyntax-only +// expected-no-diagnostics +extern void f(int &, int _Hyperobject &); +void g(int _Hyperobject *p) +{ + f(*p, *p); +} diff --git a/clang/test/Cilk/hyper-register.c b/clang/test/Cilk/hyper-register.c new file mode 100644 index 00000000000000..ac8fca3465ad3e --- /dev/null +++ b/clang/test/Cilk/hyper-register.c @@ -0,0 +1,46 @@ +// RUN: %clang_cc1 %s -x c -fopencilk -verify -ftapir=none -S -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -x c++ -fopencilk -verify -ftapir=none -S -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics +#ifdef __cplusplus +extern "C" +#else +extern +#endif +void add(void *l, void *r), zero(void *v); + +typedef double _Hyperobject(zero, add) double_reducer; + +#ifdef __cplusplus +extern "C" void g(double *); +#else +extern void g(double *); +#endif + +// A register and unregister call pair should be generated whether +// the reducer attribute is directly on the variable declaration +// or inherited from a typedef. 
+ +// CHECK-LABEL: f1 +double f1(double x) +{ + // CHECK: call void @llvm.reducer.register + double_reducer y = x; + // CHECK: call void @g + g(&y); + // CHECK: call void @llvm.reducer.unregister + // CHECK: ret double + return y; +} + +// CHECK-LABEL: f2 +double f2(double x) +{ + // CHECK: store double + // CHECK: call void @llvm.reducer.register + double _Hyperobject(zero, add) y = x; + // CHECK: call void @g + g(&y); + // CHECK: call void @llvm.reducer.unregister + // CHECK: ret double + return y; +} diff --git a/clang/test/Cilk/hyper-struct-assign.c b/clang/test/Cilk/hyper-struct-assign.c new file mode 100644 index 00000000000000..23fbc8ae77ed51 --- /dev/null +++ b/clang/test/Cilk/hyper-struct-assign.c @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 %s -fopencilk -verify -fsyntax-only + +struct S { long _Hyperobject field; }; +extern struct S x, y; + +struct S simple_assign(long val) +{ + struct S tmp = {val}; + return x = tmp; // expected-error{{unimplemented assignment to structure with hyperobject member}} +} diff --git a/clang/test/Cilk/hyper-template-errors.cpp b/clang/test/Cilk/hyper-template-errors.cpp new file mode 100644 index 00000000000000..300dc90acf6ead --- /dev/null +++ b/clang/test/Cilk/hyper-template-errors.cpp @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 %s -fopencilk -verify -fsyntax-only +template +struct reducer +{ +// See SemaType.cpp:ContainsHyperobject for choice of error message. 
+ VIEW _Hyperobject value1; + // expected-error@-1{{type 'long _Hyperobject', which contains a hyperobject, may not be a hyperobject}} + // expected-error@-2{{type 'reducer', which contains a hyperobject, may not be a hyperobject}} + // expected-error@-3{{type 'wrap', which contains a hyperobject, may not be a hyperobject}} + int _Hyperobject value2; +}; + +reducer r_hl; // expected-note{{in instantiation}} +reducer r_l; +reducer r_i2; + +int f() { return r_l.value1 + r_l.value2; } +int g() { return r_i2.value1[0]; } + +reducer> s; // expected-note{{in instantiation}} + +template struct wrap { T field; }; +reducer> t; // expected-note{{in instantiation}} diff --git a/clang/test/Cilk/hyper-template.cpp b/clang/test/Cilk/hyper-template.cpp new file mode 100644 index 00000000000000..e5a5e78430be84 --- /dev/null +++ b/clang/test/Cilk/hyper-template.cpp @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 %s -fopencilk -verify -ftapir=none -S -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics + +template struct S { T member; }; +S _Hyperobject S_long; + +// CHECK-LABEL: @_Z1fv +// CHECK: %0 = call ptr @llvm.hyper.lookup.i64(ptr @S_long, i64 8, ptr null, ptr null) +// CHECK-NOT: call ptr @llvm.hyper.lookup +// CHECK: getelementptr +// CHECK: %[[RET:.+]] = load i64 +// CHECK: ret i64 %[[RET]] +long f() { return S_long.member; } + +// CHECK-LABEL: _Z1gPH1SIsE +// CHECK: call ptr @llvm.hyper.lookup +// CHECK-NOT: call ptr @llvm.hyper.lookup +// CHECK: getelementptr +// CHECK: load i16 +long g(S _Hyperobject *p) { return p->member; } diff --git a/clang/test/Cilk/hyper-template2.cpp b/clang/test/Cilk/hyper-template2.cpp new file mode 100644 index 00000000000000..01b90a8cda76e1 --- /dev/null +++ b/clang/test/Cilk/hyper-template2.cpp @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 %s -x c++ -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +template +struct reducer +{ + static void identity(void *); + static void reduce(void *, void *); + char pad; + // Registration of 
structure members is not implemented. + VIEW _Hyperobject(identity, reduce) value; + // expected-warning@-1{{reducer callbacks not implemented}} + reducer(); + ~reducer(); +}; + +// CHECK: call {{.+}} @_ZN7reducerIsEC1Ev +// CHECK: @_ZN7reducerIsED1Ev +reducer r; // expected-note{{in instantiation}} + +// CHECK-LABEL: _Z1fv +// CHECK: call ptr @llvm.hyper.lookup +// CHECK-NOT: call ptr @llvm.hyper.lookup +// CHECK: load i16 +// CHECK: sext i16 +// CHECK: ret i32 +int f() { return r.value; } diff --git a/clang/test/Cilk/hyper-unary.c b/clang/test/Cilk/hyper-unary.c new file mode 100644 index 00000000000000..609420753beb09 --- /dev/null +++ b/clang/test/Cilk/hyper-unary.c @@ -0,0 +1,52 @@ +// RUN: %clang_cc1 %s -x c -fopencilk -verify -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +// This does not pass in C++ because hyperobject expression statements +// without side effects are not emitted. Unclear if this is a bug or a feature. +// expected-no-diagnostics + +extern int _Hyperobject x; +extern int _Hyperobject *xp; + +// CHECK-LABEL: function1 +void function1() +{ + // CHECK: store i32 1, ptr %[[Y:.+]], + int _Hyperobject y = 1; + // CHECK: call ptr @llvm.hyper.lookup.i64(ptr @x, i64 4, ptr null, ptr null) + // CHECK: load i32 + // CHECK: call ptr @llvm.hyper.lookup.i64(ptr %[[Y]], i64 4, ptr null, ptr null) + // CHECK: load i32 + (void)x; (void)y; +} + +// CHECK-LABEL: function2 +void function2() +{ + // CHECK: store i32 1, ptr %[[Y:.+]], + int _Hyperobject y = 1; + // CHECK: call ptr @llvm.hyper.lookup.i64(ptr @x, i64 4, ptr null, ptr null) + // CHECK: load i32 + // CHECK: call ptr @llvm.hyper.lookup.i64(ptr %[[Y]], i64 4, ptr null, ptr null) + // CHECK: load i32 + (void)!x; (void)!y; +} + +// CHECK-LABEL: function3 +void function3() +{ + // CHECK: store i32 1, ptr %[[Y:.+]], + int _Hyperobject y = 1; + // CHECK: call ptr @llvm.hyper.lookup.i64(ptr @x, i64 4, ptr null, ptr null) + // CHECK: load i32 + // CHECK: call ptr @llvm.hyper.lookup.i64(ptr %[[Y]], 
i64 4, ptr null, ptr null) + // CHECK: load i32 + (void)-x; (void)-y; + // CHECK: call ptr @llvm.hyper.lookup.i64(ptr @x, i64 4, ptr null, ptr null) + // CHECK: load i32 + // CHECK: call ptr @llvm.hyper.lookup.i64(ptr %[[Y]], i64 4, ptr null, ptr null) + // CHECK: load i32 + (void)~x; (void)~y; + // CHECK: %[[XP:.+]] = load ptr, ptr @xp + // CHECK: call ptr @llvm.hyper.lookup.i64(ptr %[[XP]], i64 4, ptr null, ptr null) + // CHECK: load i32 + (void)*xp; +} diff --git a/clang/test/Cilk/hyper-unique.c b/clang/test/Cilk/hyper-unique.c new file mode 100644 index 00000000000000..1830aa7ce71a09 --- /dev/null +++ b/clang/test/Cilk/hyper-unique.c @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 %s -fopencilk -verify -fsyntax-only +// RUN: %clang_cc1 %s -x c++ -fopencilk -verify -fsyntax-only + +extern void (*reduce)(void *, void *), (*identity)(void *); + +extern int _Hyperobject(identity, reduce) x; // expected-note{{previous declaration is here}} +int _Hyperobject(identity, reduce) x; // expected-error{{redefinition of 'x' with a different type: 'int _Hyperobject(identity, reduce)' vs 'int _Hyperobject(identity, reduce)'}} + diff --git a/clang/test/Cilk/hyper-zero.c b/clang/test/Cilk/hyper-zero.c new file mode 100644 index 00000000000000..5e604d7770aa5e --- /dev/null +++ b/clang/test/Cilk/hyper-zero.c @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 %s -x c -fopencilk -verify -Wno-error=int-conversion -S -emit-llvm -disable-llvm-passes -o - | FileCheck %s +extern int c; +extern void *d; + +// Test for crash on definition of empty hyperobject +// CHECK-LABEL: __cxx_global_var_init +// CHECK: call void @llvm.reducer.register.i64(ptr @x, i64 0 +typedef char Empty[0]; +Empty _Hyperobject(d, d) x; + +void declares_hyperobject() +{ + // Test for crash on int to pointer conversion in hyperobject definition + int _Hyperobject(c, d) y; + //expected-warning@-1{{incompatible integer to pointer conversion}} +} diff --git a/clang/test/Cilk/implicit-sync-scopes.cpp b/clang/test/Cilk/implicit-sync-scopes.cpp 
new file mode 100644 index 00000000000000..f7cd754d009e88 --- /dev/null +++ b/clang/test/Cilk/implicit-sync-scopes.cpp @@ -0,0 +1,2550 @@ +// Verify that a sync is added implicitly at the end of appropriate scopes and +// before destructors. +// +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -fopencilk -fcxx-exceptions -fexceptions -ftapir=none -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-O0 +// RUN: %clang_cc1 %s -O1 -mllvm -simplify-taskframes=false -triple x86_64-unknown-linux-gnu -fopencilk -fcxx-exceptions -fexceptions -ftapir=none -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-O1 +// expected-no-diagnostics + +class Bar { +public: + Bar(); + ~Bar(); + Bar(const Bar &that); + Bar(Bar &&that); + Bar &operator=(Bar that); + friend void swap(Bar &left, Bar &right); +}; + +void nothrowfn(int a) noexcept; +void catchfn_c(int i, char e) noexcept; +void catchfn_i(int i, int e) noexcept; + +__attribute__((noinline)) +void bar(int a) { + try { + throw a; + } catch (char e) { + catchfn_c(1, e); + } +} + +/// Test that no sync is inserted in a function with no Cilk constructs. + +// CHECK-LABEL: define {{.*}}void @_Z3bari(i32 noundef %a) +// CHECK-NOT: sync +// CHECK: ret void + +__attribute__((noinline)) +void foo(int a) { + try { + bar(a); + } catch (char e) { + catchfn_c(1, e); + } +} + +// CHECK-LABEL: define {{.*}}void @_Z3fooi(i32 noundef %a) +// CHECK-NOT: sync +// CHECK: ret void + +void spawn(int a) { + _Cilk_spawn bar(a); + nothrowfn(a); +} + +/// Test that an implicit sync is inserted for the _Cilk_spawn in spawn(). 
+ +// CHECK-LABEL: define {{.*}}void @_Z5spawni( +// CHECK: %[[SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK: %[[TASKFRAME:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED:.+]], label %[[CONTINUE:.+]] + +// CHECK: [[DETACHED]]: +// CHECK-NEXT: call void @llvm.taskframe.use(token %[[TASKFRAME]]) +// CHECK-NEXT: call void @_Z3bari( +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE]] + +// CHECK: [[CONTINUE]]: +// CHECK: call void @_Z9nothrowfni( +// CHECK-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONTINUE:.+]] + +// CHECK: [[SYNCCONTINUE]]: +// CHECK-NEXT: call void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK-NEXT: ret void + +void spawn_destructor(int a) { + Bar b1; + _Cilk_spawn bar(a); + nothrowfn(a); +} + +/// Test that an implicit sync is inserted for the _Cilk_spawn in +/// spawn_destructor(), and that the sync is inserted before implicit +/// destructors. + +// CHECK-LABEL: define {{.*}}void @_Z16spawn_destructori( +// CHECK: %[[SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK-O1: call void @llvm.lifetime.start.p0( +// CHECK: call void @_ZN3BarC1Ev( +// CHECK: %[[TASKFRAME:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED:.+]], label %[[CONTINUE:.+]] unwind label %[[DETUNWIND:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME]]) +// CHECK-NEXT: invoke void @_Z3bari( +// CHECK-NEXT: to label %[[INVOKECONT:.+]] unwind label %[[LPAD:.+]] + +// CHECK: [[INVOKECONT]]: +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE]] + +// CHECK: [[CONTINUE]]: +// CHECK: call void @_Z9nothrowfni( +// CHECK-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONTINUE:.+]] + +// CHECK: [[SYNCCONTINUE]]: +// CHECK-NEXT: invoke void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK-NEXT: to label %[[SUCONT:.+]] unwind label %[[B1CLEANUP:.+]] +// CHECK: [[SUCONT]]: +// 
CHECK-NEXT: call void @_ZN3BarD1Ev( +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0( +// CHECK-NEXT: ret void + +// CHECK: [[LPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], +// CHECK-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[DETUNWIND]] + +// CHECK: [[DETUNWIND]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[B1CLEANUP]] +// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[B1CLEANUP_SPLIT:.+]] + +// CHECK-O1: [[B1CLEANUP_SPLIT]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1: br label %[[B1CLEANUP_MERGE:.+]] + +// CHECK: [[B1CLEANUP]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-O1: br label %[[B1CLEANUP_MERGE]] +// CHECK-O1: [[B1CLEANUP_MERGE]]: +// CHECK: call void @_ZN3BarD1Ev( +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0( +// CHECK-O1-NEXT: resume +// CHECK-O0-NEXT: br label %[[RESUME:.+]] + +// CHECK-O0: [[RESUME]]: +// CHECK-O0: resume + +// CHECK: [[UNREACHABLE]]: +// CHECK-NEXT: unreachable + +int trycatch(int a) { + try { + _Cilk_spawn foo(1); + nothrowfn(2); + } catch (int e) { + catchfn_i(1, e); + } + return 0; +} + +/// Test that an implicit sync is inserted for a _Cilk_spawn in a try block, and +/// that the sync is inserted at the end of the try block. 
+ +// CHECK-LABEL: define {{.*}}i32 @_Z8trycatchi( +// CHECK: %[[SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK: %[[TASKFRAME:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED:.+]], label %[[CONTINUE:.+]] unwind label %[[DETUNWIND:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 1) +// CHECK-NEXT: to label %[[INVOKECONT:.+]] unwind label %[[LPAD:.+]] + +// CHECK: [[INVOKECONT]]: +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE]] + +// CHECK: [[CONTINUE]]: +// CHECK-NEXT: call void @_Z9nothrowfni(i32 noundef 2) +// CHECK-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONT:.+]] + +// CHECK: [[SYNCCONT]]: +// CHECK-NEXT: invoke void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK-NEXT: to label %[[SUCONT:.+]] unwind label %[[CATCHLPAD:.+]] +// CHECK-O0: [[SUCONT]]: +// CHECK-O0-NEXT: br label %[[TRYCONT:.+]] + +// CHECK: [[LPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], +// CHECK-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[DETUNWIND]] + +// CHECK: [[DETUNWIND]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[CATCHLPAD]] +// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[CATCHLPAD_SPLIT:.+]] + +// CHECK-O1: [[CATCHLPAD_SPLIT]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: catch ptr @_ZTIi + +// CHECK: [[CATCHLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: catch ptr @_ZTIi + +// CHECK-O0: [[TRYCONT]]: +// CHECK-O1: [[SUCONT]]: +// CHECK-NEXT: ret i32 0 + +int trycatch_destructor(int a) { + Bar b1; + try { + Bar b2; + _Cilk_spawn foo(1); + nothrowfn(2); + } catch (int e) { + catchfn_i(1, e); + } + return 0; +} + +/// Test that an implicit 
sync is inserted for a _Cilk_spawn in a try block, and +/// that the sync is inserted at the end of the try block, but before +/// destructors for that try block. + +// CHECK-LABEL: define {{.*}}i32 @_Z19trycatch_destructori( +// CHECK: %[[SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK-O1: call void @llvm.lifetime.start.p0(i64 [[B1SIZE:.+]], ptr nonnull %[[B1ADDR:.+]]) +// CHECK: call void @_ZN3BarC1Ev(ptr {{.*}}%[[B1:.+]]) +// CHECK-O1: call void @llvm.lifetime.start.p0(i64 [[B2SIZE:.+]], ptr nonnull %[[B2ADDR:.+]]) +// CHECK: invoke void @_ZN3BarC1Ev(ptr {{.*}}%[[B2:.+]]) +// CHECK-NEXT: to label %[[BARCONSTRCONT:.+]] unwind label %[[BARCONSTRLPAD:.+]] + +// CHECK: [[BARCONSTRCONT]]: +// CHECK: %[[TASKFRAME:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED:.+]], label %[[CONTINUE:.+]] unwind label %[[DETUNWIND:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 1) +// CHECK-NEXT: to label %[[INVOKECONT:.+]] unwind label %[[LPAD:.+]] + +// CHECK: [[INVOKECONT]]: +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE]] + +// CHECK: [[CONTINUE]]: +// CHECK-NEXT: call void @_Z9nothrowfni(i32 noundef 2) +// CHECK-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONT:.+]] + +// CHECK: [[SYNCCONT]]: +// CHECK-NEXT: invoke void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK-NEXT: to label %[[SUCONT:.+]] unwind label %[[BARCONSTRLPAD]] +// CHECK: [[SUCONT]]: +// CHECK-NEXT: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B2]]) +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 [[B2SIZE]], ptr nonnull %[[B2ADDR]]) +// CHECK-NEXT: br label %[[TRYCONT:.+]] + +// CHECK: [[BARCONSTRLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NEXT: catch ptr @_ZTIi +// CHECK: br label %[[CATCHDISPATCH:.+]] + +// CHECK: [[LPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NOT: catch +// 
CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], +// CHECK-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[DETUNWIND]] + +// CHECK: [[DETUNWIND]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NOT: catch +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[CATCHLPAD:.+]] + +// CHECK: [[CATCHLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NEXT: catch ptr @_ZTIi +// CHECK: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B2]]) +// CHECK: br label %[[CATCHDISPATCH]] + +// CHECK: [[CATCHDISPATCH]]: +// CHECK-O1: call void @llvm.lifetime.end.p0(i64 [[B2SIZE]], ptr nonnull %[[B2ADDR]]) +// CHECK: br i1 %{{.+}}, label %[[CATCH:.+]], label %[[EHCLEANUP:.+]] + +// CHECK: [[CATCH]]: +// CHECK: call void @_Z9catchfn_iii(i32 noundef 1, +// CHECK: br label %[[TRYCONT]] + +// CHECK: [[TRYCONT]]: +// CHECK-NEXT: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B1]]) +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 [[B1SIZE]], ptr nonnull %[[B1ADDR]]) +// CHECK-NEXT: ret i32 0 + +// CHECK: [[EHCLEANUP]]: +// CHECK-NEXT: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B1]]) +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 [[B1SIZE]], ptr nonnull %[[B1ADDR]]) +// CHECK: resume + +int mix_spawn_trycatch(int a) { + _Cilk_spawn foo(1); + try { + _Cilk_spawn foo(2); + nothrowfn(3); + } catch (int e) { + catchfn_i(1, e); + } + return 0; +} + +/// Test that separate implicit syncs are inserted for the _Cilk_spawn in a try +/// block, inserted at the end of the try block, and for a _Cilk_spawn outside +/// of the try block. 
+ +// CHECK-LABEL: define {{.*}}i32 @_Z18mix_spawn_trycatchi( +// CHECK: %[[SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK: %[[TASKFRAME1:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED1:.+]], label %[[CONTINUE1:.+]] + +// CHECK: [[DETACHED1]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME1]]) +// CHECK-NEXT: call void @_Z3fooi(i32 noundef 1) +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE1]] + +// CHECK: [[CONTINUE1]]: +// CHECK: %[[TFTRY:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: %[[TRYSYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK: %[[TASKFRAME2:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[TRYSYNCREG]], label %[[DETACHED2:.+]], label %[[CONTINUE2:.+]] unwind label %[[DETUNWIND:.+]] + +// CHECK: [[DETACHED2]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME2]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 2) +// CHECK-NEXT: to label %[[INVOKECONT:.+]] unwind label %[[LPAD:.+]] + +// CHECK: [[INVOKECONT]]: +// CHECK-NEXT: reattach within %[[TRYSYNCREG]], label %[[CONTINUE2]] + +// CHECK: [[CONTINUE2]]: +// CHECK-NEXT: call void @_Z9nothrowfni(i32 noundef 3) +// CHECK-NEXT: sync within %[[TRYSYNCREG]], label %[[TRYSYNCCONT:.+]] + +// CHECK: [[TRYSYNCCONT]]: +// CHECK-NEXT: invoke void @llvm.sync.unwind(token %[[TRYSYNCREG]]) +// CHECK-NEXT: to label %[[TRYSUCONT:.+]] unwind label %[[CATCHLPAD:.+]] +// CHECK-O0: [[TRYSUCONT]]: +// CHECK-O0-NEXT: br label %[[TRYCONT:.+]] + +// CHECK: [[LPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[TRYSYNCREG]], +// CHECK-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[DETUNWIND]] + +// CHECK: [[DETUNWIND]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME2]], +// CHECK-O0-NEXT: to label 
%[[UNREACHABLE]] unwind label %[[CATCHLPAD]] +// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[CATCHLPAD_SPLIT:.+]] + +// CHECK-O1: [[CATCHLPAD_SPLIT]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1-NEXT: catch ptr @_ZTIi +// CHECK-O1: br label %[[CATCHLPAD_MERGE:.+]] + +// CHECK: [[CATCHLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NEXT: catch ptr @_ZTIi +// CHECK-O1: br label %[[CATCHLPAD_MERGE]] +// CHECK-O1: [[CATCHLPAD_MERGE]]: +// CHECK: br i1 %{{.+}}, label %[[CATCH:.+]], label %[[EHRESUME:.+]] + +// CHECK: [[CATCH]]: +// CHECK: call void @_Z9catchfn_iii(i32 noundef 1, +// CHECK: br label %[[TRYCONT]] + +// CHECK-O0: [[TRYCONT]]: +// CHECK-O1: [[TRYSUCONT]]: +// CHECK-NEXT: call void @llvm.taskframe.end(token %[[TFTRY]]) +// CHECK-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONT:.+]] + +// CHECK: [[SYNCCONT]]: +// CHECK-NEXT: call void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK: ret i32 0 + +// CHECK: [[EHRESUME]]: +// CHECK: resume + +// CHECK: [[UNREACHABLE]]: +// CHECK-NEXT: unreachable + +int mix_spawn_trycatch_destructors(int a) { + Bar b1; + _Cilk_spawn foo(1); + try { + Bar b2; + _Cilk_spawn foo(2); + nothrowfn(3); + } catch (int e) { + catchfn_i(1, e); + } + return 0; +} + +/// Test that separate implicit syncs are inserted for a _Cilk_spawn in a try +/// block and a _Cilk_spawn outside of the try block, and that the sync is +/// inserted before the end of those scopes, but before destructors. 
+ +// CHECK-LABEL: define {{.*}}i32 @_Z30mix_spawn_trycatch_destructorsi( +// CHECK: %[[SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK-O1: call void @llvm.lifetime.start.p0(i64 [[B1SIZE:.+]], ptr nonnull %[[B1ADDR:.+]]) +// CHECK: call void @_ZN3BarC1Ev(ptr {{.*}}%[[B1:.+]]) +// CHECK: %[[TASKFRAME:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED1:.+]], label %[[CONTINUE1:.+]] unwind label %[[DETUNWIND1:.+]] + +// CHECK: [[DETACHED1]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 1) +// CHECK-NEXT: to label %[[INVOKECONT1:.+]] unwind label %[[LPAD:.+]] + +// CHECK: [[INVOKECONT1]]: +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE1]] + +// CHECK: [[CONTINUE1]]: +// CHECK: %[[TFTRY:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: %[[TRYSYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK-O1: call void @llvm.lifetime.start.p0(i64 [[B2SIZE:.+]], ptr nonnull %[[B2ADDR:.+]]) +// CHECK: invoke void @_ZN3BarC1Ev(ptr {{.*}}%[[B2:.+]]) +// CHECK-NEXT: to label %[[B2CONSTRCONT:.+]] unwind label %[[B2CONSTRLPAD:.+]] + +// CHECK: [[B2CONSTRCONT]]: +// CHECK: %[[TASKFRAME2:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[TRYSYNCREG]], label %[[DETACHED2:.+]], label %[[CONTINUE2:.+]] unwind label %[[DETUNWIND2:.+]] + +// CHECK: [[DETACHED2]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME2]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 2) +// CHECK-NEXT: to label %[[INVOKECONT2:.+]] unwind label %[[LPAD2:.+]] + +// CHECK: [[INVOKECONT2]]: +// CHECK-NEXT: reattach within %[[TRYSYNCREG]], label %[[CONTINUE2]] + +// CHECK: [[CONTINUE2]]: +// CHECK-NEXT: call void @_Z9nothrowfni(i32 noundef 3) +// CHECK-NEXT: sync within %[[TRYSYNCREG]], label %[[TRYSYNCCONT:.+]] + +// CHECK: [[TRYSYNCCONT]]: +// CHECK-NEXT: invoke void @llvm.sync.unwind(token 
%[[TRYSYNCREG]]) +// CHECK-NEXT: to label %[[SUCONT:.+]] unwind label %[[CATCHLPAD:.+]] +// CHECK: [[SUCONT]]: +// CHECK-NEXT: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B2]]) +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 [[B2SIZE]], ptr nonnull %[[B2ADDR]]) +// CHECK-NEXT: br label %[[TRYCONT:.+]] + +// CHECK: [[LPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NOT: catch +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], +// CHECK-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[DETUNWIND1]] + +// CHECK: [[DETUNWIND1]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NOT: catch +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[OUTERCLEANUPLPAD:.+]] + +// CHECK: [[OUTERCLEANUPLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NOT: catch +// CHECK: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B1]]) +// CHECK-O1: call void @llvm.lifetime.end.p0(i64 [[B1SIZE]], ptr nonnull %[[B1ADDR]]) +// CHECK-O0: br label %[[EHCLEANUP:.+]] +// CHECK-O1: resume + +// CHECK: [[B2CONSTRLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NEXT: catch ptr @_ZTIi +// CHECK: br label %[[CATCHDISPATCH:.+]] + +// CHECK: [[LPAD2]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NOT: catch +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[TRYSYNCREG]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[DETUNWIND2]] + +// CHECK: [[DETUNWIND2]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NOT: catch +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME2]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[CATCHLPAD]] +// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[CATCHLPAD_SPLIT:.+]] + +// CHECK-O1: [[CATCHLPAD_SPLIT]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1-NEXT: catch ptr @_ZTIi +// CHECK-O1: br 
label %[[CATCHLPAD_MERGE:.+]] + +// CHECK: [[CATCHLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NEXT: catch ptr @_ZTIi +// CHECK-O1: br label %[[CATCHLPAD_MERGE]] +// CHECK-O1: [[CATCHLPAD_MERGE]]: +// CHECK: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B2]]) +// CHECK: br label %[[CATCHDISPATCH]] + +// CHECK: [[CATCHDISPATCH]]: +// CHECK-O1: call void @llvm.lifetime.end.p0(i64 [[B2SIZE]], ptr nonnull %[[B2ADDR]]) +// CHECK: br i1 %{{.+}}, label %[[CATCH:.+]], label %[[TFTRYCLEANUP:.+]] + +// CHECK: [[CATCH]]: +// CHECK: call void @_Z9catchfn_iii(i32 noundef 1, +// CHECK: br label %[[TRYCONT]] + +// CHECK: [[TRYCONT]]: +// CHECK-NEXT: call void @llvm.taskframe.end(token %[[TFTRY]]) +// CHECK-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONT:.+]] + +// CHECK: [[SYNCCONT]]: +// CHECK-NEXT: invoke void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK-O0-NEXT: to label %[[SUCONT:.+]] unwind label %[[OUTERCLEANUPLPAD]] +// CHECK-O0: [[SUCONT]]: +// CHECK-O0-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONT2:.+]] +// CHECK-O1-NEXT: to label %[[SUCONT2:.+]] unwind label %[[OUTERCLEANUPLPAD]] + +// CHECK: [[TFTRYCLEANUP]]: +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TFTRY]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[OUTERCLEANUPLPAD]] + +// CHECK-O0: [[SYNCCONT2]]: +// CHECK-O0-NEXT: invoke void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK-O0-NEXT: to label %[[SUCONT2:.+]] unwind label %[[OUTERCLEANUPLPAD]] + +// CHECK: [[SUCONT2]]: +// CHECK: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B1]]) +// CHECK-O1: call void @llvm.lifetime.end.p0(i64 [[B1SIZE]], ptr nonnull %[[B1ADDR]]) +// CHECK-NEXT: ret i32 0 + +// CHECK-O0: [[EHCLEANUP]]: +// CHECK-O0: resume + +// CHECK: [[UNREACHABLE]]: +// CHECK-NEXT: unreachable + +int nested_trycatch(int a) { + _Cilk_spawn foo(1); + try { + _Cilk_spawn foo(2); + try { + _Cilk_spawn foo(3); + nothrowfn(4); + } catch (int e) { + catchfn_i(2, e); + } + } catch (int e) { + catchfn_i(1, e); + } + 
return 0; +} + +/// Test that implicit syncs are properly inserted at the end of try blocks when +/// there are nested try-catch statements. + +// CHECK-LABEL: define {{.*}}i32 @_Z15nested_trycatchi( +// CHECK: %[[SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK: %[[TASKFRAME1:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED1:.+]], label %[[CONTINUE1:.+]] + +// CHECK: [[DETACHED1]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME1]]) +// CHECK-NEXT: call void @_Z3fooi(i32 noundef 1) +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE1]] + +// CHECK: [[CONTINUE1]]: +// CHECK: %[[TFTRY1:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: %[[TRYSYNCREG1:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK: %[[TASKFRAME2:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[TRYSYNCREG1]], label %[[DETACHED2:.+]], label %[[CONTINUE2:.+]] unwind label %[[DETUNWIND2:.+]] + +// CHECK: [[DETACHED2]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME2]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 2) +// CHECK-NEXT: to label %[[INVOKECONT:.+]] unwind label %[[LPAD:.+]] + +// CHECK: [[INVOKECONT]]: +// CHECK-NEXT: reattach within %[[TRYSYNCREG1]], label %[[CONTINUE2]] + +// CHECK: [[CONTINUE2]]: +// CHECK: %[[TFTRY2:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: %[[TRYSYNCREG2:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK: %[[TASKFRAME3:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[TRYSYNCREG2]], label %[[DETACHED3:.+]], label %[[CONTINUE3:.+]] unwind label %[[DETUNWIND3:.+]] + +// CHECK: [[DETACHED3]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME3]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 3) +// CHECK-NEXT: to label %[[INVOKECONT2:.+]] unwind label %[[LPAD2:.+]] + +// CHECK: [[INVOKECONT2]]: +// CHECK-NEXT: reattach within %[[TRYSYNCREG2]], label 
%[[CONTINUE3]] + +// CHECK: [[CONTINUE3]]: +// CHECK-NEXT: call void @_Z9nothrowfni(i32 noundef 4) +// CHECK-NEXT: sync within %[[TRYSYNCREG2]], label %[[TRYSYNCCONT2:.+]] + +// CHECK: [[TRYSYNCCONT2]]: +// CHECK-NEXT: invoke void @llvm.sync.unwind(token %[[TRYSYNCREG2]]) +// CHECK-NEXT: to label %[[TRYSUCONT2:.+]] unwind label %[[CATCHLPAD2:.+]] +// CHECK-O0: [[TRYSUCONT2]]: +// CHECK-O0-NEXT: br label %[[TRYCONT2:.+]] + +// CHECK: [[LPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[TRYSYNCREG1]], +// CHECK-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[DETUNWIND2]] + +// CHECK: [[DETUNWIND2]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME2]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[CATCHLPAD1:.+]] + +// CHECK: [[CATCHLPAD1]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NEXT: catch ptr @_ZTIi +// CHECK-O0: br label %[[CATCHDISPATCH1:.+]] + +// CHECK-O0: [[CATCHDISPATCH1]]: +// CHECK: br i1 %{{.+}}, label %[[CATCH1:.+]], label %[[TFTRYCLEANUP1:.+]] + +// CHECK: [[CATCH1]]: +// CHECK: call void @_Z9catchfn_iii(i32 noundef 1, +// CHECK: br label %[[TRYCONT1:.+]] + +// CHECK: [[TRYCONT1]]: +// CHECK-NEXT: call void @llvm.taskframe.end(token %[[TFTRY1]]) +// CHECK-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONT:.+]] + +// CHECK: [[SYNCCONT]]: +// CHECK-NEXT: call void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK: ret i32 0 + +// CHECK: [[LPAD2]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[TRYSYNCREG2]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[DETUNWIND3]] + +// CHECK: [[DETUNWIND3]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME3]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[CATCHLPAD2]] 
+// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[CATCHLPAD2_SPLIT:.+]] + +// CHECK-O1: [[CATCHLPAD2_SPLIT]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1-NEXT: catch ptr @_ZTIi + +// CHECK: [[CATCHLPAD2]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NEXT: catch ptr @_ZTIi +// CHECK-O0: br label %[[CATCHDISPATCH2:.+]] + +// CHECK-O0: [[CATCHDISPATCH2]]: +// CHECK: br i1 %{{.+}}, label %[[CATCH2:.+]], label %[[TFTRYCLEANUP2:.+]] + +// CHECK: [[CATCH2]]: +// CHECK: call void @_Z9catchfn_iii(i32 noundef 2, +// CHECK-O0: br label %[[TRYCONT2]] +// CHECK-O1: br label %[[TRYSUCONT2]] + +// CHECK-O0: [[TRYCONT2]]: +// CHECK-O1: [[TRYSUCONT2]]: +// CHECK-NEXT: call void @llvm.taskframe.end(token %[[TFTRY2]]) +// CHECK-NEXT: sync within %[[TRYSYNCREG1]], label %[[TRYSYNCCONT1:.+]] + +// CHECK: [[TRYSYNCCONT1]]: +// CHECK-NEXT: invoke void @llvm.sync.unwind(token %[[TRYSYNCREG1]]) +// CHECK-O0-NEXT: to label %[[TRYSUCONT1:.+]] unwind label %[[CATCHLPAD1]] +// CHECK-O0: [[TRYSUCONT1]]: +// CHECK-O0-NEXT: br label %[[TRYCONT1]] +// CHECK-O1-NEXT: to label %[[TRYCONT1:.+]] unwind label %[[CATCHLPAD1]] + +// CHECK: [[TFTRYCLEANUP2]]: +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TFTRY2]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[CATCHLPAD1]] + +// CHECK: [[TFTRYCLEANUP1]]: +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TFTRY1]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[EHRESUME:.+]] + +// CHECK: [[EHRESUME]]: +// CHECK: resume + +// CHECK: [[UNREACHABLE]]: +// CHECK-NEXT: unreachable + +int nested_trycatch_destructors(int a) { + Bar b1; + _Cilk_spawn foo(1); + try { + Bar b2; + _Cilk_spawn foo(2); + try { + Bar b3; + _Cilk_spawn foo(3); + nothrowfn(4); + } catch (int e) { + catchfn_i(2, e); + } + } catch (int e) { + catchfn_i(1, e); + } + return 0; +} + +/// Test that implicit syncs are properly inserted at the end of try blocks, but +/// before 
destructors, when there are nested try-catch statements. + +// CHECK-LABEL: define {{.*}}i32 @_Z27nested_trycatch_destructorsi( +// CHECK: %[[SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK-O1: call void @llvm.lifetime.start.p0(i64 [[B1SIZE:.+]], ptr nonnull %[[B1ADDR:.+]]) +// CHECK: call void @_ZN3BarC1Ev(ptr {{.*}}%[[B1:.+]]) +// CHECK: %[[TASKFRAME1:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED1:.+]], label %[[CONTINUE1:.+]] unwind label %[[DETUNWIND1:.+]] + +// CHECK: [[DETACHED1]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME1]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 1) +// CHECK-NEXT: to label %[[INVOKECONT1:.+]] unwind label %[[LPAD1:.+]] + +// CHECK: [[INVOKECONT1]]: +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE1]] + +// CHECK: [[CONTINUE1]]: +// CHECK: %[[TFTRY1:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: %[[TRYSYNCREG1:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK-O1: call void @llvm.lifetime.start.p0(i64 [[B2SIZE:.+]], ptr nonnull %[[B2ADDR:.+]]) +// CHECK: invoke void @_ZN3BarC1Ev(ptr {{.*}}%[[B2:.+]]) +// CHECK-NEXT: to label %[[B2CONSTRCONT:.+]] unwind label %[[B2CONSTRLPAD:.+]] + +// CHECK: [[B2CONSTRCONT]]: +// CHECK: %[[TASKFRAME2:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[TRYSYNCREG1]], label %[[DETACHED2:.+]], label %[[CONTINUE2:.+]] unwind label %[[DETUNWIND2:.+]] + +// CHECK: [[DETACHED2]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME2]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 2) +// CHECK-NEXT: to label %[[INVOKECONT2:.+]] unwind label %[[LPAD2:.+]] + +// CHECK: [[INVOKECONT2]]: +// CHECK-NEXT: reattach within %[[TRYSYNCREG1]], label %[[CONTINUE2]] + +// CHECK: [[CONTINUE2]]: +// CHECK: %[[TFTRY2:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: %[[TRYSYNCREG2:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK-O1: 
call void @llvm.lifetime.start.p0(i64 [[B3SIZE:.+]], ptr nonnull %[[B3ADDR:.+]]) +// CHECK: invoke void @_ZN3BarC1Ev(ptr {{.*}}%[[B3:.+]]) +// CHECK-NEXT: to label %[[B3CONSTRCONT:.+]] unwind label %[[B3CONSTRLPAD:.+]] + +// CHECK: [[B3CONSTRCONT]]: +// CHECK: %[[TASKFRAME3:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[TRYSYNCREG2]], label %[[DETACHED3:.+]], label %[[CONTINUE3:.+]] unwind label %[[DETUNWIND3:.+]] + +// CHECK: [[DETACHED3]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME3]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 3) +// CHECK-NEXT: to label %[[INVOKECONT3:.+]] unwind label %[[LPAD3:.+]] + +// CHECK: [[INVOKECONT3]]: +// CHECK-NEXT: reattach within %[[TRYSYNCREG2]], label %[[CONTINUE3]] + +// CHECK: [[CONTINUE3]]: +// CHECK-NEXT: call void @_Z9nothrowfni(i32 noundef 4) +// CHECK-NEXT: sync within %[[TRYSYNCREG2]], label %[[TRYSYNCCONT2:.+]] + +// CHECK: [[TRYSYNCCONT2]]: +// CHECK-NEXT: invoke void @llvm.sync.unwind(token %[[TRYSYNCREG2]]) +// CHECK-NEXT: to label %[[TRYSUCONT2:.+]] unwind label %[[B3CLEANUPLPAD:.+]] + +// CHECK: [[TRYSUCONT2]]: +// CHECK-NEXT: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B3]]) +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 [[B3SIZE]], ptr nonnull %[[B3ADDR]]) +// CHECK-NEXT: br label %[[TRYCONT2:.+]] + +// CHECK: [[LPAD1]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], +// CHECK-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[DETUNWIND1]] + +// CHECK: [[DETUNWIND1]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME1]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[B1CLEANUPLPAD:.+]] + +// CHECK: [[B1CLEANUPLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B1]]) +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 [[B1SIZE]], ptr nonnull 
%[[B1ADDR]]) +// CHECK-O0: br label %[[EHRESUME:.+]] +// CHECK-O1: resume + +// CHECK: [[B2CONSTRLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NEXT: catch ptr @_ZTIi +// CHECK: br label %[[CATCHDISPATCH1:.+]] + +// CHECK: [[LPAD2]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[TRYSYNCREG1]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[DETUNWIND2]] + +// CHECK: [[DETUNWIND2]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME2]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[B2CLEANUPLPAD:.+]] + +// CHECK: [[B2CLEANUPLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NEXT: catch ptr @_ZTIi +// CHECK: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B2]]) +// CHECK: br label %[[CATCHDISPATCH1]] + +// CHECK-O0: [[CATCHDISPATCH1]]: +// CHECK-O0: br i1 %{{.+}}, label %[[CATCH1:.+]], label %[[EHCLEANUP1:.+]] + +// CHECK-O0: [[CATCH1]]: +// CHECK-O0: call void @_Z9catchfn_iii(i32 noundef 1, +// CHECK-O0: br label %[[TRYCONT1:.+]] + +// CHECK-O0: [[TRYCONT1]]: +// CHECK-O0-NEXT: call void @llvm.taskframe.end(token %[[TFTRY1]]) +// CHECK-O0-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONT:.+]] + +// CHECK-O0: [[SYNCCONT]]: +// CHECK-O0-NEXT: invoke void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK-O0-NEXT: to label %[[SUCONT:.+]] unwind label %[[B1CLEANUPLPAD]] + +// CHECK-O0: [[SUCONT]]: +// CHECK-O0-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONT2:.+]] + +// CHECK: [[B3CONSTRLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NEXT: catch ptr @_ZTIi +// CHECK: br label %[[CATCHDISPATCH2:.+]] + +// CHECK: [[LPAD3]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[TRYSYNCREG2]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[DETUNWIND3]] + +// CHECK: [[DETUNWIND3]]: +// 
CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME3]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[B3CLEANUPLPAD]] +// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[B3CLEANUPLPAD_SPLIT:.+]] + +// CHECK-O1: [[B3CLEANUPLPAD_SPLIT]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1-NEXT: catch ptr @_ZTIi +// CHECK-O1: br label %[[B3CLEANUPLPAD_MERGE:.+]] + +// CHECK: [[B3CLEANUPLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NEXT: catch ptr @_ZTIi +// CHECK-O1: br label %[[B3CLEANUPLPAD_MERGE]] +// CHECK-O1: [[B3CLEANUPLPAD_MERGE]]: +// CHECK: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B3]]) +// CHECK: br label %[[CATCHDISPATCH2]] + +// CHECK: [[CATCHDISPATCH2]]: +// CHECK-O1: call void @llvm.lifetime.end.p0(i64 [[B3SIZE]], ptr nonnull %[[B3ADDR]]) +// CHECK: br i1 %{{.+}}, label %[[CATCH2:.+]], label %[[EHCLEANUP2:.+]] + +// CHECK: [[CATCH2]]: +// CHECK: call void @_Z9catchfn_iii(i32 noundef 2, +// CHECK: br label %[[TRYCONT2]] + +// CHECK: [[TRYCONT2]]: +// CHECK-NEXT: call void @llvm.taskframe.end(token %[[TFTRY2]]) +// CHECK-NEXT: sync within %[[TRYSYNCREG1]], label %[[TRYSYNCCONT1:.+]] + +// CHECK: [[TRYSYNCCONT1]]: +// CHECK-NEXT: invoke void @llvm.sync.unwind(token %[[TRYSYNCREG1]]) +// CHECK-NEXT: to label %[[TRYSUCONT1:.+]] unwind label %[[B2CLEANUPLPAD]] + +// CHECK: [[TRYSUCONT1]]: +// CHECK-NEXT: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B2]]) +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 [[B2SIZE]], ptr nonnull %[[B2ADDR]]) +// CHECK-O0-NEXT: br label %[[TRYCONT1]] +// CHECK-O1-NEXT: br label %[[TRYCONT1:.+]] + +// CHECK: [[EHCLEANUP2]]: +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TFTRY2]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[B2CLEANUPLPAD]] + +// CHECK-O1: [[CATCHDISPATCH1]]: +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 [[B2SIZE]], ptr nonnull %[[B2ADDR]]) +// 
CHECK-O1: br i1 %{{.+}}, label %[[CATCH1:.+]], label %[[EHCLEANUP1:.+]] + +// CHECK-O1: [[CATCH1]]: +// CHECK-O1: call void @_Z9catchfn_iii(i32 noundef 1, +// CHECK-O1: br label %[[TRYCONT1:.+]] + +// CHECK-O1: [[TRYCONT1]]: +// CHECK-O1-NEXT: call void @llvm.taskframe.end(token %[[TFTRY1]]) +// CHECK-O1-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONT:.+]] + +// CHECK-O1: [[SYNCCONT]]: +// CHECK-O1-NEXT: invoke void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK-O1-NEXT: to label %[[SUCONT:.+]] unwind label %[[B1CLEANUPLPAD]] + +// CHECK: [[EHCLEANUP1]]: +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TFTRY1]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[B1CLEANUPLPAD]] + +// CHECK-O0: [[SYNCCONT2]]: +// CHECK-O0-NEXT: invoke void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK-O0-NEXT: to label %[[SUCONT:.+]] unwind label %[[B1CLEANUPLPAD]] + +// CHECK: [[SUCONT]]: +// CHECK: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B1]]) +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 [[B1SIZE]], ptr nonnull %[[B1ADDR]]) +// CHECK-NEXT: ret i32 0 + +// CHECK-O0: [[EHRESUME]]: +// CHECK-O0: resume + +// CHECK: [[UNREACHABLE]]: +// CHECK-NEXT: unreachable + +int mix_parfor_trycatch(int a) { + _Cilk_spawn foo(1); + try { + _Cilk_spawn foo(2); + _Cilk_for (int i = 0; i < a; ++i) + foo(3); + nothrowfn(4); + } catch (int e) { + catchfn_i(1, e); + } + _Cilk_spawn foo(5); + nothrowfn(6); + return 0; +} + +/// Test that implicit syncs are properly inserted at the end of try blocks when +/// there are mixtures of spawns and parallel for loops. 
+ +// CHECK-LABEL: define {{.*}}i32 @_Z19mix_parfor_trycatchi( +// CHECK: %[[SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK: %[[TASKFRAME1:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED1:.+]], label %[[CONTINUE1:.+]] + +// CHECK: [[DETACHED1]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME1]]) +// CHECK-NEXT: call void @_Z3fooi(i32 noundef 1) +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE1]] + +// CHECK: [[CONTINUE1]]: +// CHECK: %[[TFTRY:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: %[[TRYSYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK: %[[PFORSYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK: %[[TASKFRAME2:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[TRYSYNCREG]], label %[[DETACHED2:.+]], label %[[CONTINUE2:.+]] unwind label %[[DETUNWIND2:.+]] + +// CHECK: [[DETACHED2]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME2]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 2) +// CHECK-NEXT: to label %[[INVOKECONT:.+]] unwind label %[[LPAD:.+]] + +// CHECK: [[INVOKECONT]]: +// CHECK-NEXT: reattach within %[[TRYSYNCREG]], label %[[CONTINUE2]] + +// CHECK: [[CONTINUE2]]: +// CHECK-O0: detach within %[[PFORSYNCREG]], label %[[PFORBODY:.+]], label %[[PFORINC:.+]] unwind label %[[CATCHLPAD:.+]] + +// CHECK-O1: [[LPAD]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[TRYSYNCREG]], +// CHECK-O1-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[DETUNWIND2]] + +// CHECK-O1: [[DETUNWIND2]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME2]], +// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[CATCHLPAD:.+]] + +// CHECK-O1: [[CATCHLPAD]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// 
CHECK-O1-NEXT: catch ptr @_ZTIi +// CHECK-O1: br label %[[CATCHDISPATCH:.+]] + +// CHECK-O1: detach within %[[PFORSYNCREG]], label %[[PFORBODY:.+]], label %[[PFORINC:.+]] unwind label %[[PFORUNW:.+]] + +// CHECK: [[PFORBODY]]: +// CHECK: invoke void @_Z3fooi(i32 noundef 3) +// CHECK-NEXT: to label %[[INVOKECONT2:.+]] unwind label %[[PFORLPAD:.+]] + +// CHECK: [[INVOKECONT2]]: +// CHECK: reattach within %[[PFORSYNCREG]], label %[[PFORINC]] + +// CHECK: [[PFORINC]]: +// CHECK-O0: br i1 {{.+}}, label %{{.+}}, label %[[PFORSYNC:.+]], !llvm.loop +// CHECK-O1: br i1 {{.+}}, label %[[PFORSYNC:.+]], label %{{.+}}, !llvm.loop + +// CHECK: [[PFORSYNC]]: +// CHECK: sync within %[[PFORSYNCREG]], label %[[PFORSYNCCONT:.+]] + +// CHECK-O0: [[LPAD]]: +// CHECK-O0-NEXT: landingpad +// CHECK-O0-NEXT: cleanup +// CHECK-O0: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[TRYSYNCREG]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[DETUNWIND2]] + +// CHECK-O0: [[DETUNWIND2]]: +// CHECK-O0-NEXT: landingpad +// CHECK-O0-NEXT: cleanup +// CHECK-O0: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME2]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[CATCHLPAD]] + +// CHECK-O0: [[CATCHLPAD]]: +// CHECK-O0-NEXT: landingpad +// CHECK-O0-NEXT: cleanup +// CHECK-O0-NEXT: catch ptr @_ZTIi +// CHECK-O0: br label %[[CATCHDISPATCH:.+]] + +// CHECK-O1: [[PFORLPAD]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[PFORSYNCREG]], +// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[PFORUNW:.+]] + +// CHECK-O1: [[PFORUNW]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1-NEXT: catch ptr @_ZTIi +// CHECK-O1-NEXT: br label %[[CATCHDISPATCH]] + +// CHECK-O1: [[PFORSYNCCONT]]: +// CHECK-O1-NEXT: invoke void @llvm.sync.unwind(token %[[PFORSYNCREG]]) +// CHECK-O1-NEXT: to label %[[PFORSUCONT:.+]] unwind label %[[PFORUNW]] + +// CHECK: [[CATCHDISPATCH]]: 
+// CHECK: br i1 {{.+}}, label %[[CATCH:.+]], label %[[RESUME:.+]] + +// CHECK: [[CATCH]]: +// CHECK: call void @_Z9catchfn_iii(i32 noundef 1, +// CHECK: br label %[[TRYCONT:.+]] + +// CHECK: [[TRYCONT]]: +// CHECK-NEXT: call void @llvm.taskframe.end(token %[[TFTRY]]) +// CHECK: %[[TASKFRAME3:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED3:.+]], label %[[CONTINUE3:.+]] + +// CHECK: [[DETACHED3]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME3]]) +// CHECK-NEXT: call void @_Z3fooi(i32 noundef 5) +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE3]] + +// CHECK: [[CONTINUE3]]: +// CHECK-NEXT: call void @_Z9nothrowfni(i32 noundef 6) +// CHECK-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONT:.+]] + +// CHECK: [[SYNCCONT]]: +// CHECK-NEXT: call void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK: ret i32 0 + +// CHECK-O0: [[PFORLPAD]]: +// CHECK-O0-NEXT: landingpad +// CHECK-O0-NEXT: cleanup +// CHECK-O0: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[PFORSYNCREG]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[CATCHLPAD]] + +// CHECK-O0: [[PFORSYNCCONT]]: +// CHECK-O0-NEXT: invoke void @llvm.sync.unwind(token %[[PFORSYNCREG]]) +// CHECK-O0-NEXT: to label %[[PFORSUCONT:.+]] unwind label %[[CATCHLPAD]] + +// CHECK: [[PFORSUCONT]]: +// CHECK-O0-NEXT: br label %[[PFOREND:.+]] +// CHECK-O0: [[PFOREND]]: +// CHECK-NEXT: call void @_Z9nothrowfni(i32 noundef 4) +// CHECK-NEXT: sync within %[[TRYSYNCREG]], label %[[TRYSYNCCONT:.+]] + +// CHECK: [[TRYSYNCCONT]]: +// CHECK-NEXT: invoke void @llvm.sync.unwind(token %[[TRYSYNCREG]]) +// CHECK-O0-NEXT: to label %[[TRYSUCONT:.+]] unwind label %[[CATCHLPAD]] +// CHECK-O0: [[TRYSUCONT]]: +// CHECK-O0: br label %[[TRYCONT]] +// CHECK-O1-NEXT: to label %[[TRYCONT]] unwind label %[[CATCHLPAD]] + +// CHECK: [[RESUME]]: +// CHECK: resume + +// CHECK: [[UNREACHABLE]]: +// CHECK-NEXT: unreachable + +int 
mix_parfor_trycatch_destructors(int a) { + Bar b1; + _Cilk_spawn foo(1); + try { + Bar b2; + _Cilk_spawn foo(2); + _Cilk_for (int i = 0; i < a; ++i) + foo(3); + nothrowfn(4); + } catch (int e) { + catchfn_i(1, e); + } + Bar b3; + _Cilk_spawn foo(5); + nothrowfn(6); + return 0; +} + +/// Test that implicit syncs are properly inserted at the end of try blocks, but +/// before destructors, when there are mixtures of spawns and parallel for +/// loops. + +// CHECK-LABEL: define {{.*}}i32 @_Z31mix_parfor_trycatch_destructorsi( +// CHECK: %[[SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK-O1: call void @llvm.lifetime.start.p0(i64 [[B1SIZE:.+]], ptr nonnull %[[B1ADDR:.+]]) +// CHECK: call void @_ZN3BarC1Ev(ptr {{.*}}%[[B1:.+]]) +// CHECK: %[[TASKFRAME1:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED1:.+]], label %[[CONTINUE1:.+]] unwind label %[[DETUNWIND1:.+]] + +// CHECK: [[DETACHED1]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME1]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 1) +// CHECK-NEXT: to label %[[INVOKECONT:.+]] unwind label %[[LPAD:.+]] + +// CHECK: [[INVOKECONT]]: +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE1]] + +// CHECK: [[CONTINUE1]]: +// CHECK: %[[TFTRY:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: %[[TRYSYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK: %[[PFORSYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK-O1: call void @llvm.lifetime.start.p0(i64 [[B2SIZE:.+]], ptr nonnull %[[B2ADDR:.+]]) +// CHECK: invoke void @_ZN3BarC1Ev(ptr {{.*}}%[[B2:.+]]) +// CHECK-NEXT: to label %[[INVOKECONT2:.+]] unwind label %[[B2CONSTRLPAD:.+]] + +// CHECK: [[INVOKECONT2]]: +// CHECK: %[[TASKFRAME2:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[TRYSYNCREG]], label %[[DETACHED2:.+]], label %[[CONTINUE2:.+]] unwind label %[[DETUNWIND2:.+]] + +// CHECK: [[DETACHED2]]: +// CHECK: 
call void @llvm.taskframe.use(token %[[TASKFRAME2]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 2) +// CHECK-NEXT: to label %[[INVOKECONT3:.+]] unwind label %[[LPAD2:.+]] + +// CHECK: [[INVOKECONT3]]: +// CHECK-NEXT: reattach within %[[TRYSYNCREG]], label %[[CONTINUE2]] + +// CHECK: [[CONTINUE2]]: +// CHECK-O0: detach within %[[PFORSYNCREG]], label %[[PFORBODY:.+]], label %[[PFORINC:.+]] unwind label %[[CATCHLPAD:.+]] + +// CHECK-O1: [[LPAD]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], +// CHECK-O1-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[DETUNWIND1]] + +// CHECK-O1: [[DETUNWIND1]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME1]], +// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[B1CLEANUPLPAD_SPLIT:.+]] + +// CHECK-O1: [[B1CLEANUPLPAD_SPLIT]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1: br label %[[B1CLEANUP:.+]] + +// CHECK-O1: [[B2CONSTRLPAD]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1-NEXT: catch ptr @_ZTIi +// CHECK-O1: br label %[[CATCHDISPATCH:.+]] + +// CHECK-O1: [[LPAD2]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[TRYSYNCREG]], +// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[DETUNWIND2]] + +// CHECK-O1: [[DETUNWIND2]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME2]], +// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[CATCHLPAD:.+]] + +// CHECK-O1: [[CATCHLPAD]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1-NEXT: catch ptr @_ZTIi +// CHECK-O1: br label %[[B2CLEANUP:.+]] + +// CHECK-O1: detach within %[[PFORSYNCREG]], label %[[PFORBODY:.+]], label %[[PFORINC:.+]] unwind label 
%[[PFORUNW:.+]] + +// CHECK: [[PFORBODY]]: +// CHECK: invoke void @_Z3fooi(i32 noundef 3) +// CHECK-NEXT: to label %[[INVOKECONT4:.+]] unwind label %[[PFORLPAD:.+]] + +// CHECK: [[INVOKECONT4]]: +// CHECK: reattach within %[[PFORSYNCREG]], label %[[PFORINC]] + +// CHECK: [[PFORINC]]: +// CHECK-O0: br i1 {{.+}}, label %{{.+}}, label %[[PFORSYNC:.+]], !llvm.loop +// CHECK-O1: br i1 {{.+}}, label %[[PFORSYNC:.+]], label %{{.+}}, !llvm.loop + +// CHECK: [[PFORSYNC]]: +// CHECK: sync within %[[PFORSYNCREG]], label %[[PFORSYNCCONT:.+]] + +// CHECK-O1: [[PFORLPAD]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[PFORSYNCREG]], +// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[PFORUNW:.+]] + +// CHECK-O1: [[PFORUNW]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1-NEXT: catch ptr @_ZTIi +// CHECK-O1-NEXT: br label %[[B2CLEANUP]] + +// CHECK-O1: [[PFORSYNCCONT]]: +// CHECK-O1: call void @_Z9nothrowfni(i32 noundef 4) +// CHECK-O1-NEXT: sync within %[[TRYSYNCREG]], label %[[TRYSYNCCONT:.+]] + +// CHECK-O1: [[TRYSYNCCONT]]: +// CHECK-O1: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B2]]) +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 [[B2SIZE]], ptr nonnull %[[B2ADDR]]) +// CHECK-O1-NEXT: br label %[[TRYCONT:.+]] + +// CHECK-O0: [[LPAD]]: +// CHECK-O0-NEXT: landingpad +// CHECK-O0-NEXT: cleanup +// CHECK-O0: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[DETUNWIND1]] + +// CHECK-O0: [[DETUNWIND1]]: +// CHECK-O0-NEXT: landingpad +// CHECK-O0-NEXT: cleanup +// CHECK-O0: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME1]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[B1CLEANUPLPAD:.+]] + +// CHECK-O0: [[B1CLEANUPLPAD]]: +// CHECK-O0-NEXT: landingpad +// CHECK-O0-NEXT: cleanup +// CHECK-O0: br label %[[B1CLEANUP:.+]] + +// CHECK-O0: [[B2CONSTRLPAD]]: +// 
CHECK-O0-NEXT: landingpad +// CHECK-O0-NEXT: cleanup +// CHECK-O0-NEXT: catch ptr @_ZTIi +// CHECK-O0: br label %[[CATCHDISPATCH:.+]] + +// CHECK-O0: [[LPAD2]]: +// CHECK-O0-NEXT: landingpad +// CHECK-O0-NEXT: cleanup +// CHECK-O0: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[TRYSYNCREG]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[DETUNWIND2]] + +// CHECK-O0: [[DETUNWIND2]]: +// CHECK-O0-NEXT: landingpad +// CHECK-O0-NEXT: cleanup +// CHECK-O0: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME2]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[CATCHLPAD:.+]] + +// CHECK-O0: [[CATCHLPAD]]: +// CHECK-O0-NEXT: landingpad +// CHECK-O0-NEXT: cleanup +// CHECK-O0-NEXT: catch ptr @_ZTIi +// CHECK-O0: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B2]]) +// CHECK-O0: br label %[[CATCHDISPATCH:.+]] + +// CHECK-O1: [[B2CLEANUP]]: +// CHECK-O1: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B2]]) +// CHECK-O1-NEXT: br label %[[CATCHDISPATCH]] + +// CHECK: [[CATCHDISPATCH]]: +// CHECK-O1: call void @llvm.lifetime.end.p0(i64 [[B2SIZE]], ptr nonnull %[[B2ADDR]]) +// CHECK: br i1 {{.+}}, label %[[CATCH:.+]], label %[[RESUME:.+]] + +// CHECK: [[CATCH]]: +// CHECK: call void @_Z9catchfn_iii(i32 noundef 1, +// CHECK: br label %[[TRYCONT]] + +// CHECK: [[TRYCONT]]: +// CHECK-O1: call void @llvm.lifetime.start.p0(i64 [[B3SIZE:.+]], ptr nonnull %[[B3ADDR:.+]]) +// CHECK: invoke void @_ZN3BarC1Ev(ptr {{.*}}%[[B3:.+]]) +// CHECK-O0-NEXT: to label %[[INVOKECONT5:.+]] unwind label %[[B1CLEANUPLPAD]] +// CHECK-O1-NEXT: to label %[[INVOKECONT5:.+]] unwind label %[[B3LIFETIMEENDLPAD:.+]] + +// CHECK: [[INVOKECONT5]]: +// CHECK: %[[TASKFRAME3:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED3:.+]], label %[[CONTINUE3:.+]] unwind label %[[DETUNWIND3:.+]] + +// CHECK: [[DETACHED3]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME3]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 5) +// 
CHECK-NEXT: to label %[[INVOKECONT6:.+]] unwind label %[[LPAD3:.+]] + +// CHECK: [[INVOKECONT6]]: +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE3]] + +// CHECK: [[CONTINUE3]]: +// CHECK-NEXT: call void @_Z9nothrowfni(i32 noundef 6) +// CHECK-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONT:.+]] +// CHECK: [[SYNCCONT]]: +// CHECK-NEXT: invoke void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK-NEXT: to label %[[SUCONT:.+]] unwind label %[[B3CLEANUPLPAD:.+]] +// CHECK-O0: [[SUCONT]]: +// CHECK-O0-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONT:.+]] + +// CHECK-O0: [[PFORLPAD]]: +// CHECK-O0-NEXT: landingpad +// CHECK-O0-NEXT: cleanup +// CHECK-O0: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[PFORSYNCREG]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[CATCHLPAD]] + +// CHECK-O0: [[PFORSYNCCONT]]: +// CHECK-O0: call void @_Z9nothrowfni(i32 noundef 4) +// CHECK-O0-NEXT: sync within %[[TRYSYNCREG]], label %[[TRYSYNCCONT:.+]] + +// CHECK-O0: [[TRYSYNCCONT]]: +// CHECK-O0: call void @_ZN3BarD1Ev(ptr {{.*}}dereferenceable(1) %[[B2]]) +// CHECK-O0-NEXT: br label %[[TRYCONT]] + +// CHECK-O1: [[B3LIFETIMEENDLPAD]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1: br label %[[B3LIFETIMEEND:.+]] + +// CHECK: [[LPAD3]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], +// CHECK-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[DETUNWIND3]] + +// CHECK: [[DETUNWIND3]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME3]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[B3CLEANUPLPAD]] +// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[B3CLEANUPLPAD_SPLIT:.+]] + +// CHECK-O1: [[B3CLEANUPLPAD_SPLIT]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1: br label %[[B3CLEANUPLPAD_MERGE:.+]] + +// CHECK: [[B3CLEANUPLPAD]]: 
+// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-O1: br label %[[B3CLEANUPLPAD_MERGE]] +// CHECK-O1: [[B3CLEANUPLPAD_MERGE]]: +// CHECK: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B3]]) +// CHECK-O0: br label %[[B1CLEANUP]] +// CHECK-O1: br label %[[B3LIFETIMEEND]] + +// CHECK-O0: [[SYNCCONT]]: +// CHECK-O1: [[SUCONT]]: +// CHECK: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B3]]) +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 [[B3SIZE]], ptr nonnull %[[B3ADDR]]) +// CHECK: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B1]]) +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 [[B1SIZE]], ptr nonnull %[[B1ADDR]]) +// CHECK: ret i32 0 + +// CHECK-O1: [[B3LIFETIMEEND]]: +// CHECK-O1: call void @llvm.lifetime.end.p0(i64 [[B3SIZE]], ptr nonnull %[[B3ADDR]]) +// CHECK-O1: br label %[[B1CLEANUP]] + +// CHECK: [[B1CLEANUP]]: +// CHECK: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B1]]) +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 [[B1SIZE]], ptr nonnull %[[B1ADDR]]) +// CHECK-O0: br label %[[RESUME:.+]] + +// CHECK-O0: [[RESUME]]: +// CHECK: resume + +// CHECK: [[UNREACHABLE]]: +// CHECK-NEXT: unreachable + +int spawn_trycatch(int a) { + _Cilk_spawn try { + _Cilk_spawn foo(1); + nothrowfn(2); + } catch (int e) { + catchfn_i(1, e); + } + _Cilk_spawn { + try { + _Cilk_spawn foo(3); + nothrowfn(4); + } catch (int e) { + catchfn_i(2, e); + } + }; + _Cilk_spawn foo(5); + nothrowfn(6); + return 0; +} + +/// Test that implicit syncs are properly inserted for spawned statements. 
+ +// CHECK-LABEL: define {{.*}}i32 @_Z14spawn_trycatchi( +// CHECK: %[[SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK: %[[TASKFRAME1:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK-O0: detach within %[[SYNCREG]], label %[[DETACHED1:.+]], label %[[CONTINUE1:.+]] unwind label %[[DETUNWIND1:.+]] +// CHECK-O1: detach within %[[SYNCREG]], label %[[DETACHED1:.+]], label %[[CONTINUE1:.+]] + +// CHECK: [[DETACHED1]]: +// CHECK-DAG: %[[TRYSYNCREG1:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK-DAG: call void @llvm.taskframe.use(token %[[TASKFRAME1]]) +// CHECK-DAG: %[[TFTRY1:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: %[[TASKFRAME2:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[TRYSYNCREG1]], label %[[TRYDETACHED1:.+]], label %[[TRYDETCONT1:.+]] unwind label %[[TRYDETUNWIND1:.+]] + +// CHECK: [[TRYDETACHED1]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME2]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 1) +// CHECK-NEXT: to label %[[INVOKECONT1:.+]] unwind label %[[LPAD1:.+]] + +// CHECK: [[INVOKECONT1]]: +// CHECK-NEXT: reattach within %[[TRYSYNCREG1]], label %[[TRYDETCONT1]] + +// CHECK: [[TRYDETCONT1]]: +// CHECK-NEXT: call void @_Z9nothrowfni(i32 noundef 2) +// CHECK-NEXT: sync within %[[TRYSYNCREG1]], label %[[TRYSYNCCONT1:.+]] + +// CHECK: [[TRYSYNCCONT1]]: +// CHECK-NEXT: invoke void @llvm.sync.unwind(token %[[TRYSYNCREG1]]) +// CHECK-O0-NEXT: to label %[[TRYSUCONT1:.+]] unwind label %[[CATCHLPAD:.+]] +// CHECK-O0: [[TRYSUCONT1]]: +// CHECK-O0-NEXT: br label %[[TRYCONT1:.+]] +// CHECK-O1-NEXT: to label %[[TRYCONT1:.+]] unwind label %[[CATCHLPAD:.+]] + +// CHECK: [[LPAD1]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[TRYSYNCREG1]], +// CHECK-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[TRYDETUNWIND1]] + +// CHECK: [[TRYDETUNWIND1]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup 
+// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME2]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[CATCHLPAD]] +// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[CATCHLPAD_SPLIT:.+]] + +// CHECK-O1: [[CATCHLPAD_SPLIT]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1-NEXT: catch ptr @_ZTIi +// CHECK-O1: br label %[[CATCHLPAD_MERGE:.+]] + +// CHECK: [[CATCHLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NEXT: catch ptr @_ZTIi +// CHECK-O1: br label %[[CATCHLPAD_MERGE]] +// CHECK-O1: [[CATCHLPAD_MERGE]]: +// CHECK-O0: br label %[[CATCHDISPATCH1:.+]] + +// CHECK-O0: [[CATCHDISPATCH1]]: +// CHECK-O0: br i1 {{.+}}, label %[[CATCH1:.+]], label %[[TASKCLEANUP1:.+]] + +// CHECK-O0: [[CATCH1]]: +// CHECK: call void @_Z9catchfn_iii(i32 noundef 1, +// CHECK: br label %[[TRYCONT1]] + +// CHECK: [[TRYCONT1]]: +// CHECK-NEXT: call void @llvm.taskframe.end(token %[[TFTRY1]]) +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE1]] + +// CHECK: [[CONTINUE1]]: +// CHECK: %[[TASKFRAME3:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK-O0: detach within %[[SYNCREG]], label %[[DETACHED2:.+]], label %[[CONTINUE2:.+]] unwind label %[[DETUNWIND2:.+]] +// CHECK-O1: detach within %[[SYNCREG]], label %[[DETACHED2:.+]], label %[[CONTINUE2:.+]] + +// CHECK: [[DETACHED2]]: +// CHECK-DAG: %[[TRYSYNCREG2:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK-DAG: call void @llvm.taskframe.use(token %[[TASKFRAME3]]) +// CHECK-DAG: %[[TFTRY2:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: %[[TASKFRAME4:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[TRYSYNCREG2]], label %[[TRYDETACHED2:.+]], label %[[TRYDETCONT2:.+]] unwind label %[[TRYDETUNWIND2:.+]] + +// CHECK: [[TRYDETACHED2]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME4]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 3) +// CHECK-NEXT: to label %[[INVOKECONT2:.+]] 
unwind label %[[LPAD2:.+]] + +// CHECK: [[INVOKECONT2]]: +// CHECK-NEXT: reattach within %[[TRYSYNCREG2]], label %[[TRYDETCONT2]] + +// CHECK: [[TRYDETCONT2]]: +// CHECK-NEXT: call void @_Z9nothrowfni(i32 noundef 4) +// CHECK-NEXT: sync within %[[TRYSYNCREG2]], label %[[TRYSYNCCONT2:.+]] + +// CHECK: [[TRYSYNCCONT2]]: +// CHECK-NEXT: invoke void @llvm.sync.unwind(token %[[TRYSYNCREG2]]) +// CHECK-O0-NEXT: to label %[[TRYSUCONT2:.+]] unwind label %[[CATCHLPAD2:.+]] + +// CHECK-O0: [[TRYSUCONT2]]: +// CHECK-O0-NEXT: br label %[[TRYCONT2:.+]] +// CHECK-O1-NEXT: to label %[[TRYCONT2:.+]] unwind label %[[CATCHLPAD2:.+]] + +// CHECK-O0: [[TASKCLEANUP1]]: +// CHECK-O0: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[DETUNWIND1]] + +// CHECK-O0: [[DETUNWIND1]]: +// CHECK-O0-NEXT: landingpad +// CHECK-O0-NEXT: cleanup +// CHECK-O0: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME1]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[TFUNWIND1:.+]] + +// CHECK-O0: [[TFUNWIND1]]: +// CHECK-O0-NEXT: landingpad +// CHECK-O0-NEXT: cleanup +// CHECK-O0: br label %[[RESUME:.+]] + +// CHECK: [[LPAD2]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[TRYSYNCREG2]], +// CHECK-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[TRYDETUNWIND2]] + +// CHECK: [[TRYDETUNWIND2]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME4]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[CATCHLPAD2]] +// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[CATCHLPAD2_SPLIT:.+]] + +// CHECK-O1: [[CATCHLPAD2_SPLIT]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1-NEXT: catch ptr @_ZTIi +// CHECK-O1: br label %[[CATCHLPAD2_MERGE:.+]] + +// CHECK: [[CATCHLPAD2]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// 
CHECK-NEXT: catch ptr @_ZTIi +// CHECK-O1: br label %[[CATCHLPAD2_MERGE]] +// CHECK-O1: [[CATCHLPAD2_MERGE]]: +// CHECK-O0: br label %[[CATCHDISPATCH2:.+]] + +// CHECK-O0: [[CATCHDISPATCH2]]: +// CHECK-O0: br i1 {{.+}}, label %[[CATCH2:.+]], label %[[TASKCLEANUP2:.+]] + +// CHECK-O0: [[CATCH2]]: +// CHECK: call void @_Z9catchfn_iii(i32 noundef 2, +// CHECK: br label %[[TRYCONT2]] + +// CHECK: [[TRYCONT2]]: +// CHECK-NEXT: call void @llvm.taskframe.end(token %[[TFTRY2]]) +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE2]] + +// CHECK: %[[TASKFRAME5:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED2:.+]], label %[[CONTINUE2:.+]] + +// CHECK: [[DETACHED2]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME5]]) +// CHECK-NEXT: call void @_Z3fooi(i32 noundef 5) +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE2]] + +// CHECK: [[CONTINUE2]]: +// CHECK-NEXT: call void @_Z9nothrowfni(i32 noundef 6) +// CHECK-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONT2:.+]] + +// CHECK: [[SYNCCONT2]]: +// CHECK-NEXT: call void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK: ret i32 0 + +// CHECK-O0: [[TASKCLEANUP2]]: +// CHECK-O0: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[DETUNWIND2]] + +// CHECK-O0: [[DETUNWIND2]]: +// CHECK-O0-NEXT: landingpad +// CHECK-O0-NEXT: cleanup +// CHECK-O0: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME3]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[TFUNWIND3:.+]] + +// CHECK-O0: [[TFUNWIND3]]: +// CHECK-O0-NEXT: landingpad +// CHECK-O0-NEXT: cleanup +// CHECK-O0: br label %[[RESUME]] + +// CHECK-O0: [[RESUME]]: +// CHECK-O0: resume + +// CHECK: [[UNREACHABLE]]: +// CHECK-NEXT: unreachable + +int spawn_trycatch_destructors(int a) { + Bar b1; + _Cilk_spawn foo(1); + _Cilk_spawn try { + Bar b2; + _Cilk_spawn foo(2); + nothrowfn(3); + } catch (int 
e) { + catchfn_i(1, e); + } + _Cilk_spawn { + try { + Bar b3; + _Cilk_spawn foo(4); + nothrowfn(5); + } catch (int e) { + catchfn_i(2, e); + } + }; + Bar b4; + _Cilk_spawn foo(6); + nothrowfn(7); + return 0; +} + +/// Test that implicit syncs are properly inserted before destructors for +/// spawned statements. + +// CHECK-LABEL: define {{.*}}i32 @_Z26spawn_trycatch_destructorsi( +// CHECK: %[[SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK-O1: call void @llvm.lifetime.start.p0(i64 [[B1SIZE:.+]], ptr nonnull %[[B1ADDR:.+]]) +// CHECK: call void @_ZN3BarC1Ev(ptr {{.*}}%[[B1:.+]]) +// CHECK: %[[TASKFRAME1:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED1:.+]], label %[[CONTINUE1:.+]] unwind label %[[DETUNWIND1:.+]] + +// CHECK: [[DETACHED1]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME1]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 1) +// CHECK-NEXT: to label %[[INVOKECONT1:.+]] unwind label %[[LPAD1:.+]] + +// CHECK: [[INVOKECONT1]]: +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE1]] + +// CHECK: [[CONTINUE1]]: +// CHECK: %[[TASKFRAME2:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED2:.+]], label %[[CONTINUE2:.+]] unwind label %[[DETUNWIND2:.+]] + +// CHECK: [[DETACHED2]]: +// CHECK-DAG: %[[TRYSYNCREG1:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK-DAG: call void @llvm.taskframe.use(token %[[TASKFRAME2]]) +// CHECK-DAG: %[[TFTRY1:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK-O1: call void @llvm.lifetime.start.p0(i64 [[B2SIZE:.+]], ptr nonnull %[[B2ADDR:.+]]) +// CHECK: invoke void @_ZN3BarC1Ev(ptr {{.*}}%[[B2:.+]]) +// CHECK-NEXT: to label %[[B2CONSTRCONT:.+]] unwind label %[[B2CONSTRLPAD:.+]] + +// CHECK: [[B2CONSTRCONT]]: +// CHECK: %[[TASKFRAME3:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[TRYSYNCREG1]], label %[[TRYDETACHED1:.+]], label 
%[[TRYDETCONT1:.+]] unwind label %[[TRYDETUNWIND1:.+]] + +// CHECK: [[TRYDETACHED1]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME3]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 2) +// CHECK-NEXT: to label %[[INVOKECONT2:.+]] unwind label %[[LPAD2:.+]] + +// CHECK: [[INVOKECONT2]]: +// CHECK-NEXT: reattach within %[[TRYSYNCREG1]], label %[[TRYDETCONT1]] + +// CHECK: [[TRYDETCONT1]]: +// CHECK-NEXT: call void @_Z9nothrowfni(i32 noundef 3) +// CHECK-NEXT: sync within %[[TRYSYNCREG1]], label %[[TRYSYNCCONT1:.+]] + +// CHECK: [[TRYSYNCCONT1]]: +// CHECK-NEXT: invoke void @llvm.sync.unwind(token %[[TRYSYNCREG1]]) +// CHECK-NEXT: to label %[[TRYSUCONT1:.+]] unwind label %[[B2CLEANUPLPAD:.+]] +// CHECK: [[TRYSUCONT1]]: +// CHECK-NEXT: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B2]]) +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 [[B2SIZE]], ptr nonnull %[[B2ADDR]]) +// CHECK-NEXT: br label %[[TRYCONT1:.+]] + +// CHECK: [[LPAD1]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], +// CHECK-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[DETUNWIND1]] + +// CHECK: [[DETUNWIND1]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME1]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[TFUNWIND1:.+]] + +// CHECK: [[TFUNWIND1]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: br label %[[EHCLEANUP:.+]] + +// CHECK: [[B2CONSTRLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NEXT: catch ptr @_ZTIi +// CHECK: br label %[[CATCHDISPATCH1:.+]] + +// CHECK: [[LPAD2]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[TRYSYNCREG1]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[TRYDETUNWIND1]] + +// CHECK: [[TRYDETUNWIND1]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke 
void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME3]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[B2CLEANUPLPAD]] +// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[B2CLEANUPLPAD_SPLIT:.+]] + +// CHECK-O1: [[B2CLEANUPLPAD_SPLIT]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1-NEXT: catch ptr @_ZTIi +// CHECK-O1: br label %[[B2CLEANUPLPAD_MERGE:.+]] + +// CHECK: [[B2CLEANUPLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NEXT: catch ptr @_ZTIi +// CHECK-O1: br label %[[B2CLEANUPLPAD_MERGE]] +// CHECK-O1: [[B2CLEANUPLPAD_MERGE]]: +// CHECK: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B2]]) +// CHECK: br label %[[CATCHDISPATCH1]] + +// CHECK: [[CATCHDISPATCH1]]: +// CHECK-O1: call void @llvm.lifetime.end.p0(i64 [[B2SIZE]], ptr nonnull %[[B2ADDR]]) +// CHECK: br i1 {{.+}}, label %[[CATCH1:.+]], label %[[TASKCLEANUP1:.+]] + +// CHECK: [[CATCH1]]: +// CHECK: call void @_Z9catchfn_iii(i32 noundef 1, +// CHECK: br label %[[TRYCONT1]] + +// CHECK: [[TRYCONT1]]: +// CHECK-NEXT: call void @llvm.taskframe.end(token %[[TFTRY1]]) +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE2]] + +// CHECK: [[CONTINUE2]]: +// CHECK: %[[TASKFRAME4:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED3:.+]], label %[[CONTINUE3:.+]] unwind label %[[DETUNWIND3:.+]] + +// CHECK: [[DETACHED3]]: +// CHECK-DAG: %[[TRYSYNCREG2:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK-DAG: call void @llvm.taskframe.use(token %[[TASKFRAME4]]) +// CHECK-DAG: %[[TFTRY2:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK-O1: call void @llvm.lifetime.start.p0(i64 [[B3SIZE:.+]], ptr nonnull %[[B3ADDR:.+]]) +// CHECK: invoke void @_ZN3BarC1Ev(ptr {{.*}}%[[B3:.+]]) +// CHECK-NEXT: to label %[[B3CONSTRCONT:.+]] unwind label %[[B3CONSTRLPAD:.+]] + +// CHECK: [[B3CONSTRCONT]]: +// CHECK: %[[TASKFRAME5:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach 
within %[[TRYSYNCREG2]], label %[[TRYDETACHED2:.+]], label %[[TRYDETCONT2:.+]] unwind label %[[TRYDETUNWIND2:.+]] + +// CHECK: [[TRYDETACHED2]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME5]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 4) +// CHECK-NEXT: to label %[[INVOKECONT3:.+]] unwind label %[[LPAD3:.+]] + +// CHECK: [[INVOKECONT3]]: +// CHECK-NEXT: reattach within %[[TRYSYNCREG2]], label %[[TRYDETCONT2]] + +// CHECK: [[TRYDETCONT2]]: +// CHECK-NEXT: call void @_Z9nothrowfni(i32 noundef 5) +// CHECK-NEXT: sync within %[[TRYSYNCREG2]], label %[[TRYSYNCCONT2:.+]] + +// CHECK: [[TRYSYNCCONT2]]: +// CHECK-NEXT: invoke void @llvm.sync.unwind(token %[[TRYSYNCREG2]]) +// CHECK-NEXT: to label %[[TRYSUCONT2:.+]] unwind label %[[B3CLEANUPLPAD:.+]] +// CHECK: [[TRYSUCONT2]]: +// CHECK-NEXT: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B3]]) +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 [[B3SIZE]], ptr nonnull %[[B3ADDR]]) +// CHECK-NEXT: br label %[[TRYCONT2:.+]] + +// CHECK: [[TASKCLEANUP1]]: +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[DETUNWIND2]] + +// CHECK: [[DETUNWIND2]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME2]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[TFUNWIND1]] + +// CHECK: [[B3CONSTRLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NEXT: catch ptr @_ZTIi +// CHECK: br label %[[CATCHDISPATCH2:.+]] + +// CHECK: [[LPAD3]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[TRYSYNCREG2]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[TRYDETUNWIND2]] + +// CHECK: [[TRYDETUNWIND2]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME5]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] 
unwind label %[[B3CLEANUPLPAD]] +// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[B3CLEANUPLPAD_SPLIT:.+]] + +// CHECK-O1: [[B3CLEANUPLPAD_SPLIT]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1-NEXT: catch ptr @_ZTIi +// CHECK-O1: br label %[[B3CLEANUPLPAD_MERGE:.+]] + +// CHECK: [[B3CLEANUPLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NEXT: catch ptr @_ZTIi +// CHECK-O1: br label %[[B3CLEANUPLPAD_MERGE]] +// CHECK-O1: [[B3CLEANUPLPAD_MERGE]]: +// CHECK: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B3]]) +// CHECK: br label %[[CATCHDISPATCH2]] + +// CHECK: [[CATCHDISPATCH2]]: +// CHECK-O1: call void @llvm.lifetime.end.p0(i64 [[B3SIZE]], ptr nonnull %[[B3ADDR]]) +// CHECK: br i1 {{.+}}, label %[[CATCH2:.+]], label %[[TASKCLEANUP2:.+]] + +// CHECK: [[CATCH2]]: +// CHECK: call void @_Z9catchfn_iii(i32 noundef 2, +// CHECK: br label %[[TRYCONT2]] + +// CHECK: [[TRYCONT2]]: +// CHECK-NEXT: call void @llvm.taskframe.end(token %[[TFTRY2]]) +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE3]] + +// CHECK: [[CONTINUE3]]: +// CHECK-O1: call void @llvm.lifetime.start.p0(i64 [[B4SIZE:.+]], ptr nonnull %[[B4ADDR:.+]]) +// CHECK-NEXT: invoke void @_ZN3BarC1Ev(ptr {{.*}}%[[B4:.+]]) +// CHECK-NEXT: to label %[[B4CONSTRCONT:.+]] unwind label %[[B4CONSTRLPAD:.+]] + +// CHECK: [[B4CONSTRCONT]]: +// CHECK: %[[TASKFRAME6:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED4:.+]], label %[[CONTINUE4:.+]] unwind label %[[DETUNWIND4:.+]] + +// CHECK: [[DETACHED4]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME6]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 6) +// CHECK-NEXT: to label %[[INVOKECONT4:.+]] unwind label %[[LPAD4:.+]] + +// CHECK: [[INVOKECONT4]]: +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE4]] + +// CHECK: [[CONTINUE4]]: +// CHECK-NEXT: call void @_Z9nothrowfni(i32 noundef 7) +// CHECK-NEXT: sync within %[[SYNCREG]], label 
%[[SYNCCONT:.+]] +// CHECK: [[SYNCCONT]]: +// CHECK-NEXT: invoke void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK-NEXT: to label %[[SUCONT:.+]] unwind label %[[B4CLEANUPLPAD:.+]] +// CHECK-O0: [[SUCONT]]: +// CHECK-O0-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONT:.+]] + +// CHECK: [[TASKCLEANUP2]]: +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[DETUNWIND3]] + +// CHECK: [[DETUNWIND3]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME4]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[TFUNWIND1]] + +// CHECK: [[LPAD4]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[DETUNWIND4]] + +// CHECK: [[DETUNWIND4]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME6]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[B4CLEANUPLPAD]] +// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[B4CLEANUPLPAD_SPLIT:.+]] + +// CHECK-O1: [[B4CLEANUPLPAD_SPLIT]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1: br label %[[B4CLEANUPLPAD_MERGE:.+]] + +// CHECK: [[B4CLEANUPLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-O1: br label %[[B4CLEANUPLPAD_MERGE]] +// CHECK-O1: [[B4CLEANUPLPAD_MERGE]]: +// CHECK: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B4]]) +// CHECK-O0: br label %[[EHCLEANUP]] +// CHECK-O1: br label %[[EHCLEANUP2:.+]] + +// CHECK-O0: [[SYNCCONT]]: +// CHECK-O0-NEXT: invoke void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK-O0-NEXT: to label %[[SUCONT:.+]] unwind label %[[B4CLEANUPLPAD]] +// CHECK: [[SUCONT]]: +// CHECK-NEXT: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B4]]) +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 
[[B4SIZE]], ptr nonnull %[[B4ADDR]]) +// CHECK-NEXT: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B1]]) +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 [[B1SIZE]], ptr nonnull %[[B1ADDR]]) +// CHECK-NEXT: ret i32 0 + +// CHECK-O1: [[EHCLEANUP2]]: +// CHECK-O1: call void @llvm.lifetime.end.p0(i64 [[B4SIZE]], ptr nonnull %[[B4ADDR]]) +// CHECK-O1: br label %[[EHCLEANUP]] + +// CHECK: [[EHCLEANUP]]: +// CHECK: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B1]]) +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 [[B1SIZE]], ptr nonnull %[[B1ADDR]]) +// CHECK-O0: br label %[[RESUME:.+]] + +// CHECK-O0: [[RESUME]]: +// CHECK: resume + +// CHECK: [[UNREACHABLE]]: +// CHECK-NEXT: unreachable + +int parfor_trycatch(int a) { + _Cilk_spawn foo(1); + _Cilk_for(int i = 0; i < a; ++i) + try { + _Cilk_spawn foo(2); + nothrowfn(3); + } catch (int e) { + catchfn_i(1, e); + } + _Cilk_for(int i = 0; i < a; ++i) { + try { + _Cilk_spawn foo(4); + nothrowfn(5); + } catch (int e) { + catchfn_i(2, e); + } + } + _Cilk_spawn foo(6); + nothrowfn(7); + return 0; +} + +/// Test that implicit syncs are properly inserted for parallel-for statements. 
+ +// CHECK-LABEL: define {{.*}}i32 @_Z15parfor_trycatchi( +// CHECK: %[[SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK: %[[PFORSYNCREG1:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK: %[[PFORSYNCREG2:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK: %[[TASKFRAME1:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED1:.+]], label %[[CONTINUE1:.+]] + +// CHECK: [[DETACHED1]]: +// CHECK-NEXT: call void @llvm.taskframe.use(token %[[TASKFRAME1]]) +// CHECK-NEXT: call void @_Z3fooi(i32 noundef 1) +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE1]] + +// CHECK: [[CONTINUE1]]: +// CHECK-O0: detach within %[[PFORSYNCREG1]], label %[[PFORBODY1:.+]], label %[[PFORINC1:.+]] unwind label %[[PFORDU1:.+]] +// CHECK-O1: detach within %[[PFORSYNCREG1]], label %[[PFORBODY1:.+]], label %[[PFORINC1:.+]] + +// CHECK: [[PFORBODY1]]: +// CHECK: %[[TRYSYNCREG1:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK: %[[TASKFRAME2:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[TRYSYNCREG1]], label %[[TRYDET1:.+]], label %[[TRYDETCONT1:.+]] unwind label %[[TRYDU1:.+]] + +// CHECK: [[TRYDET1]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME2]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 2) +// CHECK-NEXT: to label %[[INVOKECONT1:.+]] unwind label %[[LPAD1:.+]] + +// CHECK: [[INVOKECONT1]]: +// CHECK-NEXT: reattach within %[[TRYSYNCREG1]], label %[[TRYDETCONT1]] + +// CHECK: [[TRYDETCONT1]]: +// CHECK-NEXT: call void @_Z9nothrowfni(i32 noundef 3) +// CHECK-NEXT: sync within %[[TRYSYNCREG1]], label %[[TRYSYNCCONT1:.+]] + +// CHECK: [[TRYSYNCCONT1]]: +// CHECK-NEXT: invoke void @llvm.sync.unwind(token %[[TRYSYNCREG1]]) +// CHECK-O0-NEXT: to label %[[TRYSUCONT1:.+]] unwind label %[[CATCHLPAD:.+]] +// CHECK-O0: [[TRYSUCONT1]]: +// CHECK-O0-NEXT: br label %[[TRYCONT1:.+]] +// CHECK-O1-NEXT: to label %[[TRYCONT1:.+]] unwind label 
%[[CATCHLPAD:.+]] + +// CHECK: [[LPAD1]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[TRYSYNCREG1]], +// CHECK-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[TRYDU1]] + +// CHECK: [[TRYDU1]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME2]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[CATCHLPAD]] +// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[CATCHLPAD_SPLIT:.+]] + +// CHECK-O1: [[CATCHLPAD_SPLIT]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1-NEXT: catch ptr @_ZTIi +// CHECK-O1: br label %[[CATCHLPAD_MERGE:.+]] + +// CHECK: [[CATCHLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NEXT: catch ptr @_ZTIi +// CHECK-O1: br label %[[CATCHLPAD_MERGE]] +// CHECK-O1: [[CATCHLPAD_MERGE]]: +// CHECK-O0: br label %[[CATCHDISPATCH1:.+]] + +// CHECK-O0: [[CATCHDISPATCH1]]: +// CHECK-O0: br i1 {{.+}}, label %[[CATCH1:.+]], label %[[TASKCLEANUP1:.+]] + +// CHECK-O0: [[CATCH1]]: +// CHECK: call void @_Z9catchfn_iii(i32 noundef 1, +// CHECK: br label %[[TRYCONT1]] + +// CHECK: [[TRYCONT1]]: +// CHECK: reattach within %[[PFORSYNCREG1]], label %[[PFORINC1]] + +// CHECK: [[PFORINC1]]: +// CHECK-O0: br i1 {{.+}}, label {{.+}}, label %[[PFORSYNC1:.+]], !llvm.loop +// CHECK-O1: br i1 {{.+}}, label %[[PFORSYNC1:.+]], label {{.+}}, !llvm.loop + +// CHECK: [[PFORSYNC1]]: +// CHECK-O0: sync within %[[PFORSYNCREG1]], label %[[PFORSYNCCONT1:.+]] +// CHECK-O1: sync within %[[PFORSYNCREG1]], label %[[PFOREND1:.+]] + +// CHECK-O0: [[TASKCLEANUP1]]: +// CHECK-O0: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[PFORSYNCREG1]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[PFORDU1]] + +// CHECK-O0: [[PFORDU1]]: +// CHECK-O0-NEXT: landingpad +// CHECK-O0-NEXT: cleanup +// CHECK-O0: br label %[[EHRESUME:.+]] + +// CHECK-O0: [[PFORSYNCCONT1]]: +// 
CHECK-O0-NEXT: call void @llvm.sync.unwind(token %[[PFORSYNCREG1]]) +// CHECK-O0-NEXT: br label %[[PFOREND1:.+]] + +// CHECK: [[PFOREND1]]: +// CHECK-O0: detach within %[[PFORSYNCREG2]], label %[[PFORBODY2:.+]], label %[[PFORINC2:.+]] unwind label %[[PFORDU2:.+]] +// CHECK-O1: detach within %[[PFORSYNCREG2]], label %[[PFORBODY2:.+]], label %[[PFORINC2:.+]] + +// CHECK: [[PFORBODY2]]: +// CHECK: %[[TRYSYNCREG2:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK: %[[TASKFRAME3:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[TRYSYNCREG2]], label %[[TRYDET2:.+]], label %[[TRYDETCONT2:.+]] unwind label %[[TRYDU2:.+]] + +// CHECK: [[TRYDET2]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME3]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 4) +// CHECK-NEXT: to label %[[INVOKECONT2:.+]] unwind label %[[LPAD2:.+]] + +// CHECK: [[INVOKECONT2]]: +// CHECK-NEXT: reattach within %[[TRYSYNCREG2]], label %[[TRYDETCONT2]] + +// CHECK: [[TRYDETCONT2]]: +// CHECK-NEXT: call void @_Z9nothrowfni(i32 noundef 5) +// CHECK-NEXT: sync within %[[TRYSYNCREG2]], label %[[TRYSYNCCONT2:.+]] + +// CHECK: [[TRYSYNCCONT2]]: +// CHECK-NEXT: invoke void @llvm.sync.unwind(token %[[TRYSYNCREG2]]) +// CHECK-O0-NEXT: to label %[[TRYSUCONT2:.+]] unwind label %[[CATCHLPAD2:.+]] +// CHECK-O0: [[TRYSUCONT2]]: +// CHECK-O0-NEXT: br label %[[TRYCONT2:.+]] +// CHECK-O1-NEXT: to label %[[TRYCONT2:.+]] unwind label %[[CATCHLPAD2:.+]] + +// CHECK: [[LPAD2]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[TRYSYNCREG2]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[TRYDU2]] + +// CHECK: [[TRYDU2]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME3]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[CATCHLPAD2]] +// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[CATCHLPAD2_SPLIT:.+]] + 
+// CHECK-O1: [[CATCHLPAD2_SPLIT]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1-NEXT: catch ptr @_ZTIi +// CHECK-O1: br label %[[CATCHLPAD2_MERGE:.+]] + +// CHECK: [[CATCHLPAD2]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NEXT: catch ptr @_ZTIi +// CHECK-O1: br label %[[CATCHLPAD2_MERGE]] +// CHECK-O1: [[CATCHLPAD2_MERGE]]: +// CHECK-O0: br label %[[CATCHDISPATCH2:.+]] + +// CHECK-O0: [[CATCHDISPATCH2]]: +// CHECK-O0: br i1 {{.+}}, label %[[CATCH2:.+]], label %[[TASKCLEANUP2:.+]] + +// CHECK-O0: [[CATCH2]]: +// CHECK: call void @_Z9catchfn_iii(i32 noundef 2, +// CHECK: br label %[[TRYCONT2]] + +// CHECK: [[TRYCONT2]]: +// CHECK: reattach within %[[PFORSYNCREG2]], label %[[PFORINC2]] + +// CHECK: [[PFORINC2]]: +// CHECK-O0: br i1 {{.+}}, label {{.+}}, label %[[PFORSYNC2:.+]], !llvm.loop +// CHECK-O1: br i1 {{.+}}, label %[[PFORSYNC2:.+]], label {{.+}}, !llvm.loop + +// CHECK: [[PFORSYNC2]]: +// CHECK-O0: sync within %[[PFORSYNCREG2]], label %[[PFORSYNCCONT2:.+]] +// CHECK-O1: sync within %[[PFORSYNCREG2]], label %[[PFOREND2:.+]] + +// CHECK-O0: [[TASKCLEANUP2]]: +// CHECK-O0: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[PFORSYNCREG2]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[PFORDU2]] + +// CHECK-O0: [[PFORDU2]]: +// CHECK-O0-NEXT: landingpad +// CHECK-O0-NEXT: cleanup +// CHECK-O0: br label %[[EHRESUME]] + +// CHECK-O0: [[PFORSYNCCONT2]]: +// CHECK-O0-NEXT: call void @llvm.sync.unwind(token %[[PFORSYNCREG2]]) +// CHECK-O0-NEXT: br label %[[PFOREND2:.+]] + +// CHECK: [[PFOREND2]]: +// CHECK: %[[TASKFRAME4:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED2:.+]], label %[[CONTINUE2:.+]] + +// CHECK: [[DETACHED2]]: +// CHECK-NEXT: call void @llvm.taskframe.use(token %[[TASKFRAME4]]) +// CHECK-NEXT: call void @_Z3fooi(i32 noundef 6) +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE2]] + +// CHECK: [[CONTINUE2]]: +// CHECK-NEXT: 
call void @_Z9nothrowfni(i32 noundef 7) +// CHECK-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONT2:.+]] + +// CHECK: [[SYNCCONT2]]: +// CHECK-NEXT: call void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK: ret i32 0 + +// CHECK-O0: [[EHRESUME]]: +// CHECK-O0: resume + +// CHECK: [[UNREACHABLE]]: +// CHECK-NEXT: unreachable + +int parfor_trycatch_destructors(int a) { + Bar b1; + _Cilk_spawn foo(1); + _Cilk_for(int i = 0; i < a; ++i) + try { + Bar b2; + _Cilk_spawn foo(2); + nothrowfn(3); + } catch (int e) { + catchfn_i(1, e); + } + _Cilk_for(int i = 0; i < a; ++i) { + try { + Bar b3; + _Cilk_spawn foo(4); + nothrowfn(5); + } catch (int e) { + catchfn_i(2, e); + } + } + Bar b4; + _Cilk_spawn foo(6); + nothrowfn(7); + return 0; +} + +/// Test that implicit syncs are properly inserted before destructors for +/// parallel-for statements. + +// CHECK-LABEL: define {{.*}}i32 @_Z27parfor_trycatch_destructorsi( +// CHECK: %[[SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK: %[[PFORSYNCREG1:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK: %[[PFORSYNCREG2:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK-O1: call void @llvm.lifetime.start.p0(i64 [[B1SIZE:.+]], ptr nonnull %[[B1ADDR:.+]]) +// CHECK: call void @_ZN3BarC1Ev(ptr {{.*}}%[[B1:.+]]) +// CHECK: %[[TASKFRAME1:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED1:.+]], label %[[CONTINUE1:.+]] unwind label %[[DETUNWIND1:.+]] + +// CHECK: [[DETACHED1]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME1]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 1) +// CHECK-NEXT: to label %[[INVOKECONT1:.+]] unwind label %[[LPAD1:.+]] + +// CHECK: [[INVOKECONT1]]: +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE1]] + +// CHECK: [[CONTINUE1]]: + +// CHECK-O1: [[LPAD1]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], +// 
CHECK-O1-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[DETUNWIND1]] + +// CHECK-O1: [[DETUNWIND1]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME1]], +// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[TASKCLEANUPLPAD:.+]] + +// CHECK-O1: [[TASKCLEANUPLPAD]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1: br label %[[TASKCLEANUP1:.+]] + +// CHECK: detach within %[[PFORSYNCREG1]], label %[[PFORBODY1:.+]], label %[[PFORINC1:.+]] unwind label %[[PFORDU1:.+]] + +// CHECK: [[PFORBODY1]]: +// CHECK: %[[TRYSYNCREG1:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK-O1: call void @llvm.lifetime.start.p0(i64 [[B2SIZE:.+]], ptr nonnull %[[B2ADDR:.+]]) +// CHECK: invoke void @_ZN3BarC1Ev(ptr {{.*}}%[[B2:.+]]) +// CHECK-NEXT: to label %[[B2CONSTRCONT:.+]] unwind label %[[B2CONSTRLPAD:.+]] + +// CHECK: [[B2CONSTRCONT]]: +// CHECK: %[[TASKFRAME2:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[TRYSYNCREG1]], label %[[TRYDET1:.+]], label %[[TRYDETCONT1:.+]] unwind label %[[TRYDU1:.+]] + +// CHECK: [[TRYDET1]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME2]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 2) +// CHECK-NEXT: to label %[[INVOKECONT2:.+]] unwind label %[[LPAD2:.+]] + +// CHECK: [[INVOKECONT2]]: +// CHECK-NEXT: reattach within %[[TRYSYNCREG1]], label %[[TRYDETCONT1]] + +// CHECK: [[TRYDETCONT1]]: +// CHECK-NEXT: call void @_Z9nothrowfni(i32 noundef 3) +// CHECK-NEXT: sync within %[[TRYSYNCREG1]], label %[[TRYSYNCCONT1:.+]] + +// CHECK: [[TRYSYNCCONT1]]: +// CHECK-NEXT: invoke void @llvm.sync.unwind(token %[[TRYSYNCREG1]]) +// CHECK-NEXT: to label %[[SUCONT:.+]] unwind label %[[B2CLEANUPLPAD:.+]] +// CHECK: [[SUCONT]]: +// CHECK-NEXT: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B2]]) +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 [[B2SIZE]], ptr nonnull %[[B2ADDR]]) +// CHECK-NEXT: br label 
%[[TRYCONT1:.+]] + +// CHECK-O0: [[LPAD1]]: +// CHECK-O0-NEXT: landingpad +// CHECK-O0-NEXT: cleanup +// CHECK-O0: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[DETUNWIND1]] + +// CHECK-O0: [[DETUNWIND1]]: +// CHECK-O0-NEXT: landingpad +// CHECK-O0-NEXT: cleanup +// CHECK-O0: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME1]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[PFORDU1]] + +// CHECK-O0: [[PFORDU1]]: +// CHECK-O0-NEXT: landingpad +// CHECK-O0-NEXT: cleanup +// CHECK-O0: br label %[[TASKCLEANUP1:.+]] + +// CHECK: [[B2CONSTRLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NEXT: catch ptr @_ZTIi +// CHECK: br label %[[CATCHDISPATCH1:.+]] + +// CHECK: [[LPAD2]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[TRYSYNCREG1]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[TRYDU1]] + +// CHECK: [[TRYDU1]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME2]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[B2CLEANUPLPAD]] +// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[B2CLEANUPLPAD_SPLIT:.+]] + +// CHECK-O1: [[B2CLEANUPLPAD_SPLIT]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1-NEXT: catch ptr @_ZTIi +// CHECK-O1: br label %[[B2CLEANUPLPAD_MERGE:.+]] + +// CHECK: [[B2CLEANUPLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NEXT: catch ptr @_ZTIi +// CHECK-O1: br label %[[B2CLEANUPLPAD_MERGE]] +// CHECK-O1: [[B2CLEANUPLPAD_MERGE]]: +// CHECK: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B2]]) +// CHECK: br label %[[CATCHDISPATCH1:.+]] + +// CHECK: [[CATCHDISPATCH1]]: +// CHECK-O1: call void @llvm.lifetime.end.p0(i64 [[B2SIZE]], ptr nonnull %[[B2ADDR]]) +// CHECK: br i1 {{.+}}, label %[[CATCH1:.+]], label 
%[[PFORCLEANUP1:.+]] + +// CHECK: [[CATCH1]]: +// CHECK: call void @_Z9catchfn_iii(i32 noundef 1, +// CHECK: br label %[[TRYCONT1]] + +// CHECK: [[TRYCONT1]]: +// CHECK: reattach within %[[PFORSYNCREG1]], label %[[PFORINC1]] + +// CHECK: [[PFORINC1]]: +// CHECK-O0: br i1 {{.+}}, label {{.+}}, label %[[PFORSYNC1:.+]], !llvm.loop +// CHECK-O1: br i1 {{.+}}, label %[[PFORSYNC1:.+]], label {{.+}}, !llvm.loop + +// CHECK: [[PFORSYNC1]]: +// CHECK-O0: sync within %[[PFORSYNCREG1]], label %[[PFORSYNCCONT1:.+]] +// CHECK-O1: sync within %[[PFORSYNCREG1]], label %[[PFOREND1:.+]] + +// CHECK: [[PFORCLEANUP1]]: +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[PFORSYNCREG1]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[PFORDU1]] + +// CHECK-O1: [[PFORDU1]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1: br label %[[TASKCLEANUP1]] + +// CHECK-O0: [[PFORSYNCCONT1]]: +// CHECK-O0-NEXT: invoke void @llvm.sync.unwind(token %[[PFORSYNCREG1]]) +// CHECK-O0-NEXT: to label %[[PFORSUCONT1:.+]] unwind label %[[PFORDU1]] +// CHECK-O0: [[PFORSUCONT1]]: +// CHECK-O0-NEXT: br label %[[PFOREND1:.+]] + +// CHECK: [[PFOREND1]]: +// CHECK-O0: detach within %[[PFORSYNCREG2]], label %[[PFORBODY2:.+]], label %[[PFORINC2:.+]] unwind label %[[PFORDU1]] +// CHECK-O1: detach within %[[PFORSYNCREG2]], label %[[PFORBODY2:.+]], label %[[PFORINC2:.+]] unwind label %[[PFORDU2:.+]] + +// CHECK: [[PFORBODY2]]: +// CHECK: %[[TRYSYNCREG2:.+]] = {{.*}}call token @llvm.syncregion.start() +// CHECK-O1: call void @llvm.lifetime.start.p0(i64 [[B3SIZE:.+]], ptr nonnull %[[B3ADDR:.+]]) +// CHECK: invoke void @_ZN3BarC1Ev(ptr {{.*}}%[[B3:.+]]) +// CHECK-NEXT: to label %[[B3CONSTRCONT:.+]] unwind label %[[B3CONSTRLPAD:.+]] + +// CHECK: [[B3CONSTRCONT]]: +// CHECK: %[[TASKFRAME3:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[TRYSYNCREG2]], label %[[TRYDET2:.+]], label %[[TRYDETCONT2:.+]] unwind label %[[TRYDU2:.+]] + +// CHECK: 
[[TRYDET2]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME3]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 4) +// CHECK-NEXT: to label %[[INVOKECONT3:.+]] unwind label %[[LPAD3:.+]] + +// CHECK: [[INVOKECONT3]]: +// CHECK-NEXT: reattach within %[[TRYSYNCREG2]], label %[[TRYDETCONT2]] + +// CHECK: [[TRYDETCONT2]]: +// CHECK-NEXT: call void @_Z9nothrowfni(i32 noundef 5) +// CHECK-NEXT: sync within %[[TRYSYNCREG2]], label %[[TRYSYNCCONT2:.+]] + +// CHECK: [[TRYSYNCCONT2]]: +// CHECK-NEXT: invoke void @llvm.sync.unwind(token %[[TRYSYNCREG2]]) +// CHECK-NEXT: to label %[[TRYSUCONT2:.+]] unwind label %[[B3CLEANUPLPAD:.+]] +// CHECK: [[TRYSUCONT2]]: +// CHECK-NEXT: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B3]]) +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 [[B3SIZE]], ptr nonnull %[[B3ADDR]]) +// CHECK-NEXT: br label %[[TRYCONT2:.+]] + +// CHECK: [[B3CONSTRLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NEXT: catch ptr @_ZTIi +// CHECK: br label %[[CATCHDISPATCH2:.+]] + +// CHECK: [[LPAD3]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[TRYSYNCREG2]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[TRYDU2]] + +// CHECK: [[TRYDU2]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME3]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[B3CLEANUPLPAD]] +// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[B3CLEANUPLPAD_SPLIT:.+]] + +// CHECK-O1: [[B3CLEANUPLPAD_SPLIT]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1-NEXT: catch ptr @_ZTIi +// CHECK-O1: br label %[[B3CLEANUPLPAD_MERGE:.+]] + +// CHECK: [[B3CLEANUPLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NEXT: catch ptr @_ZTIi +// CHECK-O1: br label %[[B3CLEANUPLPAD_MERGE]] +// CHECK-O1: [[B3CLEANUPLPAD_MERGE]]: +// CHECK: call void @_ZN3BarD1Ev(ptr 
{{.*}}%[[B3]]) +// CHECK: br label %[[CATCHDISPATCH2:.+]] + +// CHECK: [[CATCHDISPATCH2]]: +// CHECK-O1: call void @llvm.lifetime.end.p0(i64 [[B3SIZE]], ptr nonnull %[[B3ADDR]]) +// CHECK: br i1 {{.+}}, label %[[CATCH2:.+]], label %[[PFORCLEANUP2:.+]] + +// CHECK: [[CATCH2]]: +// CHECK: call void @_Z9catchfn_iii(i32 noundef 2, +// CHECK: br label %[[TRYCONT2]] + +// CHECK: [[TRYCONT2]]: +// CHECK: reattach within %[[PFORSYNCREG2]], label %[[PFORINC2]] + +// CHECK: [[PFORINC2]]: +// CHECK-O0: br i1 {{.+}}, label {{.+}}, label %[[PFORSYNC2:.+]], !llvm.loop +// CHECK-O1: br i1 {{.+}}, label %[[PFORSYNC2:.+]], label {{.+}}, !llvm.loop + +// CHECK: [[PFORSYNC2]]: +// CHECK-O0: sync within %[[PFORSYNCREG2]], label %[[PFORSYNCCONT2:.+]] +// CHECK-O1: sync within %[[PFORSYNCREG2]], label %[[PFOREND2:.+]] + +// CHECK: [[PFORCLEANUP2]]: +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[PFORSYNCREG2]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[PFORDU1]] +// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[PFORDU2]] + +// CHECK-O1: [[PFORDU2]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1: br label %[[TASKCLEANUP1]] + +// CHECK-O0: [[PFORSYNCCONT2]]: +// CHECK-O0-NEXT: invoke void @llvm.sync.unwind(token %[[PFORSYNCREG2]]) +// CHECK-O0-NEXT: to label %[[PFORSUCONT2:.+]] unwind label %[[PFORDU1]] +// CHECK-O0: [[PFORSUCONT2]]: +// CHECK-O0-NEXT: br label %[[PFOREND2:.+]] + +// CHECK: [[PFOREND2]]: +// CHECK-O1: call void @llvm.lifetime.start.p0(i64 [[B4SIZE:.+]], ptr nonnull %[[B4ADDR:.+]]) +// CHECK-NEXT: invoke void @_ZN3BarC1Ev(ptr {{.*}}%[[B4:.+]]) +// CHECK-O0-NEXT: to label %[[B4CONSTRCONT:.+]] unwind label %[[PFORDU1]] +// CHECK-O1-NEXT: to label %[[B4CONSTRCONT:.+]] unwind label %[[B4CONSTRLPAD:.+]] + +// CHECK: [[B4CONSTRCONT]]: +// CHECK: %[[TASKFRAME4:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED2:.+]], label %[[CONTINUE2:.+]] unwind label 
%[[DETUNWIND2:.+]] + +// CHECK: [[DETACHED2]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME4]]) +// CHECK-NEXT: invoke void @_Z3fooi(i32 noundef 6) +// CHECK-NEXT: to label %[[INVOKECONT4:.+]] unwind label %[[LPAD4:.+]] + +// CHECK: [[INVOKECONT4]]: +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE2]] + +// CHECK: [[CONTINUE2]]: +// CHECK-NEXT: call void @_Z9nothrowfni(i32 noundef 7) +// CHECK-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONT2:.+]] +// CHECK: [[SYNCCONT2]]: +// CHECK-NEXT: invoke void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK-NEXT: to label %[[SUCONT2:.+]] unwind label %[[B4CLEANUPLPAD:.+]] +// CHECK-O0: [[SUCONT2]]: +// CHECK-O0-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONT2:.+]] + +// CHECK-O1: [[B4CONSTRLPAD]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1: br label %[[TASKCLEANUP2:.+]] + +// CHECK: [[LPAD4]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[DETUNWIND2]] + +// CHECK: [[DETUNWIND2]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME4]], +// CHECK-O0-NEXT: to label %[[UNREACHABLE]] unwind label %[[B4CLEANUPLPAD]] +// CHECK-O1-NEXT: to label %[[UNREACHABLE]] unwind label %[[B4CLEANUPLPAD_SPLIT:.+]] + +// CHECK-O1: [[B4CLEANUPLPAD_SPLIT]]: +// CHECK-O1-NEXT: landingpad +// CHECK-O1-NEXT: cleanup +// CHECK-O1: br label %[[B4CLEANUPLPAD_MERGE:.+]] + +// CHECK: [[B4CLEANUPLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-O1: br label %[[B4CLEANUPLPAD_MERGE]] +// CHECK-O1: [[B4CLEANUPLPAD_MERGE]]: +// CHECK: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B4]]) +// CHECK-O0: br label %[[TASKCLEANUP1]] +// CHECK-O1: br label %[[TASKCLEANUP2]] + +// CHECK-O0: [[SYNCCONT2]]: +// CHECK-O0-NEXT: invoke void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK-O0-NEXT: 
to label %[[SUCONT2:.+]] unwind label %[[B4CLEANUPLPAD]] +// CHECK: [[SUCONT2]]: +// CHECK-NEXT: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B4]]) +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 [[B4SIZE]], ptr nonnull %[[B4ADDR]]) +// CHECK-NEXT: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B1]]) +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 [[B1SIZE]], ptr nonnull %[[B1ADDR]]) +// CHECK-NEXT: ret i32 0 + +// CHECK-O1: [[TASKCLEANUP2]]: +// CHECK-O1: call void @llvm.lifetime.end.p0(i64 [[B4SIZE]], ptr nonnull %[[B4ADDR]]) +// CHECK-O1-NEXT: br label %[[TASKCLEANUP1]] + +// CHECK: [[TASKCLEANUP1]]: +// CHECK: call void @_ZN3BarD1Ev(ptr {{.*}}%[[B1]]) +// CHECK-O1-NEXT: call void @llvm.lifetime.end.p0(i64 [[B1SIZE]], ptr nonnull %[[B1ADDR]]) +// CHECK-O0-NEXT: br label %[[EHRESUME]] + +// CHECK-O0: [[EHRESUME]]: +// CHECK: resume + +// CHECK: [[UNREACHABLE]]: +// CHECK-NEXT: unreachable + diff --git a/clang/test/Cilk/implicit-sync.c b/clang/test/Cilk/implicit-sync.c new file mode 100644 index 00000000000000..6f81334174d0f6 --- /dev/null +++ b/clang/test/Cilk/implicit-sync.c @@ -0,0 +1,49 @@ +// Verify that a sync is added implicitly at all exits to a function +// when -fcilkplus or -fopencilk is specified. 
+// +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -fcilkplus -ftapir=none -S -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -fopencilk -ftapir=none -S -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics + +int foo(int n); + +// CHECK-LABEL: doesnt_need_implicit_sync( +void doesnt_need_implicit_sync(int n) { + foo(n); + // CHECK-NOT: sync + // CHECK: ret void +} + +// CHECK-LABEL: needs_implicit_sync( +void needs_implicit_sync(int n) { + // CHECK: %[[SYNCREGION:.+]] = call token @llvm.syncregion.start() + // CHECK: detach within %[[SYNCREGION]] + _Cilk_spawn foo(n); + foo(n); + // CHECK: sync within %[[SYNCREGION]], label %[[SYNCCONT:.+]] + // CHECK: [[SYNCCONT]]: + // CHECK-NEXT: ret void +} + +// CHECK-LABEL: nested_implicit_sync( +void needs_nested_implicit_sync(int n) { + // CHECK: %[[SYNCREGION:.+]] = call token @llvm.syncregion.start() + // CHECK: detach within %[[SYNCREGION]] + _Cilk_spawn { + // CHECK-NOT: call token @llvm.syncregion.start() + foo(n); + } + // CHECK: detach within %[[SYNCREGION]] + _Cilk_spawn { + // CHECK: %[[SYNCREGIONINNER:.+]] = call token @llvm.syncregion.start() + // CHECK: detach within %[[SYNCREGIONINNER]] + _Cilk_spawn foo(n); + // CHECK: sync within %[[SYNCREGIONINNER]], label %[[INNERSYNCCONT:.+]] + // CHECK: [[INNERSYNCCONT]]: + // CHECK-NEXT: reattach within %[[SYNCREGION]] + } + foo(n); + // CHECK: sync within %[[SYNCREGION]], label %[[SYNCCONT:.+]] + // CHECK: [[SYNCCONT]]: + // CHECK-NEXT: ret void +} diff --git a/clang/test/Cilk/looptest.cpp b/clang/test/Cilk/looptest.cpp new file mode 100644 index 00000000000000..637522d1a3647f --- /dev/null +++ b/clang/test/Cilk/looptest.cpp @@ -0,0 +1,62 @@ +// RUN: %clang_cc1 -std=c++1z -verify %s + +int foo(int n); + +int Cilk_for_tests(int n) { + /* int n = 10; */ + /* _Cilk_for(int i = 0; i < n; i += 2); */ + /* _Cilk_for(int j = 0, __begin = 0, __end = n/2; __begin < __end; j += 2, __begin++); */ + _Cilk_for (int i = 0; i < n; 
++i); // expected-warning {{Cilk for loop has empty body}} + _Cilk_for (int i = 0, __end = n; i < __end; ++i); // expected-warning {{Cilk for loop has empty body}} + unsigned long long m = 10; + _Cilk_for (int i = 0; i < m; ++i); // expected-warning {{Cilk for loop has empty body}} + _Cilk_for (int i = 0, __end = m; i < __end; ++i); // expected-warning {{Cilk for loop has empty body}} + + // Check for return statements, which cannot appear anywhere in the body of a + // _Cilk_for loop. + _Cilk_for (int i = 0; i < n; ++i) return 7; // expected-error{{cannot return}} + _Cilk_for (int i = 0; i < n; ++i) + for (int j = 1; j < i; ++j) + return 7; // expected-error{{cannot return}} + + // Check for illegal break statements, which cannot bind to the scope of a + // _Cilk_for loop, but can bind to loops nested within. + _Cilk_for (int i = 0; i < n; ++i) break; // expected-error{{cannot break}} + _Cilk_for (int i = 0; i < n; ++i) + for (int j = 1; j < i; ++j) + break; + return 0; +} + +int pragma_tests(int n) { +#pragma clang loop unroll_count(4) + _Cilk_for (int i = 0; i < n; ++i) + foo(i); + +#pragma cilk grainsize(4) + _Cilk_for (int i = 0; i < n; ++i) + foo(i); + +#pragma cilk grainsize 4 + _Cilk_for (int i = 0; i < n; ++i) + foo(i); + +#pragma cilk grainsize = 4 \ +// expected-warning{{'#pragma cilk grainsize' no longer requires '='}} + _Cilk_for (int i = 0; i < n; ++i) + foo(i); + + return 0; +} + +int scope_tests(int n) { + int A[5]; + _Cilk_for(int i = 0; i < n; ++i) { + int A[5]; + A[i%5] = i; + } + for(int i = 0; i < n; ++i) { + A[i%5] = i%5; + } + return 0; +} diff --git a/clang/test/Cilk/multiple-spawn-args-check.c b/clang/test/Cilk/multiple-spawn-args-check.c new file mode 100644 index 00000000000000..9ef5c34e3ed173 --- /dev/null +++ b/clang/test/Cilk/multiple-spawn-args-check.c @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s + +extern int g(int); +extern int h(int, int, int, int, int); + +void f(int x) +{ + h(g(x), _Cilk_spawn g(x), _Cilk_spawn 
g(x), g(x), g(x)); // expected-error{{multiple spawns among call arguments}} + h(g(x), _Cilk_spawn g(x), _Cilk_spawn g(x), g(x), _Cilk_spawn g(x)); // expected-error{{multiple spawns among call arguments}} expected-note{{another spawn here}} +} diff --git a/clang/test/Cilk/nested-trycatch.cpp b/clang/test/Cilk/nested-trycatch.cpp new file mode 100644 index 00000000000000..41ffa083defce3 --- /dev/null +++ b/clang/test/Cilk/nested-trycatch.cpp @@ -0,0 +1,57 @@ +// RUN: %clang_cc1 -std=c++1z -fexceptions -fcxx-exceptions -fopencilk -ftapir=none -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics + +void printf(const char *format, ...); +void pitcher(int x); +void pitcher(const char *x); + +void foo() { + _Cilk_spawn printf("Hi\n"); + try { + _Cilk_spawn pitcher(1); + try { + pitcher("a"); + } catch (int x) { + printf("foo inner caught %d\n", x); + } + } + catch (const char *x) { + printf("foo caught \"%s\"\n", x); + } +} + +// CHECK: define {{.*}}void @_Z3foov() +// CHECK: %[[SYNCREG:.+]] = call token @llvm.syncregion.start() +// CHECK: detach within %[[SYNCREG]] +// CHECK: reattach within %[[SYNCREG]] + +// CHECK: call token @llvm.taskframe.create() +// CHECK: %[[SYNCREG1:.+]] = call token @llvm.syncregion.start() +// CHECK: detach within %[[SYNCREG1]] +// CHECK: reattach within %[[SYNCREG1]] + +// CHECK: %[[TF:.+]] = {{.*}}call token @llvm.taskframe.create() +// CHECK: invoke void @_Z7pitcherPKc( +// CHECK-NEXT: to label %{{.+}} unwind label %[[NESTEDLPAD:.+]] + +// CHECK: [[NESTEDLPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK-NEXT: catch ptr @_ZTIi +// CHECK: br i1 {{.+}}, label {{.+}}, label %[[EHCLEANUP:.+]] + +// CHECK: call void @llvm.taskframe.end(token %[[TF]]) +// CHECK: sync within %[[SYNCREG1]] + +// CHECK: [[EHCLEANUP]]: +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TF]], + +int main(int argc, char *argv[]) +{ + try { + foo(); + } catch (int x) { + printf("main caught 
%d\n", x); + } + return 0; +} diff --git a/clang/test/Cilk/opencilk-spawn.cpp b/clang/test/Cilk/opencilk-spawn.cpp new file mode 100644 index 00000000000000..2e1f97446c57ab --- /dev/null +++ b/clang/test/Cilk/opencilk-spawn.cpp @@ -0,0 +1,79 @@ +// RUN: %clang_cc1 -std=c++1z -fexceptions -fcxx-exceptions -fopencilk -ftapir=none -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s + +int return_stuff(int i); + +int return_spawn_test(int i){ + return _Cilk_spawn return_stuff(i); // expected-warning{{no parallelism from a '_Cilk_spawn' in a return statement}} +} + +// CHECK-LABEL: define {{(dso_local )?}}{{.*}}i32 @_Z17return_spawn_testi(i32 noundef %i) +// CHECK: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[CONTINUE:.+]] +// CHECK: [[DETACHED]] +// CHECK: %[[CALL:.+]] = call noundef i32 @_Z12return_stuffi(i32 +// CHECK-NEXT: store i32 %[[CALL]] +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE]] +// CHECK: [[CONTINUE]] +// CHECK-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONT:.+]] +// CHECK: [[SYNCCONT]] +// CHECK-NEXT: call void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONT2:.+]] +// CHECK: [[SYNCCONT2]] +// CHECK-NEXT: call void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK-NEXT: %[[RETVALLOAD:.+]] = load i32 +// CHECK: ret i32 %[[RETVALLOAD]] + +class Bar { + int val[4] = {0,0,0,0}; +public: + Bar(); + ~Bar(); + Bar(const Bar &that); + Bar(Bar &&that); + Bar &operator=(Bar that); + friend void swap(Bar &left, Bar &right); + + const int getValSpawn(int i) const { return _Cilk_spawn return_stuff(val[i]); } // expected-warning{{no parallelism from a '_Cilk_spawn' in a return statement}} +}; + +int foo(const Bar &b); + +void spawn_infinite_loop() { + _Cilk_spawn { + label1: Bar().getValSpawn(0); + label2: foo(Bar()); + goto label1; + }; +} + +// CHECK-LABEL: define {{(dso_local )?}}void @_Z19spawn_infinite_loopv() +// CHECK: detach within %[[SYNCREG:.+]], label 
%[[DETACHED:.+]], label %[[CONTINUE:.+]] unwind +// CHECK: [[DETACHED]] +// CHECK: %[[REFTMP:.+]] = alloca %class.Bar +// CHECK: %[[REFTMP2:.+]] = alloca %class.Bar +// CHECK: br label %[[LABEL1:.+]] +// CHECK: [[LABEL1]] +// CHECK: call void @_ZN3BarC1Ev(ptr {{.*}}dereferenceable(16) %[[REFTMP]]) +// CHECK: %[[CALL:.+]] = invoke {{.*}}i32 @_ZNK3Bar11getValSpawnEi(ptr {{.*}}dereferenceable(16) %[[REFTMP]], i32 noundef 0) +// CHECK: call void @_ZN3BarD1Ev(ptr {{.*}}dereferenceable(16) %[[REFTMP]]) +// CHECK: call void @_ZN3BarC1Ev(ptr {{.*}}dereferenceable(16) %[[REFTMP2]]) +// CHECK: %[[CALL:.+]] = invoke {{.*}}i32 @_Z3fooRK3Bar(ptr {{.+}}%[[REFTMP2]]) +// CHECK: call void @_ZN3BarD1Ev(ptr {{.*}}dereferenceable(16) %[[REFTMP2]]) +// CHECK-NEXT: br label %[[LABEL1]] +// CHECK: [[CONTINUE]] +// CHECK: ret void + +// CHECK-LABEL: define linkonce_odr {{(dso_local )?}}{{.*}}i32 @_ZNK3Bar11getValSpawnEi(ptr +// CHECK: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[CONTINUE:.+]] +// CHECK: [[DETACHED]] +// CHECK: %[[CALL:.+]] = call noundef i32 @_Z12return_stuffi(i32 +// CHECK-NEXT: store i32 %[[CALL]] +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE]] +// CHECK: [[CONTINUE]] +// CHECK-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONT:.+]] +// CHECK: [[SYNCCONT]] +// CHECK-NEXT: call void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK-NEXT: sync within %[[SYNCREG]], label %[[SYNCCONT2:.+]] +// CHECK: [[SYNCCONT2]] +// CHECK-NEXT: call void @llvm.sync.unwind(token %[[SYNCREG]]) +// CHECK-NEXT: %[[RETVALLOAD:.+]] = load i32 +// CHECK: ret i32 %[[RETVALLOAD]] diff --git a/clang/test/Cilk/reducer-skip-init.c b/clang/test/Cilk/reducer-skip-init.c new file mode 100644 index 00000000000000..68f5be2e2c1b08 --- /dev/null +++ b/clang/test/Cilk/reducer-skip-init.c @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 %s -fopencilk -verify -fsyntax-only +extern void identity_short(void *); +extern void reduce_short(void *, void *); + +int into(int x) +{ + if (x) + 
goto skip; // expected-error{{cannot jump}} + short _Hyperobject(identity_short, reduce_short) y; // expected-note{{jump bypasses initialization}} +skip: + return y; +} diff --git a/clang/test/Cilk/regiontest.c b/clang/test/Cilk/regiontest.c new file mode 100644 index 00000000000000..2b5adc8cc5e627 --- /dev/null +++ b/clang/test/Cilk/regiontest.c @@ -0,0 +1,55 @@ +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -fopencilk -ftapir=none -S -emit-llvm -o - | FileCheck %s + +int bar(); +int baz(int); + +// CHECK-LABEL: syncreg_spawn( +int syncreg_spawn(int n) { + // CHECK: %[[SYNCREGTOP:.+]] = call token @llvm.syncregion.start() + // CHECK: %[[TASKFRAME:.+]] = call token @llvm.taskframe.create() + // CHECK: detach within %[[SYNCREGTOP]], label %[[DETACHEDBAR:.+]], + // CHECK: [[DETACHEDBAR]]: + // CHECK-NEXT: call void @llvm.taskframe.use(token %[[TASKFRAME]]) + // CHECK-NEXT: call i32 (...) @bar + int x = _Cilk_spawn bar(); + // CHECK: %[[TASKFRAME2:.+]] = call token @llvm.taskframe.create() + // CHECK: detach within %[[SYNCREGTOP]], label %[[DETACHEDBAZ:.+]], + // CHECK: [[DETACHEDBAZ]]: + // CHECK-NEXT: call void @llvm.taskframe.use(token %[[TASKFRAME2]]) + // CHECK-NEXT: call i32 @baz + int y = _Cilk_spawn baz(n); + // CHECK: sync within %[[SYNCREGTOP]], label %[[SYNCCONT:.+]] + _Cilk_sync; + // CHECK: [[SYNCCONT]]: + // CHECK: add + // CHECK: ret + return x+y; +} + +// CHECK-LABEL: syncreg_loop( +void syncreg_loop(int n) { + // CHECK: %[[SYNCREGLOOP:.+]] = call token @llvm.syncregion.start() + // CHECK-DAG: detach within %[[SYNCREGLOOP]] + // CHECK-DAG: sync within %[[SYNCREGLOOP]] + _Cilk_for(int i = 0; i < n; ++i) { + baz(i); + } +} + +// CHECK-LABEL: mixed_spawn_and_loop( +int mixed_spawn_and_loop(int n) { + // CHECK: %[[SYNCREGTOP:.+]] = call token @llvm.syncregion.start() + // CHECK: %[[SYNCREGLOOP:.+]] = call token @llvm.syncregion.start() + // CHECK: detach within %[[SYNCREGTOP]] + int x = _Cilk_spawn bar(); + // CHECK-DAG: detach within 
%[[SYNCREGLOOP]] + // CHECK-DAG: sync within %[[SYNCREGLOOP]] + _Cilk_for(int i = 0; i < n; ++i) { + baz(i); + } + // CHECK: detach within %[[SYNCREGTOP]] + int y = _Cilk_spawn baz(n); + // CHECK: sync within %[[SYNCREGTOP]] + _Cilk_sync; + return x+y; +} diff --git a/clang/test/Cilk/spawn-atomic.c b/clang/test/Cilk/spawn-atomic.c new file mode 100644 index 00000000000000..b972f2dfa17be6 --- /dev/null +++ b/clang/test/Cilk/spawn-atomic.c @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -verify -ftapir=none -S -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics + +extern int cilk_main(int, char **); + +void f(_Atomic int *out, int argc, char **argv) +{ + __c11_atomic_store(out, _Cilk_spawn cilk_main(argc, argv), __ATOMIC_RELAXED); +} + +// CHECK-LABEL define {{.*}}void @f( +// CHECK: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[CONTIN:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: call i32 @cilk_main( +// CHECK: store atomic i32 +// CHECK: reattach within %[[SYNCREG]], label %[[CONTIN]] diff --git a/clang/test/Cilk/spawn-builtin.c b/clang/test/Cilk/spawn-builtin.c new file mode 100644 index 00000000000000..082976737f62e7 --- /dev/null +++ b/clang/test/Cilk/spawn-builtin.c @@ -0,0 +1,76 @@ +// Check the spawning of builtins. +// +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -fopencilk -verify -ftapir=none -S -emit-llvm -o - | FileCheck %s + +// Thanks to Brian Wheatman for originally finding the bug captured by this +// test. 
+void spawn_memcpy(float *A, float *B, int n) { + _Cilk_spawn __builtin_memcpy(A, B, sizeof(float) * n/2); + __builtin_memcpy(A+n/2, B+n/2, sizeof(float) * (n-n/2)); + _Cilk_sync; +} + +// CHECK-LABEL: define {{.*}}void @spawn_memcpy( +// CHECK: %[[TASKFRAME:.+]] = call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG:.+]], label %[[DETBLOCK:.+]], label %[[CONT:.+]] +// CHECK: [[DETBLOCK]]: +// CHECK-NEXT: call void @llvm.taskframe.use(token %[[TASKFRAME]]) +// CHECK-NEXT: call void @llvm.memcpy +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONT]] +// CHECK: call void @llvm.memcpy +// CHECK: sync within %[[SYNCREG]] + +void spawn_unreachable() { + _Cilk_spawn __builtin_unreachable(); +} + +// CHECK-LABEL: define {{.*}}void @spawn_unreachable( +// CHECK: %[[TASKFRAME:.+]] = call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG:.+]], label %[[DETBLOCK:.+]], label %[[CONT:.+]] +// CHECK: [[DETBLOCK]]: +// CHECK-NEXT: call void @llvm.taskframe.use(token %[[TASKFRAME]]) +// CHECK-NEXT: unreachable +// CHECK-NOT: reattach +// CHECK: [[CONT]]: +// CHECK-NEXT: sync within %[[SYNCREG]] + +void cilkfor_unreachable() { + _Cilk_for(int i = 0; i < 1; ++i) + __builtin_unreachable(); +} + +// CHECK-LABEL: define {{.*}}void @cilkfor_unreachable( +// CHECK: detach within %[[SYNCREG:.+]], label %[[PFORBODY:.+]], label %[[PFORINC:.+]] +// CHECK: [[PFORBODY]]: +// CHECK: unreachable +// Clang codegen for Cilk emits a block with no predecessors that contains a +// reattach. 
+// CHECK: reattach within %[[SYNCREG]], label %[[PFORINC]] +// CHECK: [[PFORINC]]: +// CHECK: sync within %[[SYNCREG]] + +void spawn_trap() { + _Cilk_spawn __builtin_trap(); +} + +// CHECK-LABEL: define {{.*}}void @spawn_trap( +// CHECK: %[[TASKFRAME:.+]] = call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG:.+]], label %[[DETBLOCK:.+]], label %[[CONT:.+]] +// CHECK: [[DETBLOCK]]: +// CHECK-NEXT: call void @llvm.taskframe.use(token %[[TASKFRAME]]) +// CHECK-NEXT: call void @llvm.trap() +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONT]] +// CHECK: [[CONT]]: +// CHECK-NEXT: sync within %[[SYNCREG]] + +void spawn_assume() { + _Cilk_spawn __builtin_assume(0); // expected-warning{{Failed to emit spawn}} +} + +// It doesn't make sense to spawn an assume, so we expect not to find any +// Tapir instructions. +// CHECK-LABEL: define {{.*}}void @spawn_assume( +// CHECK-NOT: detach +// CHECK: call void @llvm.assume +// CHECK-NOT: sync +// CHECK: ret void diff --git a/clang/test/Cilk/spawn-call-arg.c b/clang/test/Cilk/spawn-call-arg.c new file mode 100644 index 00000000000000..595747f4340277 --- /dev/null +++ b/clang/test/Cilk/spawn-call-arg.c @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -verify -ftapir=none -S -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics + +extern int g(int); + +void f(int x) +{ + g(_Cilk_spawn g(x)); +} + +// CHECK-LABEL define {{.*}}void @f( +// CHECK: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[CONTIN:.+]] + +// CHECK: [[DETACHED]]: +// CHECK: call i32 @g( +// CHECK: call i32 @g( +// CHECK: reattach within %[[SYNCREG]], label %[[CONTIN]] diff --git a/clang/test/Cilk/spawn-decl-with-constructors.cpp b/clang/test/Cilk/spawn-decl-with-constructors.cpp new file mode 100644 index 00000000000000..6e5fe18021f746 --- /dev/null +++ b/clang/test/Cilk/spawn-decl-with-constructors.cpp @@ -0,0 +1,267 @@ +// Test code generation for uses of _Cilk_spawn in variable declarations. 
+// +// Thanks to Dr. I-Ting Angelina Lee for contributing the original source code +// for this test case. +// +// RUN: %clang_cc1 %s -std=c++11 -triple x86_64-unknown-linux-gnu -fopencilk -fcxx-exceptions -fexceptions -ftapir=none -S -emit-llvm -o - | FileCheck %s + +template +struct pair { + T1 first; + T2 second; + pair(T1 first, T2 second) : first(first), second(second) {} + + template + pair(const pair& __p) + : first(__p.first), second(__p.second) { } +}; + +template +pair make_pair(T1 &&t1, T2 &&t2) { + return pair(t1, t2); +} + +template +struct _seq { + T* A; + long n; + _seq() {A = 0; n=0;} + _seq(T* _A, long _n) : A(_A), n(_n) {} + void del() {free(A);} +}; + +typedef long intT; + +template class _point2d; + +template class _vect2d { +public: + typedef _floatT floatT; + typedef _point2d pointT; + typedef _vect2d vectT; + floatT x; floatT y; + _vect2d(floatT xx,floatT yy) : x(xx),y(yy) {} + _vect2d() {x=0;y=0;} + _vect2d(pointT p); + _vect2d(floatT* p) : x(p[0]), y(p[1]) {}; + vectT operator+(vectT op2) {return vectT(x + op2.x, y + op2.y);} + vectT operator-(vectT op2) {return vectT(x - op2.x, y - op2.y);} + pointT operator+(pointT op2); + vectT operator*(floatT s) {return vectT(x * s, y * s);} + vectT operator/(floatT s) {return vectT(x / s, y / s);} + floatT operator[] (int i) {return (i==0) ? 
x : y;}; + floatT dot(vectT v) {return x * v.x + y * v.y;} + floatT cross(vectT v) { return x*v.y - y*v.x; } + floatT maxDim() {return max(x,y);} + floatT Length(void) { return sqrt(x*x+y*y);} + static const int dim = 3; +}; + +template class _point2d { +public: + typedef _floatT floatT; + typedef _vect2d vectT; + typedef _point2d pointT; + floatT x; floatT y; + int dimension() {return 2;} + _point2d(floatT xx,floatT yy) : x(xx),y(yy) {} + _point2d() {x=0;y=0;} + _point2d(vectT v) : x(v.x),y(v.y) {}; + _point2d(floatT* p) : x(p[0]), y(p[1]) {}; + vectT operator-(pointT op2) {return vectT(x - op2.x, y - op2.y);} + pointT operator+(vectT op2) {return pointT(x + op2.x, y + op2.y);} + floatT operator[] (int i) {return (i==0) ? x : y;}; + pointT minCoords(pointT b) { return pointT(min(x,b.x),min(y,b.y)); } + pointT maxCoords(pointT b) { return pointT(max(x,b.x),max(y,b.y)); } + int quadrant(pointT center) { + int index = 0; + if (x > center.x) index += 1; + if (y > center.y) index += 2; + return index; + } + // returns a pointT offset by offset in one of 4 directions + // depending on dir (an integer from [0..3]) + pointT offsetPoint(int dir, floatT offset) { + floatT xx = x + ((dir & 1) ? offset : -offset); + floatT yy = y + ((dir & 2) ? 
offset : -offset); + return pointT(xx,yy); + } + bool outOfBox(pointT pt, floatT hsize) { + return ((x - hsize > pt.x) || (x + hsize < pt.x) || + (y - hsize > pt.y) || (y + hsize < pt.y)); + } + static const int dim = 2; +}; + +typedef _point2d point2d; + +template +pair<_FIter, _FIter> +minmax_element(_FIter, _FIter, _Compare); + +pair, pair > merge(pair, pair > minmax_e1, pair, pair > minmax_e2); + +bool compare_x(const point2d &p1, const point2d &p2){ + if(p1.x < p2.x){ + return true; + }else if (p1.x == p2.x){ + return p1.y < p2.y; + }else{ + return false; + } +} + +bool compare_y(const point2d &p1, const point2d &p2){ + if(p1.y < p2.y){ + return true; + }else if(p1.x == p2.x){ + return p1.x < p2.x; + }else{ + return false; + } +} +static const intT minmax_base = 2000; + +double triArea(point2d a, point2d b, point2d c); +struct aboveLineP { + point2d l, r; + point2d* P; + aboveLineP(point2d* _P, point2d &_l, point2d &_r) : P(_P), l(_l), r(_r) {} + bool operator() (point2d &i) {return triArea(l, r, i) > 0.0;} +}; + +point2d *offset_helper(point2d * buf, long off){ + char *tmp = (char *)buf; + tmp = tmp + off; + return (point2d *)tmp; +} + +intT quickHullP(point2d* P, point2d* Ptmp, intT n, point2d l, point2d r, intT depth); + +template +intT wrapped_filter_new(ET* In, ET* Out, intT n, PRED p); + +pair, pair > find_minmax_xy(point2d *p, intT n){ + if (n < minmax_base) { + pair minmax_ex = minmax_element(p, p+n, compare_x); + pair minmax_ey = minmax_element(p, p+n, compare_y); + return make_pair(minmax_ex, minmax_ey); + } else { + pair, pair > minmax_e1 = _Cilk_spawn find_minmax_xy(p, n/2); + pair, pair > minmax_e2 = find_minmax_xy(p+n/2, n - n/2); + _Cilk_sync; + return merge(minmax_e1, minmax_e2); + } +} + +// CHECK-LABEL: @_Z14find_minmax_xyP8_point2dIdEl( +// CHECK: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[CONTINUE:.+]] +// CHECK: [[DETACHED]]: +// CHECK: call void @_Z14find_minmax_xyP8_point2dIdEl(ptr sret(%struct.pair) 
{{.*}}%minmax_e1, +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE]] + +_seq hullP(point2d* P, intT n, point2d *Ptmp) { + intT num_pages = (n * sizeof(point2d)); + pair, pair > minmaxxy1 = _Cilk_spawn find_minmax_xy(P, n/4); + pair, pair > minmaxxy2 = _Cilk_spawn find_minmax_xy(offset_helper(P, num_pages), n/4); + pair, pair > minmaxxy3 = _Cilk_spawn find_minmax_xy(offset_helper(P, num_pages*2), n/4); + pair, pair > minmaxxy4 = find_minmax_xy(offset_helper(P, num_pages*3), n - n/4*3); + _Cilk_sync; + + pair, pair > minmaxxy = merge(minmaxxy1, minmaxxy2); + minmaxxy = merge(minmaxxy, minmaxxy3); + minmaxxy = merge(minmaxxy, minmaxxy4); + + point2d l = *minmaxxy.first.first; + point2d r = *minmaxxy.first.second; + + point2d b = *minmaxxy.second.first; + point2d t = *minmaxxy.second.second; + + intT n1 = _Cilk_spawn wrapped_filter_new(P, offset_helper(Ptmp, 0), n, aboveLineP(P, l, t)); + intT n2 = _Cilk_spawn wrapped_filter_new(P, offset_helper(Ptmp, num_pages), n, aboveLineP(P, t, r)); + intT n3 = _Cilk_spawn wrapped_filter_new(P, offset_helper(Ptmp, num_pages*2), n, aboveLineP(P, r, b)); + intT n4 = wrapped_filter_new(P, offset_helper(Ptmp, num_pages*3), n, aboveLineP(P, b, l)); + _Cilk_sync; + intT m1; intT m2; intT m3; intT m4; + m1 = _Cilk_spawn quickHullP(Ptmp, P, n1, l, t, 5); + m2 = _Cilk_spawn quickHullP(offset_helper(Ptmp, num_pages), offset_helper(P, num_pages), n2, t, r, 5); + m3 = _Cilk_spawn quickHullP(offset_helper(Ptmp, num_pages*2), offset_helper(P, num_pages*2), n3, r, b, 5); + m4 = quickHullP(offset_helper(Ptmp, num_pages*3), offset_helper(P, num_pages*3), n4, b, l, 5); + _Cilk_sync; + + int offset = 0; + if (l.x != t.x || l.y != t.y){ + offset++; + } + _Cilk_for (intT i=0; i < m1; i++) P[i+offset] = Ptmp[i]; + if (t.x != r.x || t.y != r.y){ + offset++; + } + _Cilk_for (intT i=0; i < m2; i++) P[i+m1+offset] = offset_helper(Ptmp, num_pages)[i]; + if (r.x != b.x || r.y != b.y){ + offset++; + } + _Cilk_for (intT i=0; i < m3; i++) 
P[i+m1+m2+offset] = offset_helper(Ptmp, num_pages*2)[i]; + if (b.x != l.x || b.y != l.y){ + offset++; + } + _Cilk_for (intT i=0; i < m4; i++) P[i+m1+m2+m3+offset] = offset_helper(Ptmp, num_pages*3)[i]; + + int offset2 = 0; + + P[0] = l; + offset2 += m1; + if(l.x != t.x || l.y != t.y){ + offset2++; + P[offset2] = t; + } + + offset2 += m2; + if(t.x != r.x || t.y != r.y){ + offset2++; + P[offset2] = r; + } + + offset2 += m3; + if(t.x != r.x || t.y != r.y){ + offset2++; + P[offset2] = b; + } + return _seq(P, offset2+1); +} + +// CHECK-LABEL: @_Z5hullPP8_point2dIdElS1_( + +// CHECK: detach within %[[SYNCREG:.+]], label %[[DETACHED:.+]], label %[[CONTINUE:.+]] +// CHECK: [[DETACHED]]: +// CHECK: call void @_Z14find_minmax_xyP8_point2dIdEl(ptr sret(%struct.pair) {{.*}}%minmaxxy1, +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE]] + +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED2:.+]], label %[[CONTINUE2:.+]] +// CHECK: [[DETACHED2]]: +// CHECK: call void @_Z14find_minmax_xyP8_point2dIdEl(ptr sret(%struct.pair) {{.*}}%minmaxxy2, +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE2]] + +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED3:.+]], label %[[CONTINUE3:.+]] +// CHECK: [[DETACHED3]]: +// CHECK: call void @_Z14find_minmax_xyP8_point2dIdEl(ptr sret(%struct.pair) {{.*}}%minmaxxy3, +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE3]] + +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED4:.+]], label %[[CONTINUE4:.+]] +// CHECK: [[DETACHED4]]: +// CHECK: %[[RET4:.+]] = call {{.*}}i64 @_Z18wrapped_filter_newI8_point2dIdEl10aboveLinePET0_PT_S5_S3_T1_( +// CHECK-NEXT: store i64 %[[RET4]] +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE4]] + +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED5:.+]], label %[[CONTINUE5:.+]] +// CHECK: [[DETACHED5]]: +// CHECK: %[[RET5:.+]] = call {{.*}}i64 @_Z18wrapped_filter_newI8_point2dIdEl10aboveLinePET0_PT_S5_S3_T1_( +// CHECK-NEXT: store i64 %[[RET5]] +// 
CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE5]] + +// CHECK: detach within %[[SYNCREG]], label %[[DETACHED6:.+]], label %[[CONTINUE6:.+]] +// CHECK: [[DETACHED6]]: +// CHECK: %[[RET6:.+]] = call {{.*}}i64 @_Z18wrapped_filter_newI8_point2dIdEl10aboveLinePET0_PT_S5_S3_T1_( +// CHECK-NEXT: store i64 %[[RET6]] +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTINUE6]] diff --git a/clang/test/Cilk/spawn-expr.c b/clang/test/Cilk/spawn-expr.c new file mode 100644 index 00000000000000..96cffd89ff2410 --- /dev/null +++ b/clang/test/Cilk/spawn-expr.c @@ -0,0 +1,15 @@ +// Check the spawning of builtins. +// +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -fopencilk -verify -ftapir=none -S -emit-llvm -o - | FileCheck %s + +int g(int); + +int f() { + int x = _Cilk_spawn 0; // expected-warning {{Failed to emit spawn}} + g(_Cilk_spawn 7); // expected-warning {{Failed to emit spawn}} + return _Cilk_spawn 1; // expected-warning {{no parallelism from a '_Cilk_spawn' in a return statement}} expected-warning {{Failed to emit spawn}} +} + +// CHECK-LABEL: define {{.*}}i32 @f( +// CHECK-NOT: detach + diff --git a/clang/test/Cilk/spawn-in-cilk-for.c b/clang/test/Cilk/spawn-in-cilk-for.c new file mode 100644 index 00000000000000..a1f462314a6ee2 --- /dev/null +++ b/clang/test/Cilk/spawn-in-cilk-for.c @@ -0,0 +1,101 @@ +// Verify proper creation of sync regions and syncs when _Cilk_spawn statements +// are nested within a _Cilk_for. +// +// Thanks to Jackie Bredenberg and Yuan Yao for the original source code for +// this test case. +// +// RUN: %clang_cc1 %s -std=gnu11 -triple x86_64-unknown-linux-gnu -O0 -fopencilk -verify -S -emit-llvm -ftapir=none -o - | FileCheck %s +// expected-no-diagnostics + +// A two-dimensional line. 
+typedef struct Line { + unsigned char quadrant; // stores 4 bits about position within the quadtree +} Line; + +typedef struct LineList { + unsigned int size; +} LineList; + +typedef struct Quadtree { + // If this quadtree is not expanded, then all four below are NULL + struct Quadtree* LD; // x < center.x, y < center.y + struct Quadtree* LU; // x < center.x, y >= center.y + struct Quadtree* RD; // x >= center.x, y < center.y + struct Quadtree* RU; // x >= center.x, y >= center.y + + // A list of lines that do not fit inside any subtree + LineList* lines; + + // An array version of LineList, for parallelization. + Line** lineArray; + + // Number of lines in the quadtree (including ones in the subtrees) + unsigned int numLines; +} Quadtree; + +typedef struct CollisionWorld CollisionWorld; + +#define LD_BIT 0b0001 +#define LU_BIT 0b0010 +#define RD_BIT 0b0100 +#define RU_BIT 0b1000 + +void Quadtree_findAllIntersectingPairsWithLine(Quadtree* quadtree, + CollisionWorld* collisionWorld, + Line* line); + +// search through the quadtree to find any collisions contained within it, +// including collisions with lines in the sub-trees +void Quadtree_findAllIntersectingPairs(Quadtree* quadtree, + CollisionWorld* collisionWorld) { + LineList* currentLevelLines = quadtree->lines; + int nodeSize = currentLevelLines->size; + + // For each line, check it with the subtrees if there are possible intersections. + _Cilk_for (int i = 0; i < nodeSize; i++) { + // CHECK: detach within %[[OUTERSR:.+]], label %{{.+}}, label + Line* line = quadtree->lineArray[i]; + + // Checking if we need to check this line against each of the subtrees + // If none of the four sides of the parallelogram swept out by the line + // intersect any of the four sides and is not inside, then don't bother checking. 
+ if (quadtree->LD->numLines > 0 && (line->quadrant & LD_BIT)) { + _Cilk_spawn Quadtree_findAllIntersectingPairsWithLine(quadtree->LD, collisionWorld, line); + // CHECK: detach within %[[INNERSR:.+]], label %{{.+}}, label + } + if (quadtree->LU->numLines > 0 && (line->quadrant & LU_BIT)) { + _Cilk_spawn Quadtree_findAllIntersectingPairsWithLine(quadtree->LU, collisionWorld, line); + // CHECK: detach within %[[INNERSR]], + } + if (quadtree->RD->numLines > 0 && (line->quadrant & RD_BIT)) { + _Cilk_spawn Quadtree_findAllIntersectingPairsWithLine(quadtree->RD, collisionWorld, line); + // CHECK: detach within %[[INNERSR]], + } + if (quadtree->RU->numLines > 0 && (line->quadrant & RU_BIT)) { + _Cilk_spawn Quadtree_findAllIntersectingPairsWithLine(quadtree->RU, collisionWorld, line); + // CHECK: detach within %[[INNERSR]], + } + // CHECK: sync within %[[INNERSR]], + // CHECK: reattach within %[[OUTERSR]], + } + // CHECK: sync within %[[OUTERSR]], + + // Find intersections within the subtrees + if (quadtree->LD->numLines > 1) { + _Cilk_spawn Quadtree_findAllIntersectingPairs(quadtree->LD, collisionWorld); + // CHECK: detach within %[[OUTERSR2:.+]], label %{{.+}}, label + } + if (quadtree->LU->numLines > 1) { + _Cilk_spawn Quadtree_findAllIntersectingPairs(quadtree->LU, collisionWorld); + // CHECK: detach within %[[OUTERSR2]], + } + if (quadtree->RD->numLines > 1) { + _Cilk_spawn Quadtree_findAllIntersectingPairs(quadtree->RD, collisionWorld); + // CHECK: detach within %[[OUTERSR2]], + } + if (quadtree->RU->numLines > 1) { + Quadtree_findAllIntersectingPairs(quadtree->RU, collisionWorld); + } + _Cilk_sync; + // CHECK: sync within %[[OUTERSR2]], +} diff --git a/clang/test/Cilk/spawn-return.cpp b/clang/test/Cilk/spawn-return.cpp new file mode 100644 index 00000000000000..44e83426a19da3 --- /dev/null +++ b/clang/test/Cilk/spawn-return.cpp @@ -0,0 +1,3 @@ +// RUN: %clang_cc1 -std=c++1z -fopencilk -fsyntax-only -verify %s + +int main() { _Cilk_spawn return 0; } // 
expected-error{{cannot return from within a '_Cilk_spawn' statement}} diff --git a/clang/test/Cilk/spawn-template.cpp b/clang/test/Cilk/spawn-template.cpp new file mode 100644 index 00000000000000..206d2827bea410 --- /dev/null +++ b/clang/test/Cilk/spawn-template.cpp @@ -0,0 +1,78 @@ +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -fopencilk -fcxx-exceptions -fexceptions -ftapir=none -S -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics + +template +intT fib(intT n) { + if (n < 2) return n; + intT x = _Cilk_spawn fib(n - 1); + intT y = fib(n - 2); + _Cilk_sync; + return (x + y); +} + +template +intT fib_exc(intT n) { + if (n < 2) return n; + try { + intT x = _Cilk_spawn fib_exc(n - 1); + intT y = fib_exc(n - 2); + _Cilk_sync; + return (x + y); + } catch (...) { + return intT(-1); + } +} + +long foo() { + return fib(38) + fib_exc(38); +} + +// CHECK-LABEL: define {{.+}}i32 @_Z3fibIiET_S0_(i32 noundef %n) + +// CHECK: %[[TASKFRAME:.+]] = call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG:.+]], label %[[DETBLK:.+]], label %[[CONTBLK:.+]] + +// CHECK: [[DETBLK]]: +// CHECK-NEXT: call void @llvm.taskframe.use(token %[[TASKFRAME]]) +// CHECK-NEXT: %[[RETVAL:.+]] = call noundef i32 @_Z3fibIiET_S0_ +// CHECK-NEXT: store i32 %[[RETVAL]], ptr +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTBLK]] + +// CHECK: [[CONTBLK]]: +// CHECK: %[[RETVAL2:.+]] = call noundef i32 @_Z3fibIiET_S0_ +// CHECK-NEXT: store i32 %[[RETVAL2]] +// CHECK-NEXT: sync within %[[SYNCREG]] + + +// CHECK-LABEL: define {{.+}}i32 @_Z7fib_excIiET_S0_(i32 noundef %n) + +// CHECK: %[[TASKFRAME:.+]] = call token @llvm.taskframe.create() +// CHECK: detach within %[[SYNCREG:.+]], label %[[DETBLK:.+]], label %[[CONTBLK:.+]] unwind label %[[TFLPAD:.+]] + +// CHECK: [[DETBLK]]: +// CHECK: call void @llvm.taskframe.use(token %[[TASKFRAME]]) +// CHECK: %[[RETVAL:.+]] = invoke noundef i32 @_Z7fib_excIiET_S0_ +// CHECK-NEXT: to label %[[INVOKECONT:.+]] unwind label 
%[[DETLPAD:.+]] + +// CHECK: [[INVOKECONT]]: +// CHECK-NEXT: store i32 %[[RETVAL]], ptr +// CHECK-NEXT: reattach within %[[SYNCREG]], label %[[CONTBLK]] + +// CHECK: [[CONTBLK]]: +// CHECK: %[[RETVAL2:.+]] = invoke noundef i32 @_Z7fib_excIiET_S0_ +// CHECK-NEXT: to label %[[INVOKECONT2:.+]] unwind label %[[OUTERLPAD:.+]] + +// CHECK: [[INVOKECONT2]]: +// CHECK-NEXT: store i32 %[[RETVAL2]] +// CHECK-NEXT: sync within %[[SYNCREG]] + +// CHECK: [[DETLPAD]]: +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], +// CHECK-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[TFLPAD]] + +// CHECK: [[TFLPAD]]: +// CHECK: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[TASKFRAME]], +// CHECK-NEXT: to label %[[UNREACHABLE]] unwind label %[[OUTERLPAD]] + +// CHECK: [[UNREACHABLE]]: +// CHECK-NEXT: unreachable diff --git a/clang/test/Cilk/spawntest.cpp b/clang/test/Cilk/spawntest.cpp new file mode 100644 index 00000000000000..4b99b4cae3bd8b --- /dev/null +++ b/clang/test/Cilk/spawntest.cpp @@ -0,0 +1,104 @@ +// RUN: %clang_cc1 -std=c++11 -verify %s + +class Bar { + int myVal; +public: + Bar(); + + int getValue(); +}; + +class Foo { + Bar myBar; +public: + Foo(); + + Bar &getBar(); +}; + +Bar &getBarFromFoo(Foo &f); +int getintFromBar(Bar &b); + +int bar(int); +int foo(int); + +struct PairT { + int t1; + int t2; +}; + +struct PairT baz(int); + +int spawn_tests(int n) { + Foo f; + // n is evaluated before spawn, result is passsed by value. + _Cilk_spawn bar(n); + // f.getBar() is evalauted before spawn, result passed by value. Only + // getValue() call is spawned. + _Cilk_spawn f.getBar().getValue(); + // getBarFromFoo(f) is evaluated before spawn. Only getintFromBar() is + // spawned. + _Cilk_spawn getintFromBar(getBarFromFoo(f)); + // [&]{ getintFromBar(getBarFromFoo(f)); }(); + return 0; +} + +int basic_spawn_assign_tests(int n, int *p) { + Foo f; + int x; + // Call to bar and store to address of x are both spawned. 
+ x = _Cilk_spawn bar(n); + // Address computation of *p happens before detach. + *p = _Cilk_spawn bar(n); + // Only the call to getValue() is spawned. + int x1; + x1 = _Cilk_spawn f.getBar().getValue(); + // Call to getBar() and subsequent copy is spawned. + Bar fb; + fb = _Cilk_spawn f.getBar(); + // Memory allocation and call to constructor are both spawned. EH structures + // are local to detached block, and detach-local EH terminates in a resume. + Foo *myfoo; + myfoo = _Cilk_spawn new Foo(); + struct PairT pair; + pair = _Cilk_spawn baz(n-2); + return 0; +} + +int basic_spawn_decl_tests(int n) { + Foo f; + // Call to foo and store to address of y are spawned. + int y = _Cilk_spawn foo(n); + // Call to getValue and store to address of z are spawned. + int z = _Cilk_spawn f.getBar().getValue(); + // Call to getBar() and subsequent copy is spawned. + Bar fb = _Cilk_spawn f.getBar(); + // Call to foo and store to a is spawned. Call to bar and store to c is + // spawned. + int a = _Cilk_spawn foo(n-1), b = 7, c = _Cilk_spawn bar(n-1); + // Call to foo, explicit cast, and store to yl are spawned. + long yl = _Cilk_spawn (long)foo(n); + // Call to foo, implicit cast, and store to yl2 are spawned. + long yl2 = _Cilk_spawn foo(n); + // Memory allocation, call to Bar() constructor, and store are spawned. + Bar *mybar = _Cilk_spawn new Bar(); + // Call to baz and store are spawned. 
+ struct PairT pair = _Cilk_spawn baz(n-2); + return 0; +} + +int spawn_assign_eval_order_tests(int n) { + int i = 0; + int Arr[5]; + Arr[i++] = _Cilk_spawn bar(i++); // expected-warning {{multiple unsequenced modifications to 'i'}} + Arr[i++] += bar(i); // expected-warning {{unsequenced modification and access to 'i'}} + Arr[i++] += _Cilk_spawn bar(i); // expected-warning {{unsequenced modification and access to 'i'}} expected-error {{invalid _Cilk_spawn in expression}} + return 0; +} + +void invalid_spawn_expr() { + int x = 0; + x + _Cilk_spawn 7; // expected-warning {{expression result unused}} expected-error {{invalid _Cilk_spawn in expression}} + int y = x + _Cilk_spawn 7; // expected-error {{invalid _Cilk_spawn in expression}} + x += _Cilk_spawn 7; // expected-error {{invalid _Cilk_spawn in expression}} +} diff --git a/clang/test/Cilk/stream-compat.cpp b/clang/test/Cilk/stream-compat.cpp new file mode 100644 index 00000000000000..8a407cd7059d82 --- /dev/null +++ b/clang/test/Cilk/stream-compat.cpp @@ -0,0 +1,22 @@ +// RUN: %clang_cc1 %s -x c++ -fopencilk -verify -fsyntax-only +// expected-no-diagnostics +template +class ostream_view { + Char value; +public: + void reduce(ostream_view* other); + static void reduce(void *left_v, void *right_v); + static void identity(void *view); +}; + +template + using ostream_reducer = ostream_view + _Hyperobject(&ostream_view::identity, + &ostream_view::reduce); + +void f() +{ + // The types of a and b should be compatible. + ostream_reducer *a = nullptr; + ostream_reducer *b = a; +} diff --git a/clang/test/Cilk/syncregion-debug-info.c b/clang/test/Cilk/syncregion-debug-info.c new file mode 100644 index 00000000000000..728310a41f82af --- /dev/null +++ b/clang/test/Cilk/syncregion-debug-info.c @@ -0,0 +1,17 @@ +// Check that Clang attaches some debug information to +// syncregion.start intrinsics. 
Doing so helps ensure that bitcode +// files with debug information can be properly linked during Tapir +// lowering and their functions subsequently inlined. +// +// RUN: %clang_cc1 %s -debug-info-kind=standalone -fopencilk -ftapir=none -S -emit-llvm -o - | FileCheck %s + +int fib(int n) { + if (n < 2) return n; + int x = _Cilk_spawn fib(n-1); + int y = fib(n-2); + _Cilk_sync; + return x + y; +} + +// CHECK-LABEL: define {{.*}}i32 @fib( +// CHECK: call token @llvm.syncregion.start(), !dbg diff --git a/clang/test/Cilk/tapirloopattrs.c b/clang/test/Cilk/tapirloopattrs.c new file mode 100644 index 00000000000000..7930767e5497ef --- /dev/null +++ b/clang/test/Cilk/tapirloopattrs.c @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -fopencilk -ftapir=none -S -emit-llvm -o - | FileCheck %s +// expected-no-diagnostics + +void parfor_novec(double *restrict y, double *restrict x, double a, int n) { + #pragma clang loop vectorize(disable) + _Cilk_for (int i = 0; i < n; ++i) + y[i] += a * x[i]; +} + +void parfor_unroll_vec(double *restrict y, double *restrict x, double a, int n) { + #pragma clang loop unroll_count(4) + #pragma clang loop vectorize_width(4) + for (int i = 0; i < n; ++i) + y[i] += a * x[i]; + + #pragma clang loop unroll_count(4) + #pragma clang loop vectorize_width(4) + _Cilk_for (int i = 0; i < n; ++i) + y[i] += a * x[i]; +} + +// CHECK: define {{.*}}void @parfor_novec(ptr noalias noundef %y, ptr noalias noundef %x, double noundef %a, i32 noundef %n) +// CHECK: !llvm.loop [[LOOPID1:![0-9]+]] + +// CHECK: define {{.*}}void @parfor_unroll_vec(ptr noalias noundef %y, ptr noalias noundef %x, double noundef %a, i32 noundef %n) +// CHECK: !llvm.loop [[LOOPID2:![0-9]+]] +// CHECK: !llvm.loop [[LOOPID3:![0-9]+]] + +// CHECK: [[LOOPID1]] = distinct !{[[LOOPID1]], [[TAPIR_SPAWN_STRATEGY:![0-9]+]], [[NOVEC:![0-9]+]]} +// CHECK: [[TAPIR_SPAWN_STRATEGY]] = !{!"tapir.loop.spawn.strategy", i32 1} +// CHECK: [[NOVEC]] = !{!"llvm.loop.vectorize.width", 
i32 1} + +// CHECK: [[LOOPID2]] = distinct !{[[LOOPID2]], [[MUSTPROGRESS:![0-9]+]], [[VECATTRS:!.+]], [[VECFOLLOWALL1:![0-9]+]]} +// CHECK: [[VECFOLLOWALL1]] = !{!"llvm.loop.vectorize.followup_all", [[VECFOLLOW1:![0-9]+]]} +// CHECK: [[VECFOLLOW1]] = distinct !{[[VECFOLLOW1]], [[MUSTPROGRESS]], [[VECFOLLOWATTRS:!.+]]} + +// CHECK: [[LOOPID3]] = distinct !{[[LOOPID3]], [[TAPIR_SPAWN_STRATEGY]], [[VECATTRS]], [[VECFOLLOWALL2:![0-9]+]]} +// CHECK: [[VECFOLLOWALL2]] = !{!"llvm.loop.vectorize.followup_all", [[VECFOLLOW2:![0-9]+]]} +// CHECK: [[VECFOLLOW2]] = distinct !{[[VECFOLLOW2]], [[TAPIR_SPAWN_STRATEGY]], [[VECFOLLOWATTRS]]} diff --git a/clang/test/Cilk/taskframe-always-inline.cpp b/clang/test/Cilk/taskframe-always-inline.cpp new file mode 100644 index 00000000000000..630d1365f57937 --- /dev/null +++ b/clang/test/Cilk/taskframe-always-inline.cpp @@ -0,0 +1,51 @@ +// Check that -O0 compilation handles always_inline functions before Tapir +// lowering. +// +// RUN: %clang_cc1 -fcxx-exceptions -fexceptions -fopencilk -triple x86_64-unknown-linux-gnu -emit-llvm -mllvm -debug-abi-calls -mllvm -use-opencilk-runtime-bc=false %s -o- | FileCheck %s + +void print(const char *s, ...); + +void bar(int x); +void foo(int x); + +__attribute__((always_inline)) +void top(int x) { + _Cilk_spawn foo(1); + try { + _Cilk_spawn bar(1); + bar(2); + } catch (int e) { + print("top caught exception: %d\n", e); + } +} + +int main(int argc, char *argv[]) { + print("main"); + try { + _Cilk_spawn top(1); + top(2); + } catch (char e) { + print("main caught exception %c\n", e); + } + print("main done"); + + return 0; +} + +// CHECK: define {{.*}}i32 @main( + +// CHECK: invoke {{.*}}void @main.outline +// CHECK-NEXT: to label %[[DET_CONT:.+]] unwind + +// CHECK: [[DET_CONT]]: +// CHECK: invoke {{.*}}void [[MAIN_TOP2_OTF0:@main\.outline.+.otf0]]( +// CHECK-NEXT: to label %[[INVOKE_CONT:.+]] unwind + +// CHECK: [[INVOKE_CONT]]: +// CHECK: invoke void @__cilk_sync( + + +// CHECK: define {{.*}}void 
[[MAIN_TOP2_OTF0]]( +// CHECK: call void @__cilkrts_enter_frame( +// CHECK: call i32 @__cilk_prepare_spawn( + diff --git a/clang/test/Cilk/trivial-assign-op.cpp b/clang/test/Cilk/trivial-assign-op.cpp new file mode 100644 index 00000000000000..062ca5182dfd45 --- /dev/null +++ b/clang/test/Cilk/trivial-assign-op.cpp @@ -0,0 +1,63 @@ +// RUN: %clang_cc1 -fcxx-exceptions -fexceptions -fopencilk -ftapir=none -triple x86_64-unknown-linux-gnu -std=c++11 -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics + +struct event { + float v; + long p; + event(float value, long index, bool type) + : v(value), p((index << 1) + type) {} + event() {} +}; + +struct range { + float min; + float max; + range(float _min, float _max) : min(_min), max(_max) {} + range() {} +}; + +typedef range* Boxes[3]; +typedef event* Events[3]; +typedef range BoundingBox[3]; + +struct cutInfo { + float cost; + float cutOff; + long numLeft; + long numRight; +cutInfo(float _cost, float _cutOff, long nl, long nr) +: cost(_cost), cutOff(_cutOff), numLeft(nl), numRight(nr) {} + cutInfo() {} +}; + +cutInfo bestCut(event* E, range r, range r1, range r2, long n); + +void generateNode(Boxes boxes, Events events, BoundingBox B, long n, + int maxDepth) { + cutInfo cuts[3]; + cuts[0] = _Cilk_spawn bestCut(events[0], B[0], B[(0+1)%3], B[(0+2)%3], n); + cuts[1] = _Cilk_spawn bestCut(events[1], B[1], B[(1+1)%3], B[(1+2)%3], n); + cuts[2] = _Cilk_spawn bestCut(events[2], B[2], B[(2+1)%3], B[(2+2)%3], n); + _Cilk_sync; +} + +// CHECK: define {{.*}}void @_Z12generateNodePP5rangePP5eventS0_li( +// CHECK: getelementptr inbounds [3 x %struct.cutInfo], ptr %cuts, i64 0, i64 0 +// CHECK-NOT: call void @_Z7bestCutP5event5rangeS1_S1_l( +// CHECK: detach +// CHECK: call void @_Z7bestCutP5event5rangeS1_S1_l( +// CHECK: call void @llvm.memcpy +// CHECK: reattach +// CHECK: getelementptr inbounds [3 x %struct.cutInfo], ptr %cuts, i64 0, i64 1 +// CHECK-NOT: call void @_Z7bestCutP5event5rangeS1_S1_l( +// CHECK: detach +// 
CHECK: call void @_Z7bestCutP5event5rangeS1_S1_l( +// CHECK: call void @llvm.memcpy +// CHECK: reattach +// CHECK: getelementptr inbounds [3 x %struct.cutInfo], ptr %cuts, i64 0, i64 2 +// CHECK-NOT: call void @_Z7bestCutP5event5rangeS1_S1_l( +// CHECK: detach +// CHECK: call void @_Z7bestCutP5event5rangeS1_S1_l( +// CHECK: call void @llvm.memcpy +// CHECK: reattach +// CHECK: sync diff --git a/clang/test/Cilk/unreachable-sync.cpp b/clang/test/Cilk/unreachable-sync.cpp new file mode 100644 index 00000000000000..7aa96518db65da --- /dev/null +++ b/clang/test/Cilk/unreachable-sync.cpp @@ -0,0 +1,18 @@ +// Check that a sync is not inserted when clang recognizes that it is not reachable. +// +// RUN: %clang_cc1 -fopencilk -ftapir=none -triple x86_64-unknown-linux-gnu -std=c++11 -emit-llvm %s -o - | FileCheck %s +int create() { + return 1; + _Cilk_sync; +} + +// CHECK: define dso_local {{.*}}i32 @_Z6createv() +// CHECK: { +// CHECK: ret i32 1 +// CHECK-NOT: sync +// CHECK: } + +int main([[maybe_unused]] int argc, char *argv[]) { + int e = create(); + return 0; +} diff --git a/clang/test/Cilk/vla-of-hyper.c b/clang/test/Cilk/vla-of-hyper.c new file mode 100644 index 00000000000000..dd77276463c0ba --- /dev/null +++ b/clang/test/Cilk/vla-of-hyper.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -x c %s -fopencilk -verify -ftapir=none -S -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -x c++ %s -fopencilk -verify -ftapir=none -S -emit-llvm -o - | FileCheck %s + +extern void reduce(void *, void *), identity(void *); + +// VLA of hyperobject +// CHECK-LABEL: test_vla_hyper +int test_vla_hyper(unsigned long size) +{ + int _Hyperobject(identity, reduce) array[size]; + // expected-warning@-1{{array of reducer not implemented}} + + // CHECK: getelementptr + // CHECK: %[[RAW:.+]] = call ptr @llvm.hyper.lookup + // CHECK-NEXT: %[[RET:.+]] = load i32, ptr %[[RAW]] + // CHECK-NOT: getelementptr + // CHECK: ret i32 %[[RET]] + return array[2]; +} diff --git a/clang/test/Cilk/worker-load-test.c 
b/clang/test/Cilk/worker-load-test.c new file mode 100644 index 00000000000000..efd880cd248a06 --- /dev/null +++ b/clang/test/Cilk/worker-load-test.c @@ -0,0 +1,100 @@ +// RUN: %clang_cc1 %s -fopencilk -triple x86_64-unknown-linux-gnu -mllvm -use-opencilk-runtime-bc=true -mllvm -opencilk-runtime-bc-path=%S/Inputs/libopencilk-abi.bc -O2 -S -emit-llvm -o - | FileCheck %s + +typedef int int32_t; +typedef unsigned char uint8_t; + +#define QT_IGNORE_HEIGHT 3 + +#define MAX_NODES 100 + +/* + * Find the beginning of the parent section in the lines array + * l_ and r_ are the left and right boundaries for the search + * h is the height that the parent node should be + */ +int32_t bsearch_parents(int32_t l_, int32_t r_, uint8_t h); + +/* + * Find the beginning of the child section representing quadrant q (0, 1, 2, 3) + * l_ and r_ are the left and right boundaries for the search + * h is the height that the parent node should be + */ +int32_t bsearch_quadrant(int32_t l_, int32_t r_, uint8_t h, uint8_t q); + +/* + * Check possible collisions among lines in lines[l...r] + */ +void detectCollisionsSlow(int l, int r); + +/* + * Check possible collisions between lines in lines[l1...r1] and lines[l2...r2] + */ +void detectCollisionsSlow2(int l1, int r1, int l2, int r2); + +/* + * Detect collisions in the subtree of the quadtree represented by lines[l...r] + * and at the height of h + */ +void detectLocalizedCollisions(int32_t l, int32_t r, uint8_t h) { + if (l >= r || l < 0 || r < 0) { + return; + } + + if (h <= QT_IGNORE_HEIGHT || r - l + 1 <= MAX_NODES) { + return detectCollisionsSlow(l, r); + } + + int32_t m[] = {l, -1, -1, -1, -1, r+1}; + m[4] = bsearch_parents(l, r, h); + m[1] = bsearch_quadrant(l, m[4]-1, h, 1); + m[2] = bsearch_quadrant(l, m[4]-1, h, 2); + m[3] = bsearch_quadrant(l, m[4]-1, h, 3); + + int i, j; + for (i = 0, j = 1; j < 5; j++) { + if (m[j] != -1) { + _Cilk_spawn detectLocalizedCollisions(m[i], m[j]-1, h-1); + i = j; + } + } + _Cilk_spawn 
detectCollisionsSlow(m[4], r); + + if (l > m[4]-1 || m[4] > r) return; + + detectCollisionsSlow2(l, m[4]-1, m[4], r); + _Cilk_sync; +} + +// CHECK-LABEL: define {{.*}}void @detectLocalizedCollisions( +// CHECK: %[[CILKRTS_SF:.+]] = alloca %struct.__cilkrts_stack_frame +// CHECK: %[[WORKER_LOAD:.+]] = load ptr, ptr @__cilkrts_tls_worker +// CHECK: %[[WORKER_PHI:.+]] = phi ptr +// CHECK: %[[WORKER_PTR:.+]] = getelementptr inbounds %struct.__cilkrts_stack_frame, ptr %[[CILKRTS_SF]], i64 0, i32 3 +// CHECK: %[[WORKER_LOAD_VAL:.+]] = ptrtoint ptr %[[WORKER_PHI]] to [[WORKER_INT_TY:i[0-9]+]] +// CHECK: store atomic [[WORKER_INT_TY]] %[[WORKER_LOAD_VAL]], ptr %[[WORKER_PTR]] +// CHECK: icmp +// CHECK-NEXT: or +// CHECK-NEXT: icmp +// CHECK-NEXT: %[[CMP1:.+]] = or i1 +// CHECK-NEXT: br i1 %[[CMP1]], label %{{.+}}, label %[[IF_END:.+]] + +// CHECK: [[IF_END]]: +// CHECK-NEXT: icmp +// CHECK-NEXT: sub +// CHECK-NEXT: icmp +// CHECK-NEXT: %[[CMP2:.+]] = select i1 +// CHECK-NEXT: br i1 %[[CMP2]], label %[[IF_THEN:.+]], label %[[IF_END2:.+]] + +// CHECK: [[IF_THEN]]: +// CHECK: call void @detectCollisionsSlow( +// CHECK-NEXT: br label %[[CLEANUP_CONT:.+]] + +// CHECK: [[CLEANUP_CONT]]: +// CHECK: %[[WORKER_RELOAD_CST:.+]] = load atomic [[WORKER_INT_TY]], ptr %[[WORKER_PTR]] monotonic +// CHECK: %[[WORKER_RELOAD:.+]] = inttoptr [[WORKER_INT_TY]] %[[WORKER_RELOAD_CST]] to ptr +// CHECK: call void @Cilk_set_return(ptr noundef nonnull %[[WORKER_RELOAD]]) +// CHECK: ret void + +// CHECK-LABEL: define {{.*}}void @detectLocalizedCollisions.outline_det.achd.otd1( + +// CHECK-LABEL: define {{.*}}void @detectLocalizedCollisions.outline_det.achd44.otd1( diff --git a/clang/test/CodeGenCXX/threadlocal_address.cpp b/clang/test/CodeGenCXX/threadlocal_address.cpp index 0ae58ab5500295..bd346c209353c9 100644 --- a/clang/test/CodeGenCXX/threadlocal_address.cpp +++ b/clang/test/CodeGenCXX/threadlocal_address.cpp @@ -51,4 +51,4 @@ int f() { // CHECK-O1-NEXT: store i32 %[[INC]], ptr %[[J_ADDR]] // 
CHECK-O1-NEXT: ret i32 %[[INC]] // -// CHECK: attributes #[[ATTR_NUM]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +// CHECK: attributes #[[ATTR_NUM]] = { nocallback nofree nosync nounwind speculatable willreturn memory(inaccessiblemem: read) } diff --git a/clang/test/Lexer/has_feature_comprehensive_static_instrumentation.cpp b/clang/test/Lexer/has_feature_comprehensive_static_instrumentation.cpp new file mode 100644 index 00000000000000..e54e4a2eb0d0d3 --- /dev/null +++ b/clang/test/Lexer/has_feature_comprehensive_static_instrumentation.cpp @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -E -fcsi %s -o - | FileCheck --check-prefix=CHECK-CSI %s +// RUN: %clang_cc1 -E %s -o - | FileCheck --check-prefix=CHECK-NO-CSI %s + +#if __has_feature(comprehensive_static_instrumentation) +int CsiEnabled(); +#else +int CsiDisabled(); +#endif + +// CHECK-CSI: CsiEnabled +// CHECK-NO-CSI: CsiDisabled diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index eaf6d34421bbe0..a3660af8f5ef0a 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -75,9 +75,12 @@ // CHECK-NEXT: GNUInline (SubjectMatchRule_function) // CHECK-NEXT: HIPManaged (SubjectMatchRule_variable) // CHECK-NEXT: Hot (SubjectMatchRule_function) +// CHECK-NEXT: HyperToken (SubjectMatchRule_hasType_functionType) +// CHECK-NEXT: HyperView (SubjectMatchRule_hasType_functionType) // CHECK-NEXT: IBAction (SubjectMatchRule_objc_method_is_instance) // CHECK-NEXT: IFunc (SubjectMatchRule_function) // CHECK-NEXT: InitPriority (SubjectMatchRule_variable) +// CHECK-NEXT: Injective (SubjectMatchRule_function) // CHECK-NEXT: InternalLinkage (SubjectMatchRule_variable, SubjectMatchRule_function, SubjectMatchRule_record) // CHECK-NEXT: LTOVisibilityPublic (SubjectMatchRule_record) // CHECK-NEXT: Leaf (SubjectMatchRule_function) 
@@ -157,6 +160,8 @@ // CHECK-NEXT: Pointer (SubjectMatchRule_record_not_is_union) // CHECK-NEXT: RandomizeLayout (SubjectMatchRule_record) // CHECK-NEXT: ReadOnlyPlacement (SubjectMatchRule_record) +// CHECK-NEXT: ReducerRegister (SubjectMatchRule_hasType_functionType) +// CHECK-NEXT: ReducerUnregister (SubjectMatchRule_hasType_functionType) // CHECK-NEXT: ReleaseHandle (SubjectMatchRule_variable_is_parameter) // CHECK-NEXT: RenderScriptKernel (SubjectMatchRule_function) // CHECK-NEXT: ReqdWorkGroupSize (SubjectMatchRule_function) @@ -170,6 +175,9 @@ // CHECK-NEXT: SetTypestate (SubjectMatchRule_function_is_member) // CHECK-NEXT: SpeculativeLoadHardening (SubjectMatchRule_function, SubjectMatchRule_objc_method) // CHECK-NEXT: StandaloneDebug (SubjectMatchRule_record) +// CHECK-NEXT: Stealable (SubjectMatchRule_hasType_functionType) +// CHECK-NEXT: StrandMalloc (SubjectMatchRule_function) +// CHECK-NEXT: StrandPure (SubjectMatchRule_hasType_functionType) // CHECK-NEXT: SwiftAsync (SubjectMatchRule_function, SubjectMatchRule_objc_method) // CHECK-NEXT: SwiftAsyncContext (SubjectMatchRule_variable_is_parameter) // CHECK-NEXT: SwiftAsyncError (SubjectMatchRule_function, SubjectMatchRule_objc_method) diff --git a/clang/test/Misc/show-diag-options.c b/clang/test/Misc/show-diag-options.c index 4e98d63195f109..21181e2ecaee00 100644 --- a/clang/test/Misc/show-diag-options.c +++ b/clang/test/Misc/show-diag-options.c @@ -18,7 +18,7 @@ void test(int x, int y) { // BASE: {{.*}}: warning: {{[a-z ]+$}} // OPTION: {{.*}}: warning: {{[a-z ]+}} [-Wparentheses] // OPTION_ERROR: {{.*}}: error: {{[a-z ]+}} [-Werror,-Wparentheses] - // CATEGORY_ID: {{.*}}: warning: {{[a-z ]+}} [2] + // CATEGORY_ID: {{.*}}: warning: {{[a-z ]+}} [3] // CATEGORY_NAME: {{.*}}: warning: {{[a-z ]+}} [Semantic Issue] // OPTION_ERROR_CATEGORY: {{.*}}: error: {{[a-z ]+}} [-Werror,-Wparentheses,Semantic Issue] diff --git a/clang/test/Sema/builtin-longjmp.c b/clang/test/Sema/builtin-longjmp.c index 
99463cf3385a1e..0de2cb96988c99 100644 --- a/clang/test/Sema/builtin-longjmp.c +++ b/clang/test/Sema/builtin-longjmp.c @@ -4,8 +4,8 @@ // RUN: %clang_cc1 -triple powerpc-unknown-unknown -emit-llvm < %s| FileCheck %s // RUN: %clang_cc1 -triple powerpc64-unknown-unknown -emit-llvm < %s| FileCheck %s // RUN: %clang_cc1 -triple ve-unknown-unknown -emit-llvm < %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-unknown-unknown -emit-llvm < %s | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-unknown-unknown -emit-llvm-only -verify %s // RUN: %clang_cc1 -triple mips-unknown-unknown -emit-llvm-only -verify %s // RUN: %clang_cc1 -triple mips64-unknown-unknown -emit-llvm-only -verify %s // RUN: %clang_cc1 -triple sparc-eabi-unknown -emit-llvm-only -verify %s diff --git a/clang/tools/driver/CMakeLists.txt b/clang/tools/driver/CMakeLists.txt index 2182486f93a555..fa3ad70ccfe56d 100644 --- a/clang/tools/driver/CMakeLists.txt +++ b/clang/tools/driver/CMakeLists.txt @@ -13,6 +13,7 @@ set( LLVM_LINK_COMPONENTS Option ScalarOpts Support + TapirOpts TargetParser TransformUtils Vectorize diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 39886b23bb36f1..00abd4c01fb42c 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -1896,6 +1896,7 @@ bool CursorVisitor::VisitPipeTypeLoc(PipeTypeLoc TL) { } DEFAULT_TYPELOC_IMPL(Complex, Type) +DEFAULT_TYPELOC_IMPL(Hyperobject, Type) DEFAULT_TYPELOC_IMPL(ConstantArray, ArrayType) DEFAULT_TYPELOC_IMPL(IncompleteArray, ArrayType) DEFAULT_TYPELOC_IMPL(VariableArray, ArrayType) @@ -6030,6 +6031,16 @@ CXString clang_getCursorKindSpelling(enum CXCursorKind Kind) { return cxstring::createRef("attribute(aligned)"); case CXCursor_ConceptDecl: return cxstring::createRef("ConceptDecl"); + case CXCursor_CilkSpawnStmt: + return cxstring::createRef("CilkSpawnStmt"); + case CXCursor_CilkSpawnExpr: + return cxstring::createRef("CilkSpawnExpr"); + case CXCursor_CilkSyncStmt: + return 
cxstring::createRef("CilkSyncStmt"); + case CXCursor_CilkForStmt: + return cxstring::createRef("CilkForStmt"); + case CXCursor_CilkScopeStmt: + return cxstring::createRef("CilkScopeStmt"); } llvm_unreachable("Unhandled CXCursorKind"); diff --git a/clang/tools/libclang/CXCursor.cpp b/clang/tools/libclang/CXCursor.cpp index d48063f105f9f2..6fca0792be2715 100644 --- a/clang/tools/libclang/CXCursor.cpp +++ b/clang/tools/libclang/CXCursor.cpp @@ -293,6 +293,26 @@ CXCursor cxcursor::MakeCXCursor(const Stmt *S, const Decl *Parent, K = CXCursor_UnexposedStmt; break; + case Stmt::CilkSpawnStmtClass: + K = CXCursor_CilkSpawnStmt; + break; + + case Stmt::CilkSpawnExprClass: + K = CXCursor_CilkSpawnExpr; + break; + + case Stmt::CilkSyncStmtClass: + K = CXCursor_CilkSyncStmt; + break; + + case Stmt::CilkForStmtClass: + K = CXCursor_CilkForStmt; + break; + + case Stmt::CilkScopeStmtClass: + K = CXCursor_CilkScopeStmt; + break; + case Stmt::ArrayTypeTraitExprClass: case Stmt::AsTypeExprClass: case Stmt::AtomicExprClass: diff --git a/clang/tools/libclang/CXType.cpp b/clang/tools/libclang/CXType.cpp index eb8bfc25a7c910..edae3bd3e00ded 100644 --- a/clang/tools/libclang/CXType.cpp +++ b/clang/tools/libclang/CXType.cpp @@ -93,6 +93,7 @@ static CXTypeKind GetTypeKind(QualType T) { case Type::Builtin: return GetBuiltinTypeKind(cast(TP)); TKCASE(Complex); + TKCASE(Hyperobject); TKCASE(Pointer); TKCASE(BlockPointer); TKCASE(LValueReference); @@ -593,6 +594,7 @@ CXString clang_getTypeKindSpelling(enum CXTypeKind K) { TKIND(ObjCClass); TKIND(ObjCSel); TKIND(Complex); + TKIND(Hyperobject); TKIND(Pointer); TKIND(BlockPointer); TKIND(LValueReference); @@ -830,6 +832,9 @@ CXType clang_getElementType(CXType CT) { case Type::Complex: ET = cast (TP)->getElementType(); break; + case Type::Hyperobject: + ET = cast (TP)->getElementType(); + break; default: break; } diff --git a/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake b/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake index 
6a62d3bf7adced..160b92b51c24c3 100644 --- a/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake +++ b/compiler-rt/cmake/Modules/CompilerRTDarwinUtils.cmake @@ -330,6 +330,10 @@ macro(darwin_add_builtin_library name suffix) set_target_properties(${libname} PROPERTIES OSX_ARCHITECTURES ${LIB_ARCH}) + if (${arch} STREQUAL arm64 OR ${arch} STREQUAL arm64e) + add_dependencies(${libname} outline_atomic_helpers) + endif() + if(LIB_PARENT_TARGET) add_dependencies(${LIB_PARENT_TARGET} ${libname}) endif() diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt index d62fa0432e2a5a..b3c601a7b6689c 100644 --- a/compiler-rt/lib/builtins/CMakeLists.txt +++ b/compiler-rt/lib/builtins/CMakeLists.txt @@ -556,6 +556,7 @@ set(aarch64_SOURCES ) # Generate outline atomics helpers from lse.S base +set(atomic_helpers) set(OA_HELPERS_DIR "${CMAKE_CURRENT_BINARY_DIR}/outline_atomic_helpers.dir") file(MAKE_DIRECTORY "${OA_HELPERS_DIR}") @@ -577,12 +578,15 @@ foreach(pat cas swp ldadd ldclr ldeor ldset) COMPILE_DEFINITIONS "L_${pat};SIZE=${size};MODEL=${model}" INCLUDE_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}" ) + list(APPEND atomic_helpers "${helper_asm}") list(APPEND aarch64_SOURCES "${helper_asm}") endif() endforeach(model) endforeach(size) endforeach(pat) +add_custom_target(outline_atomic_helpers DEPENDS ${atomic_helpers}) + if (MINGW) set(aarch64_SOURCES ${aarch64_SOURCES} diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index d2b3b63f3a7a3b..5d4aa08f36ad03 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -1455,7 +1455,7 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, : "memory"); return res; } -#elif defined(__aarch64__) +#elif defined(__aarch64__) && defined(__linux__) uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, int 
*parent_tidptr, void *newtls, int *child_tidptr) { register long long res __asm__("x0"); @@ -1996,7 +1996,7 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const { static const uptr FSR_WRITE = 1U << 11; uptr fsr = ucontext->uc_mcontext.error_code; return fsr & FSR_WRITE ? Write : Read; -#elif defined(__aarch64__) +#elif defined(__aarch64__) && defined(__linux__) static const u64 ESR_ELx_WNR = 1U << 6; u64 esr; if (!Aarch64GetESR(ucontext, &esr)) return Unknown; diff --git a/libcxxabi/src/demangle/ItaniumDemangle.h b/libcxxabi/src/demangle/ItaniumDemangle.h index 28562f02144670..65a064806c8199 100644 --- a/libcxxabi/src/demangle/ItaniumDemangle.h +++ b/libcxxabi/src/demangle/ItaniumDemangle.h @@ -639,6 +639,36 @@ class PointerType final : public Node { } }; +class HyperobjectType final : public Node { + const Node *View; + +public: + HyperobjectType(const Node *View_) + : Node(KHyperobjectType, View_->RHSComponentCache), + View(View_) {} + + template void match(Fn F) const { F(View); } + + bool hasRHSComponentSlow(OutputBuffer &OB) const override { + return View->hasRHSComponent(OB); + } + + void printLeft(OutputBuffer &OB) const override { + View->printLeft(OB); + if (View->hasArray(OB)) + OB += " "; + if (View->hasArray(OB) || View->hasFunction(OB)) + OB += "("; + OB += " _Hyperobject"; + } + + void printRight(OutputBuffer &OB) const override { + if (View->hasArray(OB) || View->hasFunction(OB)) + OB += ")"; + View->printRight(OB); + } +}; + enum class ReferenceKind { LValue, RValue, @@ -4050,6 +4080,14 @@ Node *AbstractManglingParser::parseType() { Result = make(Ptr); break; } + case 'H': { + ++First; + Node *Ptr = getDerived().parseType(); + if (Ptr == nullptr) + return nullptr; + Result = make(Ptr); + break; + } // ::= R # l-value reference case 'R': { ++First; diff --git a/libcxxabi/src/demangle/ItaniumNodes.def b/libcxxabi/src/demangle/ItaniumNodes.def index f615cb9fadb05e..2b7690708052a1 100644 --- a/libcxxabi/src/demangle/ItaniumNodes.def +++ 
b/libcxxabi/src/demangle/ItaniumNodes.def @@ -26,6 +26,7 @@ NODE(ObjCProtoName) NODE(PointerType) NODE(ReferenceType) NODE(PointerToMemberType) +NODE(HyperobjectType) NODE(ArrayType) NODE(FunctionType) NODE(NoexceptSpec) diff --git a/lld/COFF/Config.h b/lld/COFF/Config.h index 96b49ed3850d49..d4bf359e19721d 100644 --- a/lld/COFF/Config.h +++ b/lld/COFF/Config.h @@ -17,6 +17,7 @@ #include "llvm/Object/COFF.h" #include "llvm/Support/CachePruning.h" #include "llvm/Support/VirtualFileSystem.h" +#include "llvm/Transforms/Tapir/TapirTargetIDs.h" #include #include #include @@ -228,6 +229,10 @@ struct Configuration { // Used for /mapinfo. bool mapInfo = false; + // Used for Tapir target. + llvm::StringRef opencilkABIBitcodeFile; + llvm::TapirTargetID tapirTarget = llvm::TapirTargetID::None; + // Used for /thinlto-index-only: llvm::StringRef thinLTOIndexOnlyArg; diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index e2f414f78ecb7b..eb4da78a8359e1 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -816,6 +816,7 @@ static std::string createResponseFile(const opt::InputArgList &args, case OPT_deffile: case OPT_manifestinput: case OPT_natvis: + case OPT_opencilk_abi_bitcode: os << arg->getSpelling() << quote(rewritePath(arg->getValue())) << '\n'; break; case OPT_order: { @@ -1989,6 +1990,11 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) { if (config->mingw || config->debugDwarf) config->warnLongSectionNames = false; + config->opencilkABIBitcodeFile = + args.getLastArgValue(OPT_opencilk_abi_bitcode); + config->tapirTarget = + args::parseTapirTarget(args.getLastArgValue(OPT_tapir_target)); + if (config->incremental && args.hasArg(OPT_profile)) { warn("ignoring '/incremental' due to '/profile' specification"); config->incremental = false; diff --git a/lld/COFF/LTO.cpp b/lld/COFF/LTO.cpp index 67f5a62920e98e..1c2eca185406fd 100644 --- a/lld/COFF/LTO.cpp +++ b/lld/COFF/LTO.cpp @@ -94,6 +94,9 @@ lto::Config BitcodeCompiler::createConfig() { c.CGOptLevel = 
*optLevelOrNone; c.AlwaysEmitRegularLTOObj = !ctx.config.ltoObjPath.empty(); c.DebugPassManager = ctx.config.ltoDebugPassManager; + if (args::validTapirTarget(ctx.config.tapirTarget)) + c.TapirTarget = ctx.config.tapirTarget; + c.OpenCilkABIBitcodeFile = std::string(ctx.config.opencilkABIBitcodeFile); c.CSIRProfile = std::string(ctx.config.ltoCSProfileFile); c.RunCSIRInstr = ctx.config.ltoCSProfileGenerate; c.PGOWarnMismatch = ctx.config.ltoPGOWarnMismatch; diff --git a/lld/COFF/Options.td b/lld/COFF/Options.td index ea4ddb2d849534..1924494a4cc7e8 100644 --- a/lld/COFF/Options.td +++ b/lld/COFF/Options.td @@ -234,6 +234,9 @@ def include_optional : Joined<["/", "-", "/?", "-?"], "includeoptional:">, def kill_at : F<"kill-at">; def lldmingw : F<"lldmingw">; def noseh : F<"noseh">; +def opencilk_abi_bitcode : P< + "opencilk-abi-bitcode", + "Path to OpenCilk ABI bitcode file">; def osversion : P_priv<"osversion">; def output_def : Joined<["/", "-", "/?", "-?"], "output-def:">; def pdb_source_path : P<"pdbsourcepath", @@ -243,6 +246,7 @@ def rsp_quoting : Joined<["--"], "rsp-quoting=">, def start_lib : F<"start-lib">, HelpText<"Start group of objects treated as if they were in a library">; defm stdcall_fixup : B_priv<"stdcall-fixup">; +def tapir_target : P<"tapir-target", "Specify the target for Tapir lowering">; def thinlto_emit_imports_files : F<"thinlto-emit-imports-files">, HelpText<"Emit .imports files with -thinlto-index-only">; diff --git a/lld/Common/Args.cpp b/lld/Common/Args.cpp index 48c934df3a2c93..2324c64aa211b0 100644 --- a/lld/Common/Args.cpp +++ b/lld/Common/Args.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/Path.h" @@ -91,3 +92,20 @@ StringRef lld::args::getFilenameWithoutExe(StringRef path) { return sys::path::stem(path); return sys::path::filename(path); } + +TapirTargetID 
lld::args::parseTapirTarget(StringRef tapirTarget) { + return llvm::StringSwitch(tapirTarget) + .Case("none", TapirTargetID::None) + .Case("serial", TapirTargetID::Serial) + .Case("cheetah", TapirTargetID::Cheetah) + .Case("cilkplus", TapirTargetID::Cilk) + .Case("lambda", TapirTargetID::Lambda) + .Case("omptask", TapirTargetID::OMPTask) + .Case("opencilk", TapirTargetID::OpenCilk) + .Case("qthreads", TapirTargetID::Qthreads) + .Default(TapirTargetID::Last_TapirTargetID); +} + +bool lld::args::validTapirTarget(TapirTargetID TargetID) { + return TargetID < TapirTargetID::Last_TapirTargetID; +} diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index bbf2d201564581..76f91f13018f04 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -27,6 +27,7 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/GlobPattern.h" #include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Transforms/Tapir/TapirTargetIDs.h" #include #include #include @@ -166,6 +167,7 @@ struct Config { llvm::StringRef ltoSampleProfile; llvm::StringRef mapFile; llvm::StringRef outputFile; + llvm::StringRef opencilkABIBitcodeFile; llvm::StringRef optRemarksFilename; std::optional optRemarksHotnessThreshold = 0; llvm::StringRef optRemarksPasses; @@ -176,6 +178,7 @@ struct Config { llvm::StringRef printSymbolOrder; llvm::StringRef soName; llvm::StringRef sysroot; + llvm::TapirTargetID tapirTarget = llvm::TapirTargetID::None; llvm::StringRef thinLTOCacheDir; llvm::StringRef thinLTOIndexOnlyArg; llvm::StringRef whyExtract; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index c2059c70e15a3d..6f1146608e940c 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -1247,6 +1247,8 @@ static void readConfigs(opt::InputArgList &args) { config->nostdlib = args.hasArg(OPT_nostdlib); config->oFormatBinary = isOutputFormatBinary(args); config->omagic = args.hasFlag(OPT_omagic, OPT_no_omagic, false); + config->opencilkABIBitcodeFile = + args.getLastArgValue(OPT_opencilk_abi_bitcode); 
config->optRemarksFilename = args.getLastArgValue(OPT_opt_remarks_filename); config->optStatsFilename = args.getLastArgValue(OPT_plugin_opt_stats_file); @@ -1304,6 +1306,8 @@ static void readConfigs(opt::InputArgList &args) { config->splitStackAdjustSize = args::getInteger(args, OPT_split_stack_adjust_size, 16384); config->strip = getStrip(args); config->sysroot = args.getLastArgValue(OPT_sysroot); + config->tapirTarget = + args::parseTapirTarget(args.getLastArgValue(OPT_tapir_target)); config->target1Rel = args.hasFlag(OPT_target1_rel, OPT_target1_abs, false); config->target2 = getTarget2(args); config->thinLTOCacheDir = args.getLastArgValue(OPT_thinlto_cache_dir); diff --git a/lld/ELF/DriverUtils.cpp b/lld/ELF/DriverUtils.cpp index 5e3f6d1459d8e6..e071382b03b6dd 100644 --- a/lld/ELF/DriverUtils.cpp +++ b/lld/ELF/DriverUtils.cpp @@ -192,6 +192,7 @@ std::string elf::createResponseFile(const opt::InputArgList &args) { case OPT_export_dynamic_symbol_list: case OPT_just_symbols: case OPT_library_path: + case OPT_opencilk_abi_bitcode: case OPT_remap_inputs_file: case OPT_retain_symbols_file: case OPT_rpath: diff --git a/lld/ELF/LTO.cpp b/lld/ELF/LTO.cpp index e8bfa903726d0e..e659397621953f 100644 --- a/lld/ELF/LTO.cpp +++ b/lld/ELF/LTO.cpp @@ -151,6 +151,10 @@ static lto::Config createConfig() { c.DebugPassManager = config->ltoDebugPassManager; c.DwoDir = std::string(config->dwoDir); + if (args::validTapirTarget(config->tapirTarget)) + c.TapirTarget = config->tapirTarget; + c.OpenCilkABIBitcodeFile = std::string(config->opencilkABIBitcodeFile); + c.HasWholeProgramVisibility = config->ltoWholeProgramVisibility; c.AlwaysEmitRegularLTOObj = !config->ltoObjPath.empty(); diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td index 0d5c6c3d80a106..93f603be46a468 100644 --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -609,6 +609,7 @@ defm lto_whole_program_visibility: BB<"lto-whole-program-visibility", "Asserts that the LTO link does not have whole program visibility">; 
def disable_verify: F<"disable-verify">; defm mllvm: Eq<"mllvm", "Additional arguments to forward to LLVM's option processing">; +defm opencilk_abi_bitcode: EEq<"opencilk-abi-bitcode", "Path to OpenCilk ABI bitcode file">; def opt_remarks_filename: Separate<["--"], "opt-remarks-filename">, HelpText<"YAML output file for optimization remarks">; defm opt_remarks_hotness_threshold: EEq<"opt-remarks-hotness-threshold", @@ -633,6 +634,7 @@ defm shuffle_sections: EEq<"shuffle-sections", "Shuffle matched sections using the given seed before mapping them to the output sections. " "If -1, reverse the section order. If 0, use a random seed">, MetaVarName<"=">; +defm tapir_target: Eq<"tapir-target", "Specify the target for Tapir lowering">; def thinlto_cache_dir: JJ<"thinlto-cache-dir=">, HelpText<"Path to ThinLTO cached object file directory">; defm thinlto_cache_policy: EEq<"thinlto-cache-policy", "Pruning policy for the ThinLTO cache">; @@ -668,6 +670,9 @@ def: J<"plugin-opt=obj-path=">, HelpText<"Alias for --lto-obj-path=">; def plugin_opt_opaque_pointers: F<"plugin-opt=opaque-pointers">, HelpText<"Use opaque pointers in IR during LTO (default)">; +def: J<"plugin-opt=opencilk-abi-bitcode=">, + Alias, + HelpText<"Alias for --opencilk-abi-bitcode">; def: J<"plugin-opt=opt-remarks-filename=">, Alias, HelpText<"Alias for --opt-remarks-filename">; @@ -688,6 +693,9 @@ def: J<"plugin-opt=sample-profile=">, def: F<"plugin-opt=save-temps">, Alias, HelpText<"Alias for --save-temps">; def plugin_opt_stats_file: J<"plugin-opt=stats-file=">, HelpText<"Filename to write LTO statistics to">; +def: J<"plugin-opt=tapir-target=">, + Alias, + HelpText<"Alias for --tapir-target=">; def: F<"plugin-opt=thinlto-emit-imports-files">, Alias, HelpText<"Alias for --thinlto-emit-imports-files">; diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h index 59eb882c078369..c03415c45e65df 100644 --- a/lld/MachO/Config.h +++ b/lld/MachO/Config.h @@ -23,6 +23,7 @@ #include "llvm/TextAPI/Architecture.h" 
#include "llvm/TextAPI/Platform.h" #include "llvm/TextAPI/Target.h" +#include "llvm/Transforms/Tapir/TapirTargetIDs.h" #include @@ -233,6 +234,9 @@ struct Configuration { std::vector dyldEnvs; + llvm::TapirTargetID tapirTarget = llvm::TapirTargetID::None; + llvm::StringRef opencilkABIBitcodeFile; + llvm::MachO::Architecture arch() const { return platformInfo.target.Arch; } llvm::MachO::PlatformType platform() const { diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index ce7f6d567b613b..eb5c56079b54aa 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1641,6 +1641,10 @@ bool link(ArrayRef argsArr, llvm::raw_ostream &stdoutOS, config->csProfileGenerate = args.hasArg(OPT_cs_profile_generate); config->csProfilePath = args.getLastArgValue(OPT_cs_profile_path); config->generateUuid = !args.hasArg(OPT_no_uuid); + config->tapirTarget = + args::parseTapirTarget(args.getLastArgValue(OPT_tapir_target)); + config->opencilkABIBitcodeFile = + args.getLastArgValue(OPT_opencilk_abi_bitcode); for (const Arg *arg : args.filtered(OPT_alias)) { config->aliasedSymbols.push_back( diff --git a/lld/MachO/LTO.cpp b/lld/MachO/LTO.cpp index fdae7e4bd1b7bc..9a03902876dda4 100644 --- a/lld/MachO/LTO.cpp +++ b/lld/MachO/LTO.cpp @@ -73,6 +73,9 @@ static lto::Config createConfig() { c.RunCSIRInstr = config->csProfileGenerate; c.OptLevel = config->ltoo; c.CGOptLevel = config->ltoCgo; + if (args::validTapirTarget(config->tapirTarget)) + c.TapirTarget = config->tapirTarget; + c.OpenCilkABIBitcodeFile = std::string(config->opencilkABIBitcodeFile); if (config->saveTemps) checkError(c.addSaveTemps(config->outputFile.str() + ".", /*UseInputModulePath=*/true)); diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td index b60f5e44c3c1d9..36b7e1b8d19d44 100644 --- a/lld/MachO/Options.td +++ b/lld/MachO/Options.td @@ -130,6 +130,12 @@ def cs_profile_generate: Flag<["--"], "cs-profile-generate">, HelpText<"Perform context senstive PGO instrumentation">, Group; def cs_profile_path: 
Joined<["--"], "cs-profile-path=">, HelpText<"Context sensitive profile file path">, Group; +def tapir_target: Joined<["--"], "tapir-target=">, + HelpText<"Specify the target for Tapir lowering">, + Group; +def opencilk_abi_bitcode: Joined<["--"], "opencilk-abi-bitcode=">, + HelpText<"Path to the OpenCilk ABI bitcode file">, + Group; // This is a complete Options.td compiled from Apple's ld(1) manpage // dated 2018-03-07 and cross checked with ld64 source code in repo diff --git a/lld/include/lld/Common/Args.h b/lld/include/lld/Common/Args.h index 60f83fbbbf1a3c..f0e65c493b996f 100644 --- a/lld/include/lld/Common/Args.h +++ b/lld/include/lld/Common/Args.h @@ -12,6 +12,7 @@ #include "lld/Common/LLVM.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Transforms/Tapir/TapirTargetIDs.h" #include namespace llvm { @@ -40,6 +41,10 @@ std::vector getLines(MemoryBufferRef mb); StringRef getFilenameWithoutExe(StringRef path); +llvm::TapirTargetID parseTapirTarget(StringRef tapirTarget); + +bool validTapirTarget(llvm::TapirTargetID TargetID); + } // namespace args } // namespace lld diff --git a/llvm/.gitignore b/llvm/.gitignore index eb69323201a533..76fad9b907fbea 100644 --- a/llvm/.gitignore +++ b/llvm/.gitignore @@ -65,3 +65,8 @@ docs/_build .sw? #OS X specific files. 
.DS_store + +build/* +build +build-debug/* +build-debug diff --git a/llvm/.gitmodules b/llvm/.gitmodules new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 79de9eb2e3e71b..5f8dcf7a42a080 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -8,6 +8,19 @@ include(${LLVM_COMMON_CMAKE_UTILS}/Modules/CMakePolicy.cmake set(CMAKE_BUILD_WITH_INSTALL_NAME_DIR ON) +if(NOT DEFINED OPENCILK_VERSION_MAJOR) + set(OPENCILK_VERSION_MAJOR 2) +endif() +if(NOT DEFINED OPENCILK_VERSION_MINOR) + set(OPENCILK_VERSION_MINOR 0) +endif() +if(NOT DEFINED OPENCILK_VERSION_PATCH) + set(OPENCILK_VERSION_PATCH 0) +endif() +if(NOT DEFINED OPENCILK_VERSION_SUFFIX) + set(OPENCILK_VERSION_SUFFIX) +endif() + if(NOT DEFINED LLVM_VERSION_MAJOR) set(LLVM_VERSION_MAJOR 17) endif() @@ -21,11 +34,26 @@ if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX) endif() +if(NOT DEFINED TAPIR_VERSION_MAJOR) + set(TAPIR_VERSION_MAJOR 1) +endif() +if(NOT DEFINED TAPIR_VERSION_MINOR) + set(TAPIR_VERSION_MINOR 0) +endif() +if(NOT DEFINED TAPIR_VERSION_PATCH) + set(TAPIR_VERSION_PATCH 0) +endif() + if (NOT PACKAGE_VERSION) set(PACKAGE_VERSION "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}") endif() +if (NOT OPENCILK_PACKAGE_VERSION) + set(OPENCILK_PACKAGE_VERSION + "${OPENCILK_VERSION_MAJOR}.${OPENCILK_VERSION_MINOR}.${OPENCILK_VERSION_PATCH}${OPENCILK_VERSION_SUFFIX}") +endif() + if(NOT DEFINED LLVM_SHLIB_SYMBOL_VERSION) # "Symbol version prefix for libLLVM.so" set(LLVM_SHLIB_SYMBOL_VERSION "LLVM_${LLVM_VERSION_MAJOR}") @@ -116,7 +144,7 @@ endif() # LLVM_EXTERNAL_${project}_SOURCE_DIR using LLVM_ALL_PROJECTS # This allows an easy way of setting up a build directory for llvm and another # one for llvm+clang+... using the same sources. 
-set(LLVM_ALL_PROJECTS "bolt;clang;clang-tools-extra;compiler-rt;cross-project-tests;libc;libclc;lld;lldb;mlir;openmp;polly;pstl") +set(LLVM_ALL_PROJECTS "bolt;cheetah;cilktools;clang;clang-tools-extra;compiler-rt;cross-project-tests;libc;libclc;lld;lldb;mlir;openmp;polly;pstl") # The flang project is not yet part of "all" projects (see C++ requirements) set(LLVM_EXTRA_PROJECTS "flang") # List of all known projects in the mono repo @@ -149,7 +177,7 @@ endif() # As we migrate runtimes to using the bootstrapping build, the set of default runtimes # should grow as we remove those runtimes from LLVM_ENABLE_PROJECTS above. set(LLVM_DEFAULT_RUNTIMES "libcxx;libcxxabi;libunwind") -set(LLVM_SUPPORTED_RUNTIMES "libc;libunwind;libcxxabi;pstl;libcxx;compiler-rt;openmp;llvm-libgcc") +set(LLVM_SUPPORTED_RUNTIMES "libc;libunwind;libcxxabi;pstl;libcxx;compiler-rt;cheetah;cilktools;openmp;llvm-libgcc") set(LLVM_ENABLE_RUNTIMES "" CACHE STRING "Semicolon-separated list of runtimes to build, or \"all\" (${LLVM_DEFAULT_RUNTIMES}). Supported runtimes are ${LLVM_SUPPORTED_RUNTIMES}.") if(LLVM_ENABLE_RUNTIMES STREQUAL "all") @@ -329,23 +357,26 @@ option(LLVM_TOOL_LLVM_DRIVER_BUILD "Enables building the llvm multicall tool" OF set(PACKAGE_NAME LLVM) set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") -set(PACKAGE_BUGREPORT "https://github.com/llvm/llvm-project/issues/") +set(PACKAGE_BUGREPORT "https://github.com/OpenCilk/opencilk-project/issues/") set(BUG_REPORT_URL "${PACKAGE_BUGREPORT}" CACHE STRING "Default URL where bug reports are to be submitted.") # Configure CPack. 
+if(NOT DEFINED CPACK_PACKAGE_NAME) + set(CPACK_PACKAGE_NAME "OpenCilk") +endif() if(NOT DEFINED CPACK_PACKAGE_INSTALL_DIRECTORY) set(CPACK_PACKAGE_INSTALL_DIRECTORY "LLVM") endif() if(NOT DEFINED CPACK_PACKAGE_VENDOR) - set(CPACK_PACKAGE_VENDOR "LLVM") + set(CPACK_PACKAGE_VENDOR "OpenCilk") endif() -set(CPACK_PACKAGE_VERSION_MAJOR ${LLVM_VERSION_MAJOR}) -set(CPACK_PACKAGE_VERSION_MINOR ${LLVM_VERSION_MINOR}) -set(CPACK_PACKAGE_VERSION_PATCH ${LLVM_VERSION_PATCH}) -set(CPACK_PACKAGE_VERSION ${PACKAGE_VERSION}) -set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE.TXT") +set(CPACK_PACKAGE_VERSION_MAJOR ${OPENCILK_VERSION_MAJOR}) +set(CPACK_PACKAGE_VERSION_MINOR ${OPENCILK_VERSION_MINOR}) +set(CPACK_PACKAGE_VERSION_PATCH ${OPENCILK_VERSION_PATCH}) +set(CPACK_PACKAGE_VERSION ${OPENCILK_PACKAGE_VERSION}) +set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/../MIT_LICENSE.TXT") if(WIN32 AND NOT UNIX) set(CPACK_NSIS_COMPRESSOR "/SOLID lzma \r\n SetCompressorDictSize 32") if(NOT DEFINED CPACK_PACKAGE_INSTALL_REGISTRY_KEY) diff --git a/llvm/CODE_OWNERS.TXT b/llvm/CODE_OWNERS.TXT index d81b757bad7fcf..2f8305874b404e 100644 --- a/llvm/CODE_OWNERS.TXT +++ b/llvm/CODE_OWNERS.TXT @@ -212,6 +212,10 @@ N: Duncan Sands E: baldrick@free.fr D: DragonEgg +N: Tao B. Schardl +E: neboat@mit.edu +D: Tapir, CSI and CilkSanitizer instrumentation passes + N: Mark Schimmel E: marksl@synopsys.com D: ARC backend (lib/Target/ARC/*) diff --git a/llvm/CREDITS.TXT b/llvm/CREDITS.TXT index 0ae5cb8d32ff7a..89ac0121322398 100644 --- a/llvm/CREDITS.TXT +++ b/llvm/CREDITS.TXT @@ -74,6 +74,10 @@ N: Brendon Cahoon E: bcahoon@codeaurora.org D: Loop unrolling with run-time trip counts. 
+N: John Carr +E: jfc@mit.edu +D: OpenCilk frontend, Tapir lowering to OpenCilk, _Hyperobject type, Tapir function attributes + N: Chandler Carruth E: chandlerc@gmail.com E: chandlerc@google.com @@ -115,6 +119,10 @@ N: Anshuman Dasgupta E: adasgupt@codeaurora.org D: Deterministic finite automaton based infrastructure for VLIW packetization +N: Tyler Denniston +E: denniston.t@gmail.com +D: CSI instrumentation pass and runtime + N: Stefanus Du Toit E: stefanus.du.toit@intel.com D: Bug fixes and minor improvements @@ -311,6 +319,10 @@ W: https://apt.llvm.org/ D: Debian and Ubuntu packaging D: Continuous integration with jenkins +N: I-Ting Angelina Lee +E: angelee@wustl.edu +D: cilksan + N: Andrew Lenharth E: alenhar2@cs.uiuc.edu W: http://www.lenharth.org/~andrewl/ @@ -362,6 +374,10 @@ N: Scott Michel E: scottm@aero.org D: Added STI Cell SPU backend. +N: William S. Moses +E: wmoses@mit.edu +D: Tapir, Tapir lowering passes for Cilk and OpenMP + N: Kai Nacke E: kai@redstar.de D: Support for implicit TLS model used with MS VC runtime @@ -481,6 +497,11 @@ N: Alina Sbirlea E: alina.sbirlea@gmail.com D: MemorySSA, BatchAA, misc loop and new pass manager work. +N: Tao B. Schardl +E: neboat@mit.edu +D: Tapir, Cilk frontend, Tapir lowering passes for Cilk +D: CSI, cilksan, cilkscale + N: Arnold Schwaighofer E: arnold.schwaighofer@gmail.com D: Tail call optimization for the x86 backend @@ -533,6 +554,10 @@ E: lauro.venancio@indt.org.br D: ARM backend improvements D: Thread Local Storage implementation +N: Daniele Vettorel +E: vettoreldaniele@gmail.com +D: CSI modifications to support JIT compilation + N: Phoebe Wang E: phoebe.wang@intel.com D: X86 bug fixes and new instruction support. 
diff --git a/llvm/README.md b/llvm/README.md new file mode 100644 index 00000000000000..2bd1521d357436 --- /dev/null +++ b/llvm/README.md @@ -0,0 +1,23 @@ +Tapir/LLVM +================================ + +This directory and its subdirectories contain source code for +Tapir/LLVM, a prototype compiler based on LLVM that implements the +Tapir compiler IR extensions for fork-join parallelism. + +Tapir/LLVM is under active development. This directory contains +prototype implementations of compiler technologies that take advantage +of the Tapir compiler IR. + +Tapir/LLVM is open source software. You may freely distribute it +under the terms of the license agreement found in LICENSE.txt. + +![](https://github.com/wsmoses/Tapir-LLVM/workflows/Tapir%20CI/badge.svg) + + +# References + +T. B. Schardl, W. S. Moses, C. E. Leiserson. "Tapir: Embedding +Fork-Join Parallelism into LLVM's Intermediate Representation." ACM +PPoPP, February 2017, pp. 249-265. Won Best Paper Award. +http://dl.acm.org/citation.cfm?id=3018758 diff --git a/llvm/README.txt b/llvm/README.txt deleted file mode 100644 index b9b71a3b6daff1..00000000000000 --- a/llvm/README.txt +++ /dev/null @@ -1,17 +0,0 @@ -The LLVM Compiler Infrastructure -================================ - -This directory and its subdirectories contain source code for LLVM, -a toolkit for the construction of highly optimized compilers, -optimizers, and runtime environments. - -LLVM is open source software. You may freely distribute it under the terms of -the license agreement found in LICENSE.txt. - -Please see the documentation provided in docs/ for further -assistance with LLVM, and in particular docs/GettingStarted.rst for getting -started with LLVM and docs/README.txt for an overview of LLVM's -documentation setup. - -If you are writing a package for LLVM, see docs/Packaging.rst for our -suggestions. 
diff --git a/llvm/WORKSPACE b/llvm/WORKSPACE new file mode 100644 index 00000000000000..920b03c8faf0e9 --- /dev/null +++ b/llvm/WORKSPACE @@ -0,0 +1 @@ +workspace( name = "llvm" ) diff --git a/llvm/bindings/ocaml/llvm/META.llvm.in b/llvm/bindings/ocaml/llvm/META.llvm.in index 7c87039ea46546..a8f9bb3bcef692 100644 --- a/llvm/bindings/ocaml/llvm/META.llvm.in +++ b/llvm/bindings/ocaml/llvm/META.llvm.in @@ -53,6 +53,14 @@ package "irreader" ( archive(native) = "llvm_irreader.cmxa" ) +package "tapir_opts" ( + requires = "llvm" + version = "@PACKAGE_VERSION@" + description = "Tapir Transforms for LLVM" + archive(byte) = "llvm_tapir_opts.cma" + archive(native) = "llvm_tapir_opts.cmxa" +) + package "transform_utils" ( requires = "llvm" version = "@PACKAGE_VERSION@" diff --git a/llvm/bindings/ocaml/llvm/llvm.ml b/llvm/bindings/ocaml/llvm/llvm.ml index 77de9a6e46fae3..6ef2749cccb703 100644 --- a/llvm/bindings/ocaml/llvm/llvm.ml +++ b/llvm/bindings/ocaml/llvm/llvm.ml @@ -1349,6 +1349,15 @@ external build_icmp : Icmp.t -> llvalue -> llvalue -> string -> external build_fcmp : Fcmp.t -> llvalue -> llvalue -> string -> llbuilder -> llvalue = "llvm_build_fcmp" +(*--.. Parallel constructs .................................................--*) + +external build_detach : llbasicblock -> llbasicblock -> llvalue -> llbuilder -> + llvalue = "llvm_build_detach" +external build_reattach : llbasicblock -> llvalue -> llbuilder -> llvalue + = "llvm_build_reattach" +external build_sync : llbasicblock -> llvalue -> llbuilder -> llvalue + = "llvm_build_sync" + (*--... 
Miscellaneous instructions .........................................--*) external build_phi : (llvalue * llbasicblock) list -> string -> llbuilder -> llvalue = "llvm_build_phi" diff --git a/llvm/bindings/ocaml/llvm/llvm.mli b/llvm/bindings/ocaml/llvm/llvm.mli index 9c8b3b883e1498..72f517dfd88cbd 100644 --- a/llvm/bindings/ocaml/llvm/llvm.mli +++ b/llvm/bindings/ocaml/llvm/llvm.mli @@ -2498,6 +2498,27 @@ val build_fcmp : Fcmp.t -> llvalue -> llvalue -> string -> llbuilder -> llvalue +(** {Parallel constructs} *) + +(** [build_detach dbb cbb r b] creates a + [detach within %r, %dbb, %cbb] + instruction at the position specified by the instruction builder [b]. + See the method [llvm::LLVMBuilder::CreateDetach]. *) +val build_detach : llbasicblock -> llbasicblock -> llvalue -> llbuilder -> + llvalue + +(** [build_reattach bb r b] creates a + [reattach within %r, %bb] + instruction at the position specified by the instruction builder [b]. + See the method [llvm::LLVMBuilder::CreateReattach]. *) +val build_reattach : llbasicblock -> llvalue -> llbuilder -> llvalue + +(** [build_sync bb r b] creates a + [sync within %r, %bb] + instruction at the position specified by the instruction builder [b]. + See the method [llvm::LLVMBuilder::CreateSync]. *) +val build_sync : llbasicblock -> llvalue -> llbuilder -> llvalue + (** {7 Miscellaneous instructions} *) (** [build_phi incoming name b] creates a diff --git a/llvm/bindings/ocaml/llvm/llvm_ocaml.c b/llvm/bindings/ocaml/llvm/llvm_ocaml.c index 0154b2f49c2528..608f6013bf70e3 100644 --- a/llvm/bindings/ocaml/llvm/llvm_ocaml.c +++ b/llvm/bindings/ocaml/llvm/llvm_ocaml.c @@ -2678,6 +2678,30 @@ value llvm_build_fcmp(value Pred, value LHS, value RHS, value Name, value B) { Value_val(RHS), String_val(Name))); } +/*--.. 
Parallel constructs .................................................--*/ + +/* llbasicblock -> llbasicblock -> llvalue -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_detach(LLVMBasicBlockRef DetachBB, + LLVMBasicBlockRef ContinueBB, + LLVMValueRef SyncRegion, + LLVMBuilderRef B) { + return LLVMBuildDetach(Builder_val(B), DetachBB, ContinueBB, SyncRegion); +} + +/* llbasicblock -> llvalue -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_reattach(LLVMBasicBlockRef ReattachBB, + LLVMValueRef SyncRegion, + LLVMBuilderRef B) { + return LLVMBuildReattach(Builder_val(B), ReattachBB, SyncRegion); +} + +/* llbasicblock -> llvalue -> llbuilder -> llvalue */ +CAMLprim LLVMValueRef llvm_build_sync(LLVMBasicBlockRef ContinueBB, + LLVMValueRef SyncRegion, + LLVMBuilderRef B) { + return LLVMBuildSync(Builder_val(B), ContinueBB, SyncRegion); +} + /*--... Miscellaneous instructions .........................................--*/ /* (llvalue * llbasicblock) list -> string -> llbuilder -> llvalue */ diff --git a/llvm/bindings/ocaml/transforms/CMakeLists.txt b/llvm/bindings/ocaml/transforms/CMakeLists.txt index 0628d6763874ed..54906234f16d36 100644 --- a/llvm/bindings/ocaml/transforms/CMakeLists.txt +++ b/llvm/bindings/ocaml/transforms/CMakeLists.txt @@ -1,2 +1,3 @@ +add_subdirectory(tapir_opts) add_subdirectory(utils) diff --git a/llvm/bindings/ocaml/transforms/tapir_opts/CMakeLists.txt b/llvm/bindings/ocaml/transforms/tapir_opts/CMakeLists.txt new file mode 100644 index 00000000000000..d315ca0c9ab660 --- /dev/null +++ b/llvm/bindings/ocaml/transforms/tapir_opts/CMakeLists.txt @@ -0,0 +1,5 @@ +add_ocaml_library(llvm_tapir_opts + OCAML llvm_tapir_opts + OCAMLDEP llvm + C tapir_opts_ocaml + LLVM tapiropts) diff --git a/llvm/bindings/ocaml/transforms/tapir_opts/llvm_tapir_opts.ml b/llvm/bindings/ocaml/transforms/tapir_opts/llvm_tapir_opts.ml new file mode 100644 index 00000000000000..1a12243265a716 --- /dev/null +++ 
b/llvm/bindings/ocaml/transforms/tapir_opts/llvm_tapir_opts.ml @@ -0,0 +1,19 @@ +(*===-- llvm_tapir_opts.ml - LLVM OCaml Interface -------------*- OCaml -*-===* + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===*) + +(** Tapir pass to install Cilky (or other target-specific) stuff in place of + detach/sync instructions. *) +external add_lower_tapir_to_target : + [ `Module ] Llvm.PassManager.t -> unit + = "llvm_add_lower_tapir_to_target" + +(** Tapir pass to spawn loops with recursive divide-and-conquer. *) +external add_loop_spawning : + [ `Module ] Llvm.PassManager.t -> unit + = "llvm_add_loop_spawning" diff --git a/llvm/bindings/ocaml/transforms/tapir_opts/llvm_tapir_opts.mli b/llvm/bindings/ocaml/transforms/tapir_opts/llvm_tapir_opts.mli new file mode 100644 index 00000000000000..1a12243265a716 --- /dev/null +++ b/llvm/bindings/ocaml/transforms/tapir_opts/llvm_tapir_opts.mli @@ -0,0 +1,19 @@ +(*===-- llvm_tapir_opts.ml - LLVM OCaml Interface -------------*- OCaml -*-===* + * + * The LLVM Compiler Infrastructure + * + * This file is distributed under the University of Illinois Open Source + * License. See LICENSE.TXT for details. + * + *===----------------------------------------------------------------------===*) + +(** Tapir pass to install Cilky (or other target-specific) stuff in place of + detach/sync instructions. *) +external add_lower_tapir_to_target : + [ `Module ] Llvm.PassManager.t -> unit + = "llvm_add_lower_tapir_to_target" + +(** Tapir pass to spawn loops with recursive divide-and-conquer. 
*) +external add_loop_spawning : + [ `Module ] Llvm.PassManager.t -> unit + = "llvm_add_loop_spawning" diff --git a/llvm/bindings/ocaml/transforms/tapir_opts/tapir_opts_ocaml.c b/llvm/bindings/ocaml/transforms/tapir_opts/tapir_opts_ocaml.c new file mode 100644 index 00000000000000..207b9549b90896 --- /dev/null +++ b/llvm/bindings/ocaml/transforms/tapir_opts/tapir_opts_ocaml.c @@ -0,0 +1,33 @@ +/*===-- tapir_opts_ocaml.c - LLVM OCaml Glue --------------------*- C++ -*-===*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This file glues LLVM's OCaml interface to its C interface. These functions *| +|* are by and large transparent wrappers to the corresponding C functions. *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#include "caml/custom.h" +#include "llvm-c/Transforms/PassManagerBuilder.h" +#include "llvm-c/Transforms/Tapir.h" + +/* [`Module] Llvm.PassManager.t -> unit + */ +CAMLprim value llvm_add_lower_tapir_to_target(LLVMPassManagerRef PM) +{ + LLVMAddLowerTapirToTargetPass(PM); + return Val_unit; +} + +/* [`Module] Llvm.PassManager.t -> unit + */ +CAMLprim value llvm_add_loop_spawning(LLVMPassManagerRef PM) +{ + LLVMAddLoopSpawningPass(PM); + return Val_unit; +} diff --git a/llvm/cmake/config.guess b/llvm/cmake/config.guess index 71abbf939f97f4..71d266ad8bdaca 100644 --- a/llvm/cmake/config.guess +++ b/llvm/cmake/config.guess @@ -4,7 +4,7 @@ # 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, # 2011 Free Software Foundation, Inc. 
-timestamp='2011-08-20' +timestamp='2021-03-08' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by diff --git a/llvm/examples/Kaleidoscope/CMakeLists.txt b/llvm/examples/Kaleidoscope/CMakeLists.txt index 3822cdd9e1c49f..06a1706bfe7eea 100644 --- a/llvm/examples/Kaleidoscope/CMakeLists.txt +++ b/llvm/examples/Kaleidoscope/CMakeLists.txt @@ -15,3 +15,4 @@ add_subdirectory(Chapter6) add_subdirectory(Chapter7) add_subdirectory(Chapter8) add_subdirectory(Chapter9) +add_subdirectory(Tapir) diff --git a/llvm/examples/Kaleidoscope/Tapir/CMakeLists.txt b/llvm/examples/Kaleidoscope/Tapir/CMakeLists.txt new file mode 100644 index 00000000000000..8bd3d0e9f2908f --- /dev/null +++ b/llvm/examples/Kaleidoscope/Tapir/CMakeLists.txt @@ -0,0 +1,20 @@ +set(LLVM_LINK_COMPONENTS + Analysis + Core + ExecutionEngine + InstCombine + Object + OrcJIT + RuntimeDyld + ScalarOpts + Support + TapirOpts + TransformUtils + native + ) + +add_kaleidoscope_chapter(Kaleidoscope-Tapir + toy.cpp + ) + +export_executable_symbols(Kaleidoscope-Tapir) diff --git a/llvm/examples/Kaleidoscope/Tapir/KaleidoscopeJIT.h b/llvm/examples/Kaleidoscope/Tapir/KaleidoscopeJIT.h new file mode 100644 index 00000000000000..38da08ffe2c9f4 --- /dev/null +++ b/llvm/examples/Kaleidoscope/Tapir/KaleidoscopeJIT.h @@ -0,0 +1,206 @@ +//===- KaleidoscopeJIT.h - A simple JIT for Kaleidoscope --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Contains a simple JIT definition for use in the kaleidoscope tutorials. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H +#define LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/Orc/CompileUtils.h" +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" +#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h" +#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" +#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h" +#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" +#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h" +#include "llvm/ExecutionEngine/SectionMemoryManager.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/FormatVariadic.h" +#include + +namespace llvm { +namespace orc { + +class KaleidoscopeJIT { +private: + std::unique_ptr ES; + + DataLayout DL; + MangleAndInterner Mangle; + + RTDyldObjectLinkingLayer ObjectLayer; + IRCompileLayer CompileLayer; + IRTransformLayer InitHelperTransformLayer; + + JITDylib &MainJD; + + SymbolLookupSet InitFunctions; + SymbolLookupSet DeInitFunctions; + + /// This transform parses llvm.global_ctors to produce a single initialization + /// function for the module, records the function, then deletes + /// llvm.global_ctors. 
+ class GlobalCtorDtorScraper { + public: + GlobalCtorDtorScraper(ExecutionSession &ES, SymbolLookupSet &InitFunctions, + StringRef InitFunctionPrefix) + : ES(ES), InitFunctions(InitFunctions), + InitFunctionPrefix(InitFunctionPrefix) {} + Expected operator()(ThreadSafeModule TSM, + MaterializationResponsibility &R) { + auto Err = TSM.withModuleDo([&](Module &M) -> Error { + auto &Ctx = M.getContext(); + auto *GlobalCtors = M.getNamedGlobal("llvm.global_ctors"); + // If there's no llvm.global_ctors or it's just a decl then skip. + if (!GlobalCtors || GlobalCtors->isDeclaration()) + return Error::success(); + + std::string InitFunctionName; + raw_string_ostream(InitFunctionName) + << InitFunctionPrefix << M.getModuleIdentifier(); + + MangleAndInterner Mangle(ES, M.getDataLayout()); + auto InternedName = Mangle(InitFunctionName); + if (auto Err = R.defineMaterializing( + {{InternedName, JITSymbolFlags::Callable}})) + return Err; + + auto *InitFunc = Function::Create( + FunctionType::get(Type::getVoidTy(Ctx), {}, false), + GlobalValue::ExternalLinkage, InitFunctionName, &M); + InitFunc->setVisibility(GlobalValue::HiddenVisibility); + std::vector> Inits; + for (auto E : getConstructors(M)) + Inits.push_back(std::make_pair(E.Func, E.Priority)); + llvm::sort(Inits, [](const std::pair &LHS, + const std::pair &RHS) { + return LHS.first < RHS.first; + }); + auto *EntryBlock = BasicBlock::Create(Ctx, "entry", InitFunc); + IRBuilder<> IB(EntryBlock); + for (auto &KV : Inits) + IB.CreateCall(KV.first); + IB.CreateRetVoid(); + + ES.runSessionLocked([&]() { InitFunctions.add(InternedName); }); + GlobalCtors->eraseFromParent(); + return Error::success(); + }); + + if (Err) + return std::move(Err); + + return std::move(TSM); + } + + private: + ExecutionSession &ES; + SymbolLookupSet &InitFunctions; + StringRef InitFunctionPrefix; + }; + +public: + KaleidoscopeJIT(std::unique_ptr ES, + JITTargetMachineBuilder JTMB, DataLayout DL) + : ES(std::move(ES)), DL(std::move(DL)), + 
Mangle(*this->ES, this->DL), + ObjectLayer(*this->ES, + []() { return std::make_unique(); }), + CompileLayer(*this->ES, ObjectLayer, + std::make_unique(std::move(JTMB))), + InitHelperTransformLayer( + *this->ES, CompileLayer, + GlobalCtorDtorScraper(*this->ES, InitFunctions, "my_init.")), + MainJD(this->ES->createBareJITDylib("

")) { + MainJD.addGenerator( + cantFail(DynamicLibrarySearchGenerator::GetForCurrentProcess( + DL.getGlobalPrefix()))); + } + + ~KaleidoscopeJIT() { + if (auto Err = ES->endSession()) + ES->reportError(std::move(Err)); + } + + static Expected> Create() { + auto SSP = std::make_shared(); + auto EPC = SelfExecutorProcessControl::Create(); + if (!EPC) + return EPC.takeError(); + + auto ES = std::make_unique(std::move(*EPC)); + + JITTargetMachineBuilder JTMB( + ES->getExecutorProcessControl().getTargetTriple()); + + auto DL = JTMB.getDefaultDataLayoutForTarget(); + if (!DL) + return DL.takeError(); + + return std::make_unique(std::move(ES), + std::move(JTMB), std::move(*DL)); + } + + const DataLayout &getDataLayout() const { return DL; } + + JITDylib &getMainJITDylib() { return MainJD; } + + void loadLibrary(const char *FileName) { + MainJD.addGenerator(cantFail( + DynamicLibrarySearchGenerator::Load(FileName, DL.getGlobalPrefix()))); + } + + Error addModule(ThreadSafeModule TSM, ResourceTrackerSP RT = nullptr) { + if (!RT) + RT = MainJD.getDefaultResourceTracker(); + return InitHelperTransformLayer.add(RT, std::move(TSM)); + } + + Error initialize() { + if (InitFunctions.empty()) + // Nothing to do if there are no initializers. + return Error::success(); + + // Lookup the symbols for the initializer functions. + DenseMap LookupSymbols; + LookupSymbols[&MainJD] = std::move(InitFunctions); + auto LookupResult = Platform::lookupInitSymbols(*ES, LookupSymbols); + if (!LookupResult) + return LookupResult.takeError(); + + // Collect the addresses of those symbols. + std::vector Initializers; + auto InitsItr = LookupResult->find(&MainJD); + for (auto &KV : InitsItr->second) + Initializers.push_back(KV.second.getAddress()); + + // Run all initializer functions. 
+ for (auto InitFnAddr : Initializers) { + auto *InitFn = InitFnAddr.toPtr(); + InitFn(); + } + return Error::success(); + } + + Expected lookup(StringRef Name) { + return ES->lookup({&MainJD}, Mangle(Name.str())); + } +}; + +} // end namespace orc +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_ORC_KALEIDOSCOPEJIT_H diff --git a/llvm/examples/Kaleidoscope/Tapir/toy.cpp b/llvm/examples/Kaleidoscope/Tapir/toy.cpp new file mode 100644 index 00000000000000..1aa1e1bb24bd75 --- /dev/null +++ b/llvm/examples/Kaleidoscope/Tapir/toy.cpp @@ -0,0 +1,2163 @@ +#include "KaleidoscopeJIT.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/AliasAnalysisEvaluator.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TapirRaceDetect.h" +#include "llvm/Analysis/TapirTaskInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Verifier.h" +#include "llvm/IRPrinter/IRPrintingPasses.h" +#include "llvm/Support/Process.h" +#include "llvm/Support/TargetSelect.h" +#include 
"llvm/Support/Timer.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/TargetParser/Host.h" +#include "llvm/Transforms/IPO/AlwaysInliner.h" +#include "llvm/Transforms/InstCombine/InstCombine.h" +#include "llvm/Transforms/Instrumentation/CilkSanitizer.h" +#include "llvm/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/EarlyCSE.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/IndVarSimplify.h" +#include "llvm/Transforms/Scalar/LICM.h" +#include "llvm/Transforms/Scalar/LoopInstSimplify.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Transforms/Scalar/LoopRotation.h" +#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h" +#include "llvm/Transforms/Scalar/Reassociate.h" +#include "llvm/Transforms/Scalar/SROA.h" +#include "llvm/Transforms/Scalar/SimplifyCFG.h" +#include "llvm/Transforms/Tapir.h" +#include "llvm/Transforms/Tapir/LoopSpawningTI.h" +#include "llvm/Transforms/Tapir/TapirToTarget.h" +#include "llvm/Transforms/Utils.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace llvm; +using namespace llvm::orc; + +//===----------------------------------------------------------------------===// +// Lexer +//===----------------------------------------------------------------------===// + +// The lexer returns tokens [0-255] if it is an unknown character, otherwise one +// of these for known things. 
+enum Token { + tok_eof = -1, + + // commands + tok_def = -2, + tok_extern = -3, + + // primary + tok_identifier = -4, + tok_number = -5, + tok_integer = -6, + + // control + tok_if = -7, + tok_then = -8, + tok_else = -9, + tok_for = -10, + tok_in = -11, + + // operators + tok_binary = -12, + tok_unary = -13, + + // var definition + tok_var = -14, + + // parallel control + tok_spawn = -15, + tok_sync = -16, + tok_parfor = -17 +}; + +static std::string IdentifierStr; // Filled in if tok_identifier +static int64_t IntVal; // Filled in if tok_integer +static double NumVal; // Filled in if tok_number + +/// gettok - Return the next token from standard input. +static int gettok() { + static int LastChar = ' '; + + // Skip any whitespace. + while (isspace(LastChar)) + LastChar = getchar(); + + if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]* + IdentifierStr = LastChar; + while (isalnum((LastChar = getchar()))) + IdentifierStr += LastChar; + + if (IdentifierStr == "def") + return tok_def; + if (IdentifierStr == "extern") + return tok_extern; + if (IdentifierStr == "if") + return tok_if; + if (IdentifierStr == "then") + return tok_then; + if (IdentifierStr == "else") + return tok_else; + if (IdentifierStr == "for") + return tok_for; + if (IdentifierStr == "in") + return tok_in; + if (IdentifierStr == "binary") + return tok_binary; + if (IdentifierStr == "unary") + return tok_unary; + if (IdentifierStr == "var") + return tok_var; + if (IdentifierStr == "spawn") + return tok_spawn; + if (IdentifierStr == "sync") + return tok_sync; + if (IdentifierStr == "parfor") + return tok_parfor; + return tok_identifier; + } + + { + std::string NumStr; + if (isdigit(LastChar)) { // Integer: [0-9]+ + do { + NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar)); + if (LastChar != '.') { + IntVal = strtol(NumStr.c_str(), nullptr, 10); + return tok_integer; + } + } + if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+ + // std::string NumStr; + do { 
+ NumStr += LastChar; + LastChar = getchar(); + } while (isdigit(LastChar) || LastChar == '.'); + + NumVal = strtod(NumStr.c_str(), nullptr); + return tok_number; + } + } + + if (LastChar == '#') { + // Comment until end of line. + do + LastChar = getchar(); + while (LastChar != EOF && LastChar != '\n' && LastChar != '\r'); + + if (LastChar != EOF) + return gettok(); + } + + // Check for end of file. Don't eat the EOF. + if (LastChar == EOF) + return tok_eof; + + // Otherwise, just return the character as its ascii value. + int ThisChar = LastChar; + LastChar = getchar(); + return ThisChar; +} + +//===----------------------------------------------------------------------===// +// Abstract Syntax Tree (aka Parse Tree) +//===----------------------------------------------------------------------===// + +namespace { + +/// ExprAST - Base class for all expression nodes. +class ExprAST { +public: + virtual ~ExprAST() = default; + + virtual Value *codegen() = 0; + virtual void setIntegerRes(bool v = true) {} +}; + +/// IntegerExprAST - Expression class for integer literals like "1". +class IntegerExprAST : public ExprAST { + int64_t Val; + +public: + IntegerExprAST(int64_t Val) : Val(Val) {} + + Value *codegen() override; +}; + +/// NumberExprAST - Expression class for numeric literals like "1.0". +class NumberExprAST : public ExprAST { + double Val; + +public: + NumberExprAST(double Val) : Val(Val) {} + + Value *codegen() override; +}; + +/// VariableExprAST - Expression class for referencing a variable, like "a". +class VariableExprAST : public ExprAST { + std::string Name; + +public: + VariableExprAST(const std::string &Name) : Name(Name) {} + + Value *codegen() override; + const std::string &getName() const { return Name; } +}; + +/// UnaryExprAST - Expression class for a unary operator. 
+class UnaryExprAST : public ExprAST { + char Opcode; + std::unique_ptr Operand; + +public: + UnaryExprAST(char Opcode, std::unique_ptr Operand) + : Opcode(Opcode), Operand(std::move(Operand)) {} + + Value *codegen() override; +}; + +/// BinaryExprAST - Expression class for a binary operator. +class BinaryExprAST : public ExprAST { + char Op; + bool IntegerRes = false; + std::unique_ptr LHS, RHS; + +public: + BinaryExprAST(char Op, std::unique_ptr LHS, + std::unique_ptr RHS) + : Op(Op), LHS(std::move(LHS)), RHS(std::move(RHS)) {} + + Value *codegen() override; + void setIntegerRes(bool v = true) override { IntegerRes = v; } +}; + +/// CallExprAST - Expression class for function calls. +class CallExprAST : public ExprAST { + std::string Callee; + std::vector> Args; + +public: + CallExprAST(const std::string &Callee, + std::vector> Args) + : Callee(Callee), Args(std::move(Args)) {} + + Value *codegen() override; +}; + +/// IfExprAST - Expression class for if/then/else. +class IfExprAST : public ExprAST { + std::unique_ptr Cond, Then, Else; + +public: + IfExprAST(std::unique_ptr Cond, std::unique_ptr Then, + std::unique_ptr Else) + : Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {} + + Value *codegen() override; +}; + +/// ForExprAST - Expression class for for/in. 
+class ForExprAST : public ExprAST { + std::string VarName; + std::unique_ptr Start, End, Step, Body; + +public: + ForExprAST(const std::string &VarName, std::unique_ptr Start, + std::unique_ptr End, std::unique_ptr Step, + std::unique_ptr Body) + : VarName(VarName), Start(std::move(Start)), End(std::move(End)), + Step(std::move(Step)), Body(std::move(Body)) {} + + Value *codegen() override; +}; + +/// VarExprAST - Expression class for var/in +class VarExprAST : public ExprAST { + std::vector>> VarNames; + std::unique_ptr Body; + +public: + VarExprAST( + std::vector>> VarNames, + std::unique_ptr Body) + : VarNames(std::move(VarNames)), Body(std::move(Body)) {} + + Value *codegen() override; +}; + +/// SpawnExprAST - Expression class for spawn. +class SpawnExprAST : public ExprAST { + std::unique_ptr Spawned; + +public: + SpawnExprAST(std::unique_ptr Spawned) + : Spawned(std::move(Spawned)) {} + + Value *codegen() override; +}; + +/// SyncExprAST - Expression class for spawn. +class SyncExprAST : public ExprAST { +public: + SyncExprAST() {} + + Value *codegen() override; +}; + +/// ParForExprAST - Expression class for parfor/in. +class ParForExprAST : public ExprAST { + std::string VarName; + std::unique_ptr Start, End, Step, Body; + +public: + ParForExprAST(const std::string &VarName, std::unique_ptr Start, + std::unique_ptr End, std::unique_ptr Step, + std::unique_ptr Body) + : VarName(VarName), Start(std::move(Start)), End(std::move(End)), + Step(std::move(Step)), Body(std::move(Body)) {} + + Value *codegen() override; +}; + +/// PrototypeAST - This class represents the "prototype" for a function, +/// which captures its name, and its argument names (thus implicitly the number +/// of arguments the function takes), as well as if it is an operator. +class PrototypeAST { + std::string Name; + std::vector Args; + bool IsOperator; + unsigned Precedence; // Precedence if a binary op. 
+ +public: + PrototypeAST(const std::string &Name, std::vector Args, + bool IsOperator = false, unsigned Prec = 0) + : Name(Name), Args(std::move(Args)), IsOperator(IsOperator), + Precedence(Prec) {} + + Function *codegen(); + const std::string &getName() const { return Name; } + + bool isUnaryOp() const { return IsOperator && Args.size() == 1; } + bool isBinaryOp() const { return IsOperator && Args.size() == 2; } + + char getOperatorName() const { + assert(isUnaryOp() || isBinaryOp()); + return Name[Name.size() - 1]; + } + + unsigned getBinaryPrecedence() const { return Precedence; } +}; + +/// FunctionAST - This class represents a function definition itself. +class FunctionAST { + std::unique_ptr Proto; + std::unique_ptr Body; + +public: + FunctionAST(std::unique_ptr Proto, + std::unique_ptr Body) + : Proto(std::move(Proto)), Body(std::move(Body)) {} + + Function *codegen(); +}; + +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Parser +//===----------------------------------------------------------------------===// + +/// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current +/// token the parser is looking at. getNextToken reads another token from the +/// lexer and updates CurTok with its results. +static int CurTok; +static int getNextToken() { return CurTok = gettok(); } + +/// BinopPrecedence - This holds the precedence for each binary operator that is +/// defined. +static std::map BinopPrecedence; + +/// GetTokPrecedence - Get the precedence of the pending binary operator token. +static int GetTokPrecedence() { + if (!isascii(CurTok)) + return -1; + + // Make sure it's a declared binop. + int TokPrec = BinopPrecedence[CurTok]; + if (TokPrec <= 0) + return -1; + return TokPrec; +} + +/// LogError* - These are little helper functions for error handling. 
+std::unique_ptr LogError(const char *Str) { + fprintf(stderr, "Error: %s\n", Str); + return nullptr; +} + +std::unique_ptr LogErrorP(const char *Str) { + LogError(Str); + return nullptr; +} + +static std::unique_ptr ParseExpression(); + +/// integerexpr ::= integer +static std::unique_ptr ParseIntegerExpr() { + auto Result = std::make_unique(IntVal); + getNextToken(); // consume the number + return std::move(Result); +} + +/// numberexpr ::= number +static std::unique_ptr ParseNumberExpr() { + auto Result = std::make_unique(NumVal); + getNextToken(); // consume the number + return std::move(Result); +} + +/// parenexpr ::= '(' expression ')' +static std::unique_ptr ParseParenExpr() { + getNextToken(); // eat (. + auto V = ParseExpression(); + if (!V) + return nullptr; + + if (CurTok != ')') + return LogError("expected ')'"); + getNextToken(); // eat ). + return V; +} + +/// identifierexpr +/// ::= identifier +/// ::= identifier '(' expression* ')' +static std::unique_ptr ParseIdentifierExpr() { + std::string IdName = IdentifierStr; + + getNextToken(); // eat identifier. + + if (CurTok != '(') // Simple variable ref. + return std::make_unique(IdName); + + // Call. + getNextToken(); // eat ( + std::vector> Args; + if (CurTok != ')') { + while (true) { + if (auto Arg = ParseExpression()) + Args.push_back(std::move(Arg)); + else + return nullptr; + + if (CurTok == ')') + break; + + if (CurTok != ',') + return LogError("Expected ')' or ',' in argument list"); + getNextToken(); + } + } + + // Eat the ')'. + getNextToken(); + + return std::make_unique(IdName, std::move(Args)); +} + +/// ifexpr ::= 'if' expression 'then' expression 'else' expression +static std::unique_ptr ParseIfExpr() { + getNextToken(); // eat the if. + + // condition. 
+ auto Cond = ParseExpression(); + if (!Cond) + return nullptr; + + if (CurTok != tok_then) + return LogError("expected then"); + getNextToken(); // eat the then + + auto Then = ParseExpression(); + if (!Then) + return nullptr; + + if (CurTok != tok_else) + return LogError("expected else"); + + getNextToken(); + + auto Else = ParseExpression(); + if (!Else) + return nullptr; + + return std::make_unique(std::move(Cond), std::move(Then), + std::move(Else)); +} + +/// forexpr ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression +static std::unique_ptr ParseForExpr() { + getNextToken(); // eat the for. + + if (CurTok != tok_identifier) + return LogError("expected identifier after for"); + + std::string IdName = IdentifierStr; + getNextToken(); // eat identifier. + + if (CurTok != '=') + return LogError("expected '=' after for"); + getNextToken(); // eat '='. + + auto Start = ParseExpression(); + if (!Start) + return nullptr; + if (CurTok != ',') + return LogError("expected ',' after for start value"); + getNextToken(); + + auto End = ParseExpression(); + if (!End) + return nullptr; + + // The step value is optional. + std::unique_ptr Step; + if (CurTok == ',') { + getNextToken(); + Step = ParseExpression(); + if (!Step) + return nullptr; + } + + if (CurTok != tok_in) + return LogError("expected 'in' after for"); + getNextToken(); // eat 'in'. + + auto Body = ParseExpression(); + if (!Body) + return nullptr; + + return std::make_unique(IdName, std::move(Start), std::move(End), + std::move(Step), std::move(Body)); +} + +/// varexpr ::= 'var' identifier ('=' expression)? +// (',' identifier ('=' expression)?)* 'in' expression +static std::unique_ptr ParseVarExpr() { + getNextToken(); // eat the var. + + std::vector>> VarNames; + + // At least one variable name is required. + if (CurTok != tok_identifier) + return LogError("expected identifier after var"); + + while (true) { + std::string Name = IdentifierStr; + getNextToken(); // eat identifier. 
+ + // Read the optional initializer. + std::unique_ptr Init = nullptr; + if (CurTok == '=') { + getNextToken(); // eat the '='. + + Init = ParseExpression(); + if (!Init) + return nullptr; + } + + VarNames.push_back(std::make_pair(Name, std::move(Init))); + + // End of var list, exit loop. + if (CurTok != ',') + break; + getNextToken(); // eat the ','. + + if (CurTok != tok_identifier) + return LogError("expected identifier list after var"); + } + + // At this point, we have to have 'in'. + if (CurTok != tok_in) + return LogError("expected 'in' keyword after 'var'"); + getNextToken(); // eat 'in'. + + auto Body = ParseExpression(); + if (!Body) + return nullptr; + + return std::make_unique(std::move(VarNames), std::move(Body)); +} + +/// spawnexpr ::= 'spawn' expression +static std::unique_ptr ParseSpawnExpr() { + getNextToken(); // eat the spawn. + auto Spawned = ParseExpression(); + if (!Spawned) + return nullptr; + return std::make_unique(std::move(Spawned)); +} + +/// syncexpr ::= 'sync' +static std::unique_ptr ParseSyncExpr() { + getNextToken(); // eat the sync. + return std::make_unique(); +} + +/// parforexpr ::= 'parfor' identifier '=' expr ',' expr (',' expr)? 'in' expression +static std::unique_ptr ParseParForExpr() { + getNextToken(); // eat the parfor. + + if (CurTok != tok_identifier) + return LogError("expected identifier after parfor"); + + std::string IdName = IdentifierStr; + getNextToken(); // eat identifier. + + if (CurTok != '=') + return LogError("expected '=' after for"); + getNextToken(); // eat '='. + + auto Start = ParseExpression(); + if (!Start) + return nullptr; + if (CurTok != ',') + return LogError("expected ',' after for start value"); + getNextToken(); + + auto End = ParseExpression(); + if (!End) + return nullptr; + + // The step value is optional. 
+ std::unique_ptr Step; + if (CurTok == ',') { + getNextToken(); + Step = ParseExpression(); + if (!Step) + return nullptr; + } + + if (CurTok != tok_in) + return LogError("expected 'in' after for"); + getNextToken(); // eat 'in'. + + auto Body = ParseExpression(); + if (!Body) + return nullptr; + + return std::make_unique(IdName, std::move(Start), + std::move(End), std::move(Step), + std::move(Body)); +} + +/// primary +/// ::= identifierexpr +/// ::= integerexpr +/// ::= numberexpr +/// ::= parenexpr +/// ::= ifexpr +/// ::= forexpr +/// ::= varexpr +/// ::= spawnexpr +/// ::= syncexpr +/// ::= parforexpr +static std::unique_ptr ParsePrimary(bool Integer = false) { + switch (CurTok) { + default: + return LogError("unknown token when expecting an expression"); + case tok_identifier: + return ParseIdentifierExpr(); + case tok_integer: + return ParseIntegerExpr(); + case tok_number: + return ParseNumberExpr(); + case '(': + return ParseParenExpr(); + case tok_if: + return ParseIfExpr(); + case tok_for: + return ParseForExpr(); + case tok_var: + return ParseVarExpr(); + case tok_spawn: + return ParseSpawnExpr(); + case tok_sync: + return ParseSyncExpr(); + case tok_parfor: + return ParseParForExpr(); + } +} + +/// unary +/// ::= primary +/// ::= '!' unary +static std::unique_ptr ParseUnary() { + // If the current token is not an operator, it must be a primary expr. + if (!isascii(CurTok) || CurTok == '(' || CurTok == ',') + return ParsePrimary(); + + // If this is a unary operator, read it. + int Opc = CurTok; + getNextToken(); + if (auto Operand = ParseUnary()) + return std::make_unique(Opc, std::move(Operand)); + return nullptr; +} + +/// binoprhs +/// ::= ('+' unary)* +static std::unique_ptr ParseBinOpRHS(int ExprPrec, + std::unique_ptr LHS, + bool Integer = false) { + // If this is a binop, find its precedence. 
+ while (true) { + int TokPrec = GetTokPrecedence(); + + // If this is a binop that binds at least as tightly as the current binop, + // consume it, otherwise we are done. + if (TokPrec < ExprPrec) + return LHS; + + // Okay, we know this is a binop. + int BinOp = CurTok; + getNextToken(); // eat binop + + // Parse the unary expression after the binary operator. + auto RHS = ParseUnary(); + if (!RHS) + return nullptr; + + // If BinOp binds less tightly with RHS than the operator after RHS, let + // the pending operator take RHS as its LHS. + int NextPrec = GetTokPrecedence(); + if (TokPrec < NextPrec) { + RHS = ParseBinOpRHS(TokPrec + 1, std::move(RHS)); + if (!RHS) + return nullptr; + } + + // Merge LHS/RHS. + LHS = + std::make_unique(BinOp, std::move(LHS), std::move(RHS)); + } +} + +/// expression +/// ::= unary binoprhs +/// +static std::unique_ptr ParseExpression() { + auto LHS = ParseUnary(); + if (!LHS) + return nullptr; + + return ParseBinOpRHS(0, std::move(LHS)); +} + +/// prototype +/// ::= id '(' id* ')' +/// ::= binary LETTER number? (id, id) +/// ::= unary LETTER (id) +static std::unique_ptr ParsePrototype() { + std::string FnName; + + unsigned Kind = 0; // 0 = identifier, 1 = unary, 2 = binary. + unsigned BinaryPrecedence = 30; + + switch (CurTok) { + default: + return LogErrorP("Expected function name in prototype"); + case tok_identifier: + FnName = IdentifierStr; + Kind = 0; + getNextToken(); + break; + case tok_unary: + getNextToken(); + if (!isascii(CurTok)) + return LogErrorP("Expected unary operator"); + FnName = "unary"; + FnName += (char)CurTok; + Kind = 1; + getNextToken(); + break; + case tok_binary: + getNextToken(); + if (!isascii(CurTok)) + return LogErrorP("Expected binary operator"); + FnName = "binary"; + FnName += (char)CurTok; + Kind = 2; + getNextToken(); + + // Read the precedence if present. 
+ if (CurTok == tok_integer) { + if (IntVal < 1 || IntVal > 100) + return LogErrorP("Invalid precedence: must be 1..100"); + BinaryPrecedence = (unsigned)IntVal; + getNextToken(); + } + break; + } + + if (CurTok != '(') + return LogErrorP("Expected '(' in prototype"); + + std::vector ArgNames; + while (getNextToken() == tok_identifier) + ArgNames.push_back(IdentifierStr); + if (CurTok != ')') + return LogErrorP("Expected ')' in prototype"); + + // success. + getNextToken(); // eat ')'. + + // Verify right number of names for operator. + if (Kind && ArgNames.size() != Kind) + return LogErrorP("Invalid number of operands for operator"); + + return std::make_unique(FnName, ArgNames, Kind != 0, + BinaryPrecedence); +} + +/// definition ::= 'def' prototype expression +static std::unique_ptr ParseDefinition() { + getNextToken(); // eat def. + auto Proto = ParsePrototype(); + if (!Proto) + return nullptr; + + if (auto E = ParseExpression()) + return std::make_unique(std::move(Proto), std::move(E)); + return nullptr; +} + +/// toplevelexpr ::= expression +static std::unique_ptr ParseTopLevelExpr() { + if (auto E = ParseExpression()) { + // Make an anonymous proto. + auto Proto = std::make_unique("__anon_expr", + std::vector()); + return std::make_unique(std::move(Proto), std::move(E)); + } + return nullptr; +} + +/// external ::= 'extern' prototype +static std::unique_ptr ParseExtern() { + getNextToken(); // eat extern. + return ParsePrototype(); +} + +//===----------------------------------------------------------------------===// +// Code Generation +//===----------------------------------------------------------------------===// + +static std::unique_ptr TheContext; +static std::unique_ptr TheModule; +static std::unique_ptr> Builder; +static std::map NamedValues; +static std::unique_ptr TheJIT; +static std::map> FunctionProtos; +static ExitOnError ExitOnErr; + +// Variables for codegen for the current task scope. 
+ +// TaskScopeEntry keeps track of the entry basic block of the function +// or nested task being emitted. +static BasicBlock *TaskScopeEntry = nullptr; + +// TaskScopeSyncRegion keeps track of a call to +// @llvm.syncregion.start() in TaskScopeEntry, if one exists. +static Value *TaskScopeSyncRegion = nullptr; + +// Flags controlled from the command line. +static bool Optimize = true; +static bool RunCilksan = false; +enum PrintIRLevel { + PrintIR_None = 0, + PrintIR_BeforeOpt = 0x1, + PrintIR_BeforeTapirLowering = 0x2, + PrintIR_AfterTapirLoopSpawning = 0x4, + PrintIR_AfterTapirLowering = 0x8, +}; +static bool PrintIRBeforeOpt(PrintIRLevel Level) { + return (static_cast(Level) & static_cast(PrintIR_BeforeOpt)) == + static_cast(PrintIR_BeforeOpt); +} +static bool PrintIRBeforeTapirLowering(PrintIRLevel Level) { + return (static_cast(Level) & + static_cast(PrintIR_BeforeTapirLowering)) == + static_cast(PrintIR_BeforeTapirLowering); +} +static bool PrintIRAfterTapirLoopSpawning(PrintIRLevel Level) { + return (static_cast(Level) & + static_cast(PrintIR_AfterTapirLoopSpawning)) == + static_cast(PrintIR_AfterTapirLoopSpawning); +} +static bool PrintIRAfterTapirLowering(PrintIRLevel Level) { + return (static_cast(Level) & + static_cast(PrintIR_AfterTapirLowering)) == + static_cast(PrintIR_AfterTapirLowering); +} +static PrintIRLevel setPrintIRBeforeOpt(PrintIRLevel Level) { + return static_cast(static_cast(Level) | + static_cast(PrintIR_BeforeOpt)); +} +static PrintIRLevel setPrintIRBeforeTapirLowering(PrintIRLevel Level) { + return static_cast( + static_cast(Level) | static_cast(PrintIR_BeforeTapirLowering)); +} +static PrintIRLevel setPrintIRAfterTapirLoopSpawning(PrintIRLevel Level) { + return static_cast( + static_cast(Level) | + static_cast(PrintIR_AfterTapirLoopSpawning)); +} +static PrintIRLevel setPrintIRAfterTapirLowering(PrintIRLevel Level) { + return static_cast( + static_cast(Level) | static_cast(PrintIR_AfterTapirLowering)); +} +static PrintIRLevel 
PrintIRLvl = PrintIR_None; +// Options related to Tapir lowering. +static TapirTargetID TheTapirTarget; +static std::string OpenCilkRuntimeBCPath; + +Value *LogErrorV(const char *Str) { + LogError(Str); + return nullptr; +} + +Function *getFunction(std::string Name) { + // First, see if the function has already been added to the current module. + if (auto *F = TheModule->getFunction(Name)) + return F; + + // If not, check whether we can codegen the declaration from some existing + // prototype. + auto FI = FunctionProtos.find(Name); + if (FI != FunctionProtos.end()) + return FI->second->codegen(); + + // If no existing prototype exists, return null. + return nullptr; +} + +/// CreateEntryBlockAlloca - Create an alloca instruction in the entry block of +/// the function. This is used for mutable variables etc. +static AllocaInst *CreateEntryBlockAlloca(Function *TheFunction, + StringRef VarName) { + IRBuilder<> TmpB(&TheFunction->getEntryBlock(), + TheFunction->getEntryBlock().begin()); + return TmpB.CreateAlloca(Type::getDoubleTy(*TheContext), nullptr, VarName); +} + +/// CreateTaskEntryBlockAlloca - Create an alloca instruction in the entry block +/// of the current task. This is used for mutable variables etc. +/// +/// Requires the CFG of the function to be constructed up to BB. +static AllocaInst *CreateTaskEntryBlockAlloca(StringRef VarName, + Type *AllocaTy = + Type::getDoubleTy(*TheContext)) { + BasicBlock *TaskEntry = TaskScopeEntry; + if (!TaskEntry) { + LogError("No local task scope."); + return nullptr; + } + IRBuilder<> TmpB(TaskEntry, TaskEntry->begin()); + return TmpB.CreateAlloca(AllocaTy, nullptr, VarName); +} + +Value *IntegerExprAST::codegen() { + return ConstantInt::get(*TheContext, APSInt::get(Val)); +} + +Value *NumberExprAST::codegen() { + return ConstantFP::get(*TheContext, APFloat(Val)); +} + +Value *VariableExprAST::codegen() { + // Look this variable up in the function. 
+ Value *V = NamedValues[Name]; + if (!V) + return LogErrorV("Unknown variable name"); + + if (!isa(V)) + return V; + + AllocaInst *A = cast(V); + // Load the value. + return Builder->CreateLoad(A->getAllocatedType(), A, Name.c_str()); +} + +Value *UnaryExprAST::codegen() { + Value *OperandV = Operand->codegen(); + if (!OperandV) + return nullptr; + + Function *F = getFunction(std::string("unary") + Opcode); + if (!F) + return LogErrorV("Unknown unary operator"); + + return Builder->CreateCall(F, OperandV, "unop"); +} + +Value *BinaryExprAST::codegen() { + // Special case '=' because we don't want to emit the LHS as an expression. + if (Op == '=') { + // Assignment requires the LHS to be an identifier. + // This assume we're building without RTTI because LLVM builds that way by + // default. If you build LLVM with RTTI this can be changed to a + // dynamic_cast for automatic error checking. + VariableExprAST *LHSE = static_cast(LHS.get()); + if (!LHSE) + return LogErrorV("destination of '=' must be a variable"); + // Codegen the RHS. + Value *Val = RHS->codegen(); + if (!Val) + return nullptr; + + // Look up the name. 
+ Value *Variable = NamedValues[LHSE->getName()]; + if (!Variable) + return LogErrorV("Unknown variable name"); + + Builder->CreateStore(Val, Variable); + return Val; + } + + Value *L = LHS->codegen(); + Value *R = RHS->codegen(); + if (!L || !R) + return nullptr; + Type *LTy = L->getType(); + Type *RTy = R->getType(); + bool IntegerOp = IntegerRes || + (LTy->isIntegerTy() && RTy->isIntegerTy()); + // Cast the operand types if necessary + if (!IntegerOp) { + if (LTy->isIntegerTy()) + L = Builder->CreateSIToFP(L, Type::getDoubleTy(*TheContext)); + if (RTy->isIntegerTy()) + R = Builder->CreateSIToFP(R, Type::getDoubleTy(*TheContext)); + } else if (IntegerRes) { + if (!LTy->isIntegerTy()) + L = Builder->CreateFPToSI(L, Type::getInt64Ty(*TheContext)); + if (!RTy->isIntegerTy()) + R = Builder->CreateFPToSI(R, Type::getInt64Ty(*TheContext)); + } + // Create the appropriate operation + switch (Op) { + case '+': + if (IntegerOp) + return Builder->CreateAdd(L, R, "addtmp"); + return Builder->CreateFAdd(L, R, "addtmp"); + case '-': + if (IntegerOp) + return Builder->CreateSub(L, R, "subtmp"); + return Builder->CreateFSub(L, R, "subtmp"); + case '*': + if (IntegerOp) + return Builder->CreateMul(L, R, "multmp"); + return Builder->CreateFMul(L, R, "multmp"); + case '<': + if (IntegerOp) { + L = Builder->CreateICmpSLT(L, R, "cmptmp"); + return Builder->CreateZExt(L, Type::getInt64Ty(*TheContext), "booltmp"); + } + L = Builder->CreateFCmpULT(L, R, "cmptmp"); + // Convert bool 0/1 to double 0.0 or 1.0 + return Builder->CreateUIToFP(L, Type::getDoubleTy(*TheContext), "booltmp"); + default: + break; + } + + // If it wasn't a builtin binary operator, it must be a user defined one. Emit + // a call to it. + Function *F = getFunction(std::string("binary") + Op); + assert(F && "binary operator not found!"); + + Value *Ops[] = {L, R}; + return Builder->CreateCall(F, Ops, "binop"); +} + +Value *CallExprAST::codegen() { + // Look up the name in the global module table. 
+ Function *CalleeF = getFunction(Callee); + if (!CalleeF) + return LogErrorV("Unknown function referenced"); + + // If argument mismatch error. + if (CalleeF->arg_size() != Args.size()) + return LogErrorV("Incorrect # arguments passed"); + + std::vector ArgsV; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + Value *ArgVal = Args[i]->codegen(); + if (ArgVal->getType()->isIntegerTy()) + ArgVal = Builder->CreateSIToFP(ArgVal, Type::getDoubleTy(*TheContext)); + ArgsV.push_back(ArgVal); + if (!ArgsV.back()) + return nullptr; + } + + return Builder->CreateCall(CalleeF, ArgsV, "calltmp"); +} + +Value *IfExprAST::codegen() { + Value *CondV = Cond->codegen(); + if (!CondV) + return nullptr; + + // Convert condition to a bool by comparing non-equal to 0.0. + CondV = Builder->CreateFCmpONE( + CondV, ConstantFP::get(*TheContext, APFloat(0.0)), "ifcond"); + + Function *TheFunction = Builder->GetInsertBlock()->getParent(); + + // Create blocks for the then and else cases. Insert the 'then' block at the + // end of the function. + BasicBlock *ThenBB = BasicBlock::Create(*TheContext, "then", TheFunction); + BasicBlock *ElseBB = BasicBlock::Create(*TheContext, "else"); + BasicBlock *MergeBB = BasicBlock::Create(*TheContext, "ifcont"); + + Builder->CreateCondBr(CondV, ThenBB, ElseBB); + + // Emit then value. + Builder->SetInsertPoint(ThenBB); + + Value *ThenV = Then->codegen(); + if (!ThenV) + return nullptr; + + Builder->CreateBr(MergeBB); + // Codegen of 'Then' can change the current block, update ThenBB for the PHI. + ThenBB = Builder->GetInsertBlock(); + + // Emit else block. + TheFunction->insert(TheFunction->end(), ElseBB); + Builder->SetInsertPoint(ElseBB); + + Value *ElseV = Else->codegen(); + if (!ElseV) + return nullptr; + + Builder->CreateBr(MergeBB); + // Codegen of 'Else' can change the current block, update ElseBB for the PHI. + ElseBB = Builder->GetInsertBlock(); + + // Emit merge block. 
+ TheFunction->insert(TheFunction->end(), MergeBB); + Builder->SetInsertPoint(MergeBB); + bool IntegerType = (ThenV->getType()->isIntegerTy() && + ElseV->getType()->isIntegerTy()); + Type *PNTy = IntegerType ? Type::getInt64Ty(*TheContext) : + Type::getDoubleTy(*TheContext); + PHINode *PN = Builder->CreatePHI(PNTy, 2, "iftmp"); + if (!IntegerType) { + if (ThenV->getType()->isIntegerTy()) + ThenV = Builder->CreateSIToFP(ThenV, Type::getDoubleTy(*TheContext)); + if (ElseV->getType()->isIntegerTy()) + ElseV = Builder->CreateSIToFP(ElseV, Type::getDoubleTy(*TheContext)); + } + PN->addIncoming(ThenV, ThenBB); + PN->addIncoming(ElseV, ElseBB); + return PN; +} + +// Output for-loop as: +// var = alloca double +// ... +// start = startexpr +// store start -> var +// br cond +// cond: +// endcond = endexpr +// br endcond, loop, afterloop +// loop: +// ... +// bodyexpr +// ... +// loopend: +// step = stepexpr +// curvar = load var +// nextvar = curvar + step +// store nextvar -> var +// br cond +// afterloop: +Value *ForExprAST::codegen() { + Function *TheFunction = Builder->GetInsertBlock()->getParent(); + + // Create an alloca for the variable in the entry block. + AllocaInst *Alloca = CreateTaskEntryBlockAlloca(VarName); + + // Emit the start code first, without 'variable' in scope. + Value *StartVal = Start->codegen(); + if (!StartVal) + return nullptr; + if (StartVal->getType()->isIntegerTy()) + StartVal = Builder->CreateSIToFP(StartVal, Type::getDoubleTy(*TheContext)); + + // Store the value into the alloca. + Builder->CreateStore(StartVal, Alloca); + + // Make the new basic block for the loop header, inserting after current + // block. + BasicBlock *CondBB = BasicBlock::Create(*TheContext, "cond", TheFunction); + BasicBlock *LoopBB = BasicBlock::Create(*TheContext, "loop", TheFunction); + BasicBlock *AfterBB = BasicBlock::Create(*TheContext, "afterloop"); + + // Insert an explicit fall through from the current block to the CondBB. 
+ Builder->CreateBr(CondBB); + + // Start insertion in CondBB. + Builder->SetInsertPoint(CondBB); + + // Within the loop, the variable is defined equal to the PHI node. If it + // shadows an existing variable, we have to restore it, so save it now. + Value *OldVal = NamedValues[VarName]; + NamedValues[VarName] = Alloca; + + // Compute the end condition. + Value *EndCond = End->codegen(); + if (!EndCond) + return nullptr; + + // Convert condition to a bool by comparing non-equal to 0.0. + EndCond = Builder->CreateFCmpONE( + EndCond, ConstantFP::get(*TheContext, APFloat(0.0)), "loopcond"); + + // Insert the conditional branch into the end of LoopEndBB. + Builder->CreateCondBr(EndCond, LoopBB, AfterBB); + + // Start insertion in LoopBB. + Builder->SetInsertPoint(LoopBB); + + // Emit the body of the loop. This, like any other expr, can change the + // current BB. Note that we ignore the value computed by the body, but don't + // allow an error. + if (!Body->codegen()) + return nullptr; + + // Emit the step value. + Value *StepVal = nullptr; + if (Step) { + StepVal = Step->codegen(); + if (!StepVal) + return nullptr; + } else { + // If not specified, use 1.0. + StepVal = ConstantFP::get(*TheContext, APFloat(1.0)); + } + + // Reload, increment, and restore the alloca. This handles the case where + // the body of the loop mutates the variable. + Value *CurVar = + Builder->CreateLoad(Alloca->getAllocatedType(), Alloca, VarName.c_str()); + Value *NextVar = Builder->CreateFAdd(CurVar, StepVal, "nextvar"); + Builder->CreateStore(NextVar, Alloca); + + // Insert a back edge to CondBB. + Builder->CreateBr(CondBB); + + // Emit the "after loop" block. + TheFunction->insert(TheFunction->end(), AfterBB); + + // Any new code will be inserted in AfterBB. + Builder->SetInsertPoint(AfterBB); + + // Restore the unshadowed variable. + if (OldVal) + NamedValues[VarName] = OldVal; + else + NamedValues.erase(VarName); + + // for expr always returns 0.0. 
+ return Constant::getNullValue(Type::getDoubleTy(*TheContext)); +} + +Value *VarExprAST::codegen() { + std::vector OldBindings; + + // Register all variables and emit their initializer. + for (unsigned i = 0, e = VarNames.size(); i != e; ++i) { + const std::string &VarName = VarNames[i].first; + ExprAST *Init = VarNames[i].second.get(); + + // Emit the initializer before adding the variable to scope, this prevents + // the initializer from referencing the variable itself, and permits stuff + // like this: + // var a = 1 in + // var a = a in ... # refers to outer 'a'. + Value *InitVal; + if (Init) { + InitVal = Init->codegen(); + if (!InitVal) + return nullptr; + } else { // If not specified, use 0.0. + InitVal = ConstantFP::get(*TheContext, APFloat(0.0)); + } + + AllocaInst *Alloca = CreateTaskEntryBlockAlloca(VarName, InitVal->getType()); + Builder->CreateStore(InitVal, Alloca); + + // Remember the old variable binding so that we can restore the binding when + // we unrecurse. + OldBindings.push_back(NamedValues[VarName]); + + // Remember this binding. + NamedValues[VarName] = Alloca; + } + + // Codegen the body, now that all vars are in scope. + Value *BodyVal = Body->codegen(); + if (!BodyVal) + return nullptr; + + // Pop all our variables from scope. + for (unsigned i = 0, e = VarNames.size(); i != e; ++i) + NamedValues[VarNames[i].first] = OldBindings[i]; + + // Return the body computation. + return BodyVal; +} + +// RAII class to manage the entry block and sync region in each nested task +// scope. +class TaskScopeRAII { + BasicBlock *OldTaskScopeEntry; + Value *OldSyncRegion = nullptr; +public: + explicit TaskScopeRAII(BasicBlock *NewTaskScopeEntry) : + OldTaskScopeEntry(TaskScopeEntry), OldSyncRegion(TaskScopeSyncRegion) { + TaskScopeEntry = NewTaskScopeEntry; + TaskScopeSyncRegion = nullptr; + } + ~TaskScopeRAII() { + TaskScopeEntry = OldTaskScopeEntry; + TaskScopeSyncRegion = OldSyncRegion; + } +}; + +// Helper method for creating sync regions. 
+static Value *CreateSyncRegion(Module &M) { + BasicBlock *TaskEntry = TaskScopeEntry; + if (!TaskEntry) + return LogErrorV("No local task scope."); + IRBuilder<> TmpB(TaskEntry, TaskEntry->begin()); + return TmpB.CreateCall( + Intrinsic::getDeclaration(&M, Intrinsic::syncregion_start), {}); +} + +// Output spawn spawned_expr as: +// sync_region = call token @llvm.syncregion.start() +// ... +// detach within sync_region, label detachbb, label continbb +// detachbb: +// ... +// spawned_expr +// ... +// reattach within sync_region, continbb +// continbb: +Value *SpawnExprAST::codegen() { + // Create a sync region for the local function or task scope, if necessary. + if (!TaskScopeSyncRegion) + TaskScopeSyncRegion = CreateSyncRegion(*TheModule); + // Get the sync region for this task scope. + Value *SyncRegion = TaskScopeSyncRegion; + Function *TheFunction = Builder->GetInsertBlock()->getParent(); + + // Create the detach and continue blocks. Insert the continue block + // at the end of the function. + BasicBlock *DetachBB = BasicBlock::Create(*TheContext, "detachbb", + TheFunction); + // We hold off inserting ContinueBB into TheFunction until after we + // emit the spawned statement, to make the final LLVM IR a bit + // cleaner. + BasicBlock *ContinueBB = BasicBlock::Create(*TheContext, "continbb"); + + // Create the detach and prepare to emit the spawned expression starting in + // the detach block. + Builder->CreateDetach(DetachBB, ContinueBB, SyncRegion); + Builder->SetInsertPoint(DetachBB); + + // Emit the spawned computation. + { + TaskScopeRAII TaskScope(DetachBB); + // Emit the spawned expr. This, like any other expr, can change the current + // BB. + if (!Spawned->codegen()) + return nullptr; + + // Emit a reattach to the continue block. + Builder->CreateReattach(ContinueBB, SyncRegion); + } + + TheFunction->insert(TheFunction->end(), ContinueBB); + Builder->SetInsertPoint(ContinueBB); + + // Return a default value of 0.0. 
+ return Constant::getNullValue(Type::getDoubleTy(*TheContext)); +} + +Value *SyncExprAST::codegen() { + // Create a sync region for the local function or task scope, if necessary. + if (!TaskScopeSyncRegion) + TaskScopeSyncRegion = CreateSyncRegion(*TheModule); + // Get the sync region for this task scope. + Value *SyncRegion = TaskScopeSyncRegion; + Function *TheFunction = Builder->GetInsertBlock()->getParent(); + + // Create a continuation block for the sync. + BasicBlock *SyncContinueBB = BasicBlock::Create(*TheContext, "sync.continue", + TheFunction); + + // Create the sync, and set the insert point to the continue block. + Builder->CreateSync(SyncContinueBB, SyncRegion); + Builder->SetInsertPoint(SyncContinueBB); + + // Return a default value of 0.0. + return Constant::getNullValue(Type::getDoubleTy(*TheContext)); +} + +static std::vector GetTapirLoopMetadata() { + std::string TapirLoopSpawningStrategy = "tapir.loop.spawn.strategy"; + const int32_t DACLoopSpawning = 1; + std::vector Result; + + // Add the DAC loop-spawning strategy for Tapir loops. + Result.push_back(MDNode::get(*TheContext, + { MDString::get(*TheContext, + TapirLoopSpawningStrategy), + ConstantAsMetadata::get( + Builder->getInt32(DACLoopSpawning)) })); + + return Result; +} + +// Output parfor-loop as: +// sr = call token @llvm.syncregion.start +// ... +// start = startexpr +// br pcond +// pcond: +// variable = phi [start, loopheader], [nextvar, loopend] +// endcond = endexpr +// br endcond, ploop, afterloop +// ploop: +// detach within sr, ploop.bodyentry, ploop.continue +// ploop.bodyentry: +// var = alloca double +// store variable -> var +// ... +// bodyexpr +// ... 
+// reattach within sr, ploop.continue +// ploop.continue: +// step = stepexpr +// nextvar = variable + step +// br cond +// afterloop: +// sync within sr, aftersync +// aftersync: +Value *ParForExprAST::codegen() { + Function *TheFunction = Builder->GetInsertBlock()->getParent(); + + // Emit the start code first, without 'variable' in scope. + Value *StartVal = Start->codegen(); + if (!StartVal) + return nullptr; + + // Make the new basic block for the loop header, inserting after current + // block. + BasicBlock *PreheaderBB = Builder->GetInsertBlock(); + BasicBlock *CondBB = BasicBlock::Create(*TheContext, "pcond", TheFunction); + BasicBlock *LoopBB = BasicBlock::Create(*TheContext, "ploop", TheFunction); + BasicBlock *AfterBB = BasicBlock::Create(*TheContext, "afterloop"); + + // [Tapir] Create a sync region just for the loop, so we can sync + // the loop iterations separately from other spawns in the same + // function. + Value *SyncRegion = CreateSyncRegion(*TheFunction->getParent()); + + // Insert an explicit fall through from the current block to the CondBB. + Builder->CreateBr(CondBB); + + // Start insertion in CondBB. + Builder->SetInsertPoint(CondBB); + + // Start the PHI node with an entry for Start. + // [Tapir] Note: For the LoopSpawning pass to work, we ensure that + // Variable is an integer. + PHINode *Variable = + Builder->CreatePHI(Type::getInt64Ty(*TheContext), 2, VarName); + Variable->addIncoming(StartVal, PreheaderBB); + + // Within the parallel loop, we use new different copies of the variable. + // Save any existing variables that are shadowed. + Value *OldVal = NamedValues[VarName]; + // For the end condition, use the PHI node as the variable VarName. + NamedValues[VarName] = Variable; + + // If the end is a binary expression, force it to produce an integer result. + End->setIntegerRes(); + // Compute the end condition. 
+ Value *EndCond = End->codegen(); + if (!EndCond) + return nullptr; + // [Tapir] Note: For the LoopSpawning pass to work, we ensure that + // EndCond is an integer. + if (!EndCond->getType()->isIntegerTy()) + EndCond = Builder->CreateFPToSI(EndCond, Type::getInt64Ty(*TheContext)); + + // Convert condition to a bool by comparing non-equal to 0. + EndCond = Builder->CreateICmpNE( + EndCond, ConstantInt::get(*TheContext, APSInt::get(0)), "loopcond"); + + // Insert the conditional branch to either LoopBB or AfterBB. + Builder->CreateCondBr(EndCond, LoopBB, AfterBB); + + // Start insertion in LoopBB. + Builder->SetInsertPoint(LoopBB); + + // [Tapir] Create a block for detaching the loop body and a block + // for the continuation of the detach. + BasicBlock *DetachBB = + BasicBlock::Create(*TheContext, "ploop.bodyentry", TheFunction); + BasicBlock *ContinueBB = + BasicBlock::Create(*TheContext, "ploop.continue"); + + // [Tapir] Insert a detach to spawn the loop body. + Builder->CreateDetach(DetachBB, ContinueBB, SyncRegion); + Builder->SetInsertPoint(DetachBB); + + // [Tapir] Emit the spawned loop body. + { + // [Tapir] Create a nested task scope corresponding to the loop + // body, to allow for nested spawns and parallel loops in the + // parallel-loop body. + TaskScopeRAII TaskScope(DetachBB); + + // To avoid races, within the parallel loop's body, the variable + // is stored in a task-local allocation. Create an alloca in the + // task's entry block for this version of the variable. + AllocaInst *VarAlloca = + CreateTaskEntryBlockAlloca(VarName, Type::getInt64Ty(*TheContext)); + // Store the value into the alloca. + Builder->CreateStore(Variable, VarAlloca); + NamedValues[VarName] = VarAlloca; + + // Emit the body of the loop. This, like any other expr, can change the + // current BB. Note that we ignore the value computed by the body, but + // don't allow an error. 
+ if (!Body->codegen()) + return nullptr; + + // [Tapir] Emit the reattach to terminate the task containing the + // body of the parallel loop. + Builder->CreateReattach(ContinueBB, SyncRegion); + } + + // Emit the continue block of the detach. + TheFunction->insert(TheFunction->end(), ContinueBB); + + // Set the insertion point to the continue block of the detach. + Builder->SetInsertPoint(ContinueBB); + + // Emit the step value. + Value *StepVal = nullptr; + if (Step) { + StepVal = Step->codegen(); + if (!StepVal) + return nullptr; + } else { + // If not specified, use 1. + // [Tapir] For the LoopSpawning pass to work, we ensure that + // StepVal is an integer. + StepVal = ConstantInt::get(*TheContext, APSInt::get(1)); + } + Value *NextVar = Builder->CreateAdd(Variable, StepVal, "nextvar"); + + // Insert a back edge to CondBB + BranchInst *BackEdge = Builder->CreateBr(CondBB); + + // [Tapir] Emit loop metadata, so LoopSpawning will work on this + // loop. + std::vector LoopMetadata = GetTapirLoopMetadata(); + if (!LoopMetadata.empty()) { + auto TempNode = MDNode::getTemporary(*TheContext, std::nullopt); + LoopMetadata.insert(LoopMetadata.begin(), TempNode.get()); + auto LoopID = MDNode::get(*TheContext, LoopMetadata); + LoopID->replaceOperandWith(0, LoopID); + BackEdge->setMetadata(LLVMContext::MD_loop, LoopID); + } + + // Add a new entry to the PHI node for the backedge. + Variable->addIncoming(NextVar, ContinueBB); + + // Emit the "after loop" block. + TheFunction->insert(TheFunction->end(), AfterBB); + + // Any new code will be inserted in AfterBB. + Builder->SetInsertPoint(AfterBB); + + // [Tapir] Create the "after sync" block and insert it. + BasicBlock *AfterSync = + BasicBlock::Create(*TheContext, "aftersync", TheFunction); + + // [Tapir] Insert a sync for the loop. + Builder->CreateSync(AfterSync, SyncRegion); + Builder->SetInsertPoint(AfterSync); + + // Restore the unshadowed variable. 
+ if (OldVal) + NamedValues[VarName] = OldVal; + else + NamedValues.erase(VarName); + + // parfor expr always returns 0.0. + return Constant::getNullValue(Type::getDoubleTy(*TheContext)); +} + +Function *PrototypeAST::codegen() { + // Make the function type: double(double,double) etc. + std::vector Doubles(Args.size(), Type::getDoubleTy(*TheContext)); + FunctionType *FT = + FunctionType::get(Type::getDoubleTy(*TheContext), Doubles, false); + + Function *F = + Function::Create(FT, Function::ExternalLinkage, Name, TheModule.get()); + + // Set names for all arguments. + unsigned Idx = 0; + for (auto &Arg : F->args()) + Arg.setName(Args[Idx++]); + + return F; +} + +static void CreateOptimizationPassPipeline(ModulePassManager &MPM); + +static void RunOptimizations() { + LoopAnalysisManager LAM; + FunctionAnalysisManager FAM; + ModuleAnalysisManager MAM; + + // Create TargetLibraryInfo for setting the target of Tapir lowering. + Triple TargetTriple(TheModule->getTargetTriple()); + TargetLibraryInfoImpl TLII(TargetTriple); + + // Set the target for Tapir lowering to the Cilk runtime system. + TLII.setTapirTarget(TheTapirTarget); + if (TapirTargetID::OpenCilk == TheTapirTarget) + TLII.setTapirTargetOptions( + std::make_unique(OpenCilkRuntimeBCPath)); + + // Add the TargetLibraryInfo to the pass manager. + FAM.registerPass([&] { return TargetLibraryAnalysis(TLII); }); + // Register necessary analyses. 
+ FAM.registerPass([&] { + AAManager AA; + AA.registerFunctionAnalysis(); + return AA; + }); + FAM.registerPass([&] { return AssumptionAnalysis(); }); + FAM.registerPass([&] { return BasicAA(); }); + FAM.registerPass([&] { return BlockFrequencyAnalysis(); }); + FAM.registerPass([&] { return BranchProbabilityAnalysis(); }); + FAM.registerPass([&] { return DependenceAnalysis(); }); + FAM.registerPass([&] { return DominatorTreeAnalysis(); }); + FAM.registerPass([&] { return LoopAnalysis(); }); + FAM.registerPass([&] { return MemoryDependenceAnalysis(); }); + FAM.registerPass([&] { return MemorySSAAnalysis(); }); + FAM.registerPass([&] { return OptimizationRemarkEmitterAnalysis(); }); + FAM.registerPass([&] { return PostDominatorTreeAnalysis(); }); + FAM.registerPass([&] { return ScalarEvolutionAnalysis(); }); + FAM.registerPass([&] { return TapirRaceDetect(); }); + FAM.registerPass([&] { return TargetIRAnalysis(); }); + FAM.registerPass([&] { return TaskAnalysis(); }); + LAM.registerPass([&] { return PassInstrumentationAnalysis(); }); + FAM.registerPass([&] { return PassInstrumentationAnalysis(); }); + MAM.registerPass([&] { return CallGraphAnalysis(); }); + MAM.registerPass([&] { return PassInstrumentationAnalysis(); }); + MAM.registerPass([&] { return ProfileSummaryAnalysis(); }); + // Cross-register analysis proxies. + MAM.registerPass([&] { return FunctionAnalysisManagerModuleProxy(FAM); }); + FAM.registerPass([&] { return ModuleAnalysisManagerFunctionProxy(MAM); }); + FAM.registerPass([&] { return LoopAnalysisManagerFunctionProxy(LAM); }); + LAM.registerPass([&] { return FunctionAnalysisManagerLoopProxy(FAM); }); + + // Build the optimization pipeline. + ModulePassManager MPM; + CreateOptimizationPassPipeline(MPM); + // Run the optimizer on the function. + MPM.run(*TheModule, MAM); +} + +Function *FunctionAST::codegen() { + // Transfer ownership of the prototype to the FunctionProtos map, but keep a + // reference to it for use below. 
+ auto &P = *Proto; + FunctionProtos[Proto->getName()] = std::move(Proto); + Function *TheFunction = getFunction(P.getName()); + if (!TheFunction) + return nullptr; + + // If this is an operator, install it. + if (P.isBinaryOp()) + BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence(); + + // Create a new basic block to start insertion into. + BasicBlock *BB = BasicBlock::Create(*TheContext, "entry", TheFunction); + Builder->SetInsertPoint(BB); + + // Record the function arguments in the NamedValues map. + NamedValues.clear(); + for (auto &Arg : TheFunction->args()) { + // Create an alloca for this variable. + AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName()); + + // Store the initial value into the alloca. + Builder->CreateStore(&Arg, Alloca); + + // Add arguments to variable symbol table. + NamedValues[std::string(Arg.getName())] = Alloca; + } + + TaskScopeRAII TaskScope(BB); + if (Value *RetVal = Body->codegen()) { + // Finish off the function. + if (RetVal->getType()->isIntegerTy()) + RetVal = Builder->CreateSIToFP(RetVal, Type::getDoubleTy(*TheContext)); + Builder->CreateRet(RetVal); + + TheFunction->setDoesNotThrow(); + + // Mark the function for race-detection + if (RunCilksan) + TheFunction->addFnAttr(Attribute::SanitizeCilk); + + // Validate the generated code, checking for consistency. + verifyFunction(*TheFunction); + + // Run the optimizer on the function. + RunOptimizations(); + + return TheFunction; + } + + // Error reading body, remove function. 
+ TheFunction->eraseFromParent(); + + if (P.isBinaryOp()) + BinopPrecedence.erase(P.getOperatorName()); + return nullptr; +} + +//===----------------------------------------------------------------------===// +// Top-Level parsing and JIT Driver +//===----------------------------------------------------------------------===// + +static void AddTapirLoweringPasses(ModulePassManager &MPM); + +static void CreateOptimizationPassPipeline(ModulePassManager &MPM) { + if (PrintIRBeforeOpt(PrintIRLvl)) { + MPM.addPass(createModuleToFunctionPassAdaptor( + PrintFunctionPass(errs(), "IR dump before optimizations"))); + } + + if (Optimize) { + FunctionPassManager FPM; + // Promote memory to registers. + FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); + // Catch trivial redundancies + FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); + // Do simple "peephole" optimizations and bit-twiddling optzns. + FPM.addPass(InstCombinePass()); + // Reassociate expressions. + FPM.addPass(ReassociatePass()); + // Eliminate Common SubExpressions. + FPM.addPass(GVNPass()); + // Simplify the control flow graph (deleting unreachable blocks, etc). + FPM.addPass(SimplifyCFGPass()); + + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + + // If requested, run the CilkSanitizer pass. + if (RunCilksan) { + MPM.addPass(CSISetupPass()); + MPM.addPass(CilkSanitizerPass()); + } + + if (PrintIRBeforeTapirLowering(PrintIRLvl)) { + MPM.addPass(createModuleToFunctionPassAdaptor( + PrintFunctionPass(errs(), "IR dump before Tapir lowering"))); + } + + // Add Tapir lowering passes. + AddTapirLoweringPasses(MPM); +} + +static void AddTapirLoweringPasses(ModulePassManager &MPM) { + // First, handle Tapir loops. Loops are handled by first canonicalizing their + // representation and then performing LoopSpawning to ensure that iterations + // are spawned efficiently in parallel. + if (Optimize) { + FunctionPassManager FPM; + LoopPassManager LPM1, LPM2; + // Start by simplifying the loops. 
+ LPM1.addPass(LoopInstSimplifyPass()); + LPM1.addPass(LoopSimplifyCFGPass()); + // Hoist loop invariants + LPM1.addPass(LICMPass(/*LicmMssaOptCap*/ 100, + /*LicmMssaNoAccForPromotionCap*/ 250, + /*AllowSpeculation=*/true)); + // Cleanup the CFG and instructions + FPM.addPass( + RequireAnalysisPass()); + FPM.addPass( + createFunctionToLoopPassAdaptor(std::move(LPM1), /*UseMemorySSA=*/true, + /*UseBlockFrequencyInfo=*/true)); + FPM.addPass(SimplifyCFGPass()); + FPM.addPass(InstCombinePass()); + // Re-rotate loops in all our loop nests. + LPM2.addPass(LoopRotatePass(/* Disable header duplication */ true, + /* isLTOPreLink */ false)); + // Simplify the loop induction variables. + LPM2.addPass(IndVarSimplifyPass()); + FPM.addPass( + createFunctionToLoopPassAdaptor(std::move(LPM2), + /*UseMemorySSA=*/false, + /*UseBlockFrequencyInfo=*/false)); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + + // Transform Tapir loops to ensure that iterations are spawned efficiently + // in parallel. + if (TheTapirTarget != TapirTargetID::None) { + MPM.addPass(LoopSpawningPass()); + // The LoopSpawning pass may leave cruft around. Clean it up. + MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass())); + } + + if (PrintIRAfterTapirLoopSpawning(PrintIRLvl)) { + MPM.addPass(PrintModulePass(errs(), "IR dump after Tapir loop spawning")); + } + } + + // Second, lower Tapir constructs in general to some parallel runtime system, + // as specified in TargetLibraryInfo. + + // Add pass to lower Tapir to the target runtime. + if (TheTapirTarget != TapirTargetID::None) { + MPM.addPass(TapirToTargetPass()); + + if (Optimize) { + FunctionPassManager FPM; + FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); + FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); + FPM.addPass(SimplifyCFGPass()); + FPM.addPass(InstCombinePass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + + // Perform some cleanup after the lowering pass. 
+ MPM.addPass(AlwaysInlinerPass( + /*InsertLifetimeIntrinsics=*/false)); + MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass())); + + if (PrintIRAfterTapirLowering(PrintIRLvl)) { + MPM.addPass(PrintModulePass(errs(), "IR dump after Tapir lowering")); + } + } +} + +static void InitializeModule() { + // Open a new module. + TheContext = std::make_unique(); + std::string ModuleName; + static size_t Counter = 0; + raw_string_ostream(ModuleName) << "my_module." << Counter++; + TheModule = std::make_unique(ModuleName, *TheContext); + + // Set the target triple to match the system. + auto SysTargetTriple = sys::getDefaultTargetTriple(); + TheModule->setTargetTriple(SysTargetTriple); + // Set an appropriate data layout + TheModule->setDataLayout(TheJIT->getDataLayout()); + + // Create a new builder for the module. + Builder = std::make_unique>(*TheContext); +} + +static void HandleDefinition() { + if (auto FnAST = ParseDefinition()) { + if (auto *FnIR = FnAST->codegen()) { + ExitOnErr(TheJIT->addModule( + ThreadSafeModule(std::move(TheModule), std::move(TheContext)))); + InitializeModule(); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleExtern() { + if (auto ProtoAST = ParseExtern()) { + if (auto *FnIR = ProtoAST->codegen()) { + FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +static void HandleTopLevelExpression() { + // Evaluate a top-level expression into an anonymous function. + if (auto FnAST = ParseTopLevelExpr()) { + if (FnAST->codegen()) { + // Create a ResourceTracker to track JIT'd memory allocated to our + // anonymous expression -- that way we can free it after executing. 
+ auto RT = TheJIT->getMainJITDylib().createResourceTracker(); + + auto TSM = ThreadSafeModule(std::move(TheModule), std::move(TheContext)); + ExitOnErr(TheJIT->addModule(std::move(TSM), RT)); + InitializeModule(); + + // Search the JIT for the __anon_expr symbol. + auto ExprSymbol = ExitOnErr(TheJIT->lookup("__anon_expr")); + + // Run initializers. + ExitOnErr(TheJIT->initialize()); + + std::unique_ptr T = + std::make_unique("__anon_expr", "Top-level expression"); + // Get the symbol's address and cast it to the right type (takes no + // arguments, returns a double) so we can call it as a native function. + double (*FP)() = ExprSymbol.getAddress().toPtr(); + T->startTimer(); + double Result = FP(); + T->stopTimer(); + fprintf(stderr, "Evaluated to %f\n", Result); + + // Delete the anonymous expression module from the JIT. + ExitOnErr(RT->remove()); + } + } else { + // Skip token for error recovery. + getNextToken(); + } +} + +/// top ::= definition | external | expression | ';' +static void MainLoop() { + while (true) { + switch (CurTok) { + case tok_eof: + return; + case ';': // ignore top-level semicolons. + getNextToken(); + break; + case tok_def: + HandleDefinition(); + break; + case tok_extern: + HandleExtern(); + break; + default: + HandleTopLevelExpression(); + break; + } + fprintf(stderr, "ready> "); + } +} + +//===----------------------------------------------------------------------===// +// Main driver code. +//===----------------------------------------------------------------------===// + +static int usage(char *argv[]) { + errs() << "Usage: " << argv[0] + << " [-h|--help]" + << " [--lower-tapir-to {cilk|none}]" + << " [--run-cilksan]" + << " [--print-ir {before-opt|before-tapir-lowering|after-tapir-loop-spawning|after-tapir-lowering|all}]" + << " [-O[0-3]]" + << "\n"; + return 1; +} + +int main(int argc, char *argv[]) { + // Set the default Tapir target to be OpenCilk. 
+ TheTapirTarget = TapirTargetID::OpenCilk; + + // Parse command-line arguments + for (int i = 1; i < argc; ++i) { + if (std::string(argv[i]) == "-h" || std::string(argv[i]) == "--help") { + return usage(argv); + } else if (std::string(argv[i]) == "--lower-tapir-to") { + std::string targetStr = std::string(argv[++i]); + if (targetStr == "cilk") { + TheTapirTarget = TapirTargetID::OpenCilk; + } else if (targetStr == "none") { + TheTapirTarget = TapirTargetID::None; + } else { + return usage(argv); + } + } else if (std::string(argv[i]) == "--run-cilksan") { + RunCilksan = true; + } else if (std::string(argv[i]) == "--print-ir") { + // PrintIR = true; + std::string level = std::string(argv[++i]); + if (level == "before-opt" || level == "all") { + PrintIRLvl = setPrintIRBeforeOpt(PrintIRLvl); + } + if (level == "before-tapir-lowering" || level == "all") { + PrintIRLvl = setPrintIRBeforeTapirLowering(PrintIRLvl); + } + if (level == "after-tapir-loop-spawning" || level == "all") { + PrintIRLvl = setPrintIRAfterTapirLoopSpawning(PrintIRLvl); + } + if (level == "after-tapir-lowering" || level == "all") { + PrintIRLvl = setPrintIRAfterTapirLowering(PrintIRLvl); + } + if (PrintIRLvl == PrintIR_None) + return usage(argv); + } else if (std::string(argv[i]) == "-O0") { + Optimize = false; + } else if ((std::string(argv[i]) == "-O1") || + (std::string(argv[i]) == "-O2") || + (std::string(argv[i]) == "-O3")) { + Optimize = true; + } else { + return usage(argv); + } + } + + // Get the system architecture name. + Triple SysTriple(sys::getDefaultTargetTriple()); + StringRef ArchName = SysTriple.getArchName(); + + if (TapirTargetID::OpenCilk == TheTapirTarget) { + // Set the path to the OpenCilk runtime-ABI bitcode file. 
+ std::optional Path = sys::Process::FindInEnvPath( + "LIBRARY_PATH", ("libopencilk-abi-" + ArchName + ".bc").str()); + if (!Path.has_value()) + Path = sys::Process::FindInEnvPath("LIBRARY_PATH", "libopencilk-abi.bc"); + if (!Path.has_value()) { + errs() << "Error: Cannot find OpenCilk runtime-ABI bitcode file in " "LIBRARY_PATH.\n"; + return 1; + } + OpenCilkRuntimeBCPath = *Path; + } + + InitializeNativeTarget(); + InitializeNativeTargetAsmPrinter(); + InitializeNativeTargetAsmParser(); + + // Install standard binary operators. + // 1 is lowest precedence. + BinopPrecedence['='] = 2; + BinopPrecedence['<'] = 10; + BinopPrecedence['+'] = 20; + BinopPrecedence['-'] = 20; + BinopPrecedence['*'] = 40; // highest. + + // Prime the first token. + fprintf(stderr, "ready> "); + getNextToken(); + + TheJIT = ExitOnErr(KaleidoscopeJIT::Create()); + + if (TapirTargetID::OpenCilk == TheTapirTarget) { + // Link the OpenCilk runtime library. + std::optional Path = sys::Process::FindInEnvPath( + "LD_LIBRARY_PATH", + ("libopencilk-personality-c-" + ArchName + ".so").str()); + if (!Path.has_value()) + Path = sys::Process::FindInEnvPath("LD_LIBRARY_PATH", + "libopencilk-personality-c.so"); + if (!Path.has_value()) { + errs() << "Error: Cannot find OpenCilk runtime library in " "LD_LIBRARY_PATH.\n"; + return 1; + } + TheJIT->loadLibrary(Path->c_str()); + Path = sys::Process::FindInEnvPath( + "LD_LIBRARY_PATH", ("libopencilk-" + ArchName + ".so").str()); + if (!Path.has_value()) + Path = sys::Process::FindInEnvPath("LD_LIBRARY_PATH", "libopencilk.so"); + if (!Path.has_value()) { + errs() << "Error: Cannot find OpenCilk runtime library in " "LD_LIBRARY_PATH.\n"; + return 1; + } + TheJIT->loadLibrary(Path->c_str()); + } + + if (RunCilksan) { + // Add the Cilksan runtime library. 
+ std::optional Path = sys::Process::FindInEnvPath( + "LD_LIBRARY_PATH", ("libclang_rt.cilksan-" + ArchName + ".so").str()); + if (!Path.has_value()) + Path = sys::Process::FindInEnvPath("LD_LIBRARY_PATH", + "libclang_rt.cilksan.so"); + if (!Path.has_value()) { + errs() + << "Error: Cannot find Cilksan runtime library in LD_LIBRARY_PATH.\n"; + return 1; + } + TheJIT->loadLibrary(Path->c_str()); + } + + InitializeModule(); + + // Run the main "interpreter loop" now. + MainLoop(); + + return 0; +} diff --git a/llvm/examples/Kaleidoscope/lib/toylib.c b/llvm/examples/Kaleidoscope/lib/toylib.c new file mode 100644 index 00000000000000..f38b1a3dbc2009 --- /dev/null +++ b/llvm/examples/Kaleidoscope/lib/toylib.c @@ -0,0 +1,24 @@ +#include +#include + +//===----------------------------------------------------------------------===// +// "Library" functions that can be "extern'd" from user code. +//===----------------------------------------------------------------------===// + +#ifdef _WIN32 +#define DLLEXPORT __declspec(dllexport) +#else +#define DLLEXPORT +#endif + +/// putchard - putchar that takes a double and returns 0. +DLLEXPORT double putchard(double X) { + fputc((char)X, stderr); + return 0; +} + +/// printd - printf that takes a double prints it as "%f\n", returning 0. 
+DLLEXPORT double printd(double X) { + fprintf(stderr, "%f\n", X); + return 0; +} diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h index fbba8ca42a8c76..5148d2045b3988 100644 --- a/llvm/include/llvm-c/Core.h +++ b/llvm/include/llvm-c/Core.h @@ -142,7 +142,12 @@ typedef enum { LLVMCatchRet = 62, LLVMCatchPad = 63, LLVMCleanupPad = 64, - LLVMCatchSwitch = 65 + LLVMCatchSwitch = 65, + + /* Parallel operators */ + LLVMDetach = 69, + LLVMReattach = 70, + LLVMSync = 71, } LLVMOpcode; typedef enum { @@ -1686,6 +1691,9 @@ LLVMTypeRef LLVMTargetExtTypeInContext(LLVMContextRef C, const char *Name, macro(CatchReturnInst) \ macro(CatchSwitchInst) \ macro(CallBrInst) \ + macro(DetachInst) \ + macro(ReattachInst) \ + macro(SyncInst) \ macro(FuncletPadInst) \ macro(CatchPadInst) \ macro(CleanupPadInst) \ @@ -3782,6 +3790,18 @@ LLVMValueRef LLVMBuildCatchSwitch(LLVMBuilderRef B, LLVMValueRef ParentPad, LLVMBasicBlockRef UnwindBB, unsigned NumHandlers, const char *Name); +/* Tapir */ +LLVMValueRef LLVMBuildDetach(LLVMBuilderRef B, + LLVMBasicBlockRef DetachBB, + LLVMBasicBlockRef ContinueBB, + LLVMValueRef SyncRegion); +LLVMValueRef LLVMBuildReattach(LLVMBuilderRef B, + LLVMBasicBlockRef ReattachBB, + LLVMValueRef SyncRegion); +LLVMValueRef LLVMBuildSync(LLVMBuilderRef B, + LLVMBasicBlockRef ContinueBB, + LLVMValueRef SyncRegion); + /* Add a case to the switch instruction */ void LLVMAddCase(LLVMValueRef Switch, LLVMValueRef OnVal, LLVMBasicBlockRef Dest); diff --git a/llvm/include/llvm-c/Transforms/Tapir.h b/llvm/include/llvm-c/Transforms/Tapir.h new file mode 100644 index 00000000000000..c2a363353d1e2c --- /dev/null +++ b/llvm/include/llvm-c/Transforms/Tapir.h @@ -0,0 +1,46 @@ +/*===- Tapir.h - Tapir Transformation Library C Interface -------*- C++ -*-===*\ +|* *| +|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *| +|* Exceptions. *| +|* See https://llvm.org/LICENSE.txt for license information. 
*| +|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This header declares the C interface to libLLVMTapirOpts.a, which *| +|* implements various Tapir transformations of the LLVM IR. *| +|* *| +|* Many exotic languages can interoperate with C code but have a harder time *| +|* with C++ due to name mangling. So in addition to C, this interface enables *| +|* tools written in such languages. *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_C_TRANSFORMS_TAPIR_H +#define LLVM_C_TRANSFORMS_TAPIR_H + +#include "llvm-c/ExternC.h" +#include "llvm-c/Types.h" + +LLVM_C_EXTERN_C_BEGIN + +/** + * @defgroup LLVMCTransformsTapir Tapir transformations + * @ingroup LLVMCTransforms + * + * @{ + */ + +/** See llvm::createLowerTapirToTargetPass function. */ +void LLVMAddLowerTapirToTargetPass(LLVMPassManagerRef PM); + +/** See llvm::createLoopSpawningPass function. */ +void LLVMAddLoopSpawningPass(LLVMPassManagerRef PM); + +/** + * @} + */ + +LLVM_C_EXTERN_C_END + +#endif diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h index 8da8d516499aa4..e96884beb4d540 100644 --- a/llvm/include/llvm/Analysis/AliasAnalysis.h +++ b/llvm/include/llvm/Analysis/AliasAnalysis.h @@ -58,10 +58,12 @@ class BasicBlock; class CatchPadInst; class CatchReturnInst; class DominatorTree; +class DetachInst; class FenceInst; class Function; class LoopInfo; class PreservedAnalyses; +class SyncInst; class TargetLibraryInfo; class Value; template class SmallPtrSetImpl; @@ -286,6 +288,10 @@ class AAQueryInfo { /// store %l, ... bool MayBeCrossIteration = false; + /// Whether the instructions corresponding with this query should be + /// considered as part of the same spindle. 
+ bool AssumeSameSpindle = false; + AAQueryInfo(AAResults &AAR, CaptureInfo *CI) : AAR(AAR), CI(CI) {} }; @@ -339,6 +345,11 @@ class AAResults { /// alias analysis implementations. AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); + /// Version of alias() method where the assumption is explicitly stated of + /// whether the query applies to operations within the same spindle. + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB, + bool AssumeSameSpindle); + /// A convenience wrapper around the primary \c alias interface. AliasResult alias(const Value *V1, LocationSize V1Size, const Value *V2, LocationSize V2Size) { @@ -508,6 +519,8 @@ class AAResults { /// Return information about whether a call and an instruction may refer to /// the same memory locations. ModRefInfo getModRefInfo(const Instruction *I, const CallBase *Call); + ModRefInfo getModRefInfo(const Instruction *I, const CallBase *Call, + bool AssumeSameSpindle); /// Return information about whether a particular call site modifies /// or reads the specified memory location \p MemLoc before instruction \p I @@ -587,6 +600,10 @@ class AAResults { AAQueryInfo &AAQI); ModRefInfo getModRefInfo(const CatchReturnInst *I, const MemoryLocation &Loc, AAQueryInfo &AAQI); + ModRefInfo getModRefInfo(const DetachInst *D, const MemoryLocation &Loc, + AAQueryInfo &AAQI); + ModRefInfo getModRefInfo(const SyncInst *S, const MemoryLocation &Loc, + AAQueryInfo &AAQI); ModRefInfo getModRefInfo(const Instruction *I, const std::optional &OptLoc, AAQueryInfo &AAQIP); @@ -595,6 +612,12 @@ class AAResults { AAQueryInfo &AAQIP); MemoryEffects getMemoryEffects(const CallBase *Call, AAQueryInfo &AAQI); + /// Return the behavior for the task detached from a given detach instruction. + MemoryEffects getMemoryEffects(const DetachInst *D, AAQueryInfo &AAQI); + + /// Return the behavior for a sync instruction. 
+ MemoryEffects getMemoryEffects(const SyncInst *S, AAQueryInfo &AAQI); + private: class Concept; @@ -636,6 +659,14 @@ class BatchAAResults { bool IgnoreLocals = false) { return AA.getModRefInfoMask(Loc, AAQI, IgnoreLocals); } + ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2, + bool AssumeSameSpindle) { + bool OldAssumeSameSpindle = AAQI.AssumeSameSpindle; + AAQI.AssumeSameSpindle = AssumeSameSpindle; + auto Result = AA.getModRefInfo(Call1, Call2, AAQI); + AAQI.AssumeSameSpindle = OldAssumeSameSpindle; + return Result; + } ModRefInfo getModRefInfo(const Instruction *I, const std::optional &OptLoc) { return AA.getModRefInfo(I, OptLoc, AAQI); @@ -643,6 +674,14 @@ class BatchAAResults { ModRefInfo getModRefInfo(const Instruction *I, const CallBase *Call2) { return AA.getModRefInfo(I, Call2, AAQI); } + ModRefInfo getModRefInfo(Instruction *I, const CallBase *Call2, + bool AssumeSameSpindle) { + bool OldAssumeSameSpindle = AAQI.AssumeSameSpindle; + AAQI.AssumeSameSpindle = AssumeSameSpindle; + auto Result = AA.getModRefInfo(I, Call2, AAQI); + AAQI.AssumeSameSpindle = OldAssumeSameSpindle; + return Result; + } ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) { return AA.getArgModRefInfo(Call, ArgIdx); } @@ -845,6 +884,11 @@ class AAResultBase { /// Return true if this pointer is returned by a noalias function. bool isNoAliasCall(const Value *V); +/// Return true if this pointer is returned by a noalias function or, if one +/// assumes the query pertains to operations in the same spindle, a +/// strand_noalias function. +bool isNoAliasCallIfInSameSpindle(const Value *V); + /// Return true if this pointer refers to a distinct and identifiable object. 
/// This returns true for: /// Global Variables and Functions (but not Global Aliases) @@ -854,6 +898,14 @@ bool isNoAliasCall(const Value *V); /// bool isIdentifiedObject(const Value *V); +/// Return true if this pointer refers to a distinct and identifiable object +/// when the query occurs between operations in the same spindle. +/// This returns true for: +/// Every value for which isIdentifiedObject(V) returns true +/// StrandNoAlias returns +/// +bool isIdentifiedObjectIfInSameSpindle(const Value *V); + /// Return true if V is umabigously identified at the function-level. /// Different IdentifiedFunctionLocals can't alias. /// Further, an IdentifiedFunctionLocal can not alias with any function diff --git a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h index ca67e0905c5f6a..f5137e9dd79c05 100644 --- a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h +++ b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h @@ -141,6 +141,10 @@ class BasicAAResult : public AAResultBase { const Value *V2, LocationSize V2Size, AAQueryInfo &AAQI, const Value *O1, const Value *O2); + + AliasResult checkInjectiveArguments(const Value *V1, const Value *O1, + const Value *V2, const Value *O2, + AAQueryInfo &AAQI); }; /// Analysis pass providing a never-invalidated alias analysis result. diff --git a/llvm/include/llvm/Analysis/CodeMetrics.h b/llvm/include/llvm/Analysis/CodeMetrics.h index a9431bca11251a..3c41fd9f11f890 100644 --- a/llvm/include/llvm/Analysis/CodeMetrics.h +++ b/llvm/include/llvm/Analysis/CodeMetrics.h @@ -23,6 +23,7 @@ class BasicBlock; class Loop; class Function; template class SmallPtrSetImpl; +class TargetLibraryInfo; class TargetTransformInfo; class Value; @@ -60,6 +61,9 @@ struct CodeMetrics { /// Keep track of the number of calls to 'big' functions. unsigned NumCalls = false; + /// Keep track of the number of calls to 'builtin' functions. 
+ unsigned NumBuiltinCalls = 0; + /// The number of calls to internal functions with a single caller. /// /// These are likely targets for future inlining, likely exposed by @@ -77,7 +81,8 @@ struct CodeMetrics { /// Add information about a block to the current state. void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI, const SmallPtrSetImpl &EphValues, - bool PrepareForLTO = false); + bool PrepareForLTO = false, + TargetLibraryInfo *TLI = nullptr); /// Collect a loop's ephemeral values (those used only by an assume /// or similar intrinsics in the loop). diff --git a/llvm/include/llvm/Analysis/DataRaceFreeAliasAnalysis.h b/llvm/include/llvm/Analysis/DataRaceFreeAliasAnalysis.h new file mode 100644 index 00000000000000..acdd7508ad4149 --- /dev/null +++ b/llvm/include/llvm/Analysis/DataRaceFreeAliasAnalysis.h @@ -0,0 +1,85 @@ +//===- DataRaceFreeAliasAnalysis.h - DRF-based AA ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This is the interface for an alias analysis based on the assumption that +/// a Tapir program is data-race free. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_DATARACEFREEALIASANALYSIS_H +#define LLVM_ANALYSIS_DATARACEFREEALIASANALYSIS_H + +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" + +namespace llvm { + +class TaskInfo; +class MemoryLocation; + +extern cl::opt EnableDRFAA; + +/// A simple alias analysis implementation that implements the assumption that +/// the Tapir program is data-race free. This analysis uses TaskInfo to +/// determine which may-aliasing instructions may happen in parallel. 
If two +/// that may alias instructions may happen in parallel and the instructions are +/// not otherwise marked atomic, then the data-race-free assumption asserts that +/// they do not alias. +class DRFAAResult : public AAResultBase { + TaskInfo &TI; + +public: + explicit DRFAAResult(TaskInfo &TI) : AAResultBase(), TI(TI) {} + DRFAAResult(DRFAAResult &&Arg) : AAResultBase(std::move(Arg)), TI(Arg.TI) {} + + /// Handle invalidation events in the new pass manager. + bool invalidate(Function &Fn, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv); + + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB, + AAQueryInfo &AAQI, const Instruction *CtxI); + ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc, + AAQueryInfo &AAQI); + ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2, + AAQueryInfo &AAQI); +}; + +/// Analysis pass providing a never-invalidated alias analysis result. +class DRFAA : public AnalysisInfoMixin { + friend AnalysisInfoMixin; + static AnalysisKey Key; + +public: + using Result = DRFAAResult; + + DRFAAResult run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Legacy wrapper pass to provide the DRFAAResult object. +class DRFAAWrapperPass : public FunctionPass { + std::unique_ptr Result; + +public: + static char ID; + + DRFAAWrapperPass(); + + DRFAAResult &getResult() { return *Result; } + const DRFAAResult &getResult() const { return *Result; } + + bool runOnFunction(Function &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +/// Creates an instance of \c DRFAAWrapperPass. 
+FunctionPass *createDRFAAWrapperPass(); + +} // end namespace llvm + +#endif // LLVM_ANALYSIS_DATARACEFREEALIASANALYSIS_H diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h index 327315f831e11b..a2d07717cb621b 100644 --- a/llvm/include/llvm/Analysis/DependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h @@ -40,6 +40,8 @@ #define LLVM_ANALYSIS_DEPENDENCEANALYSIS_H #include "llvm/ADT/SmallBitVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" @@ -288,6 +290,47 @@ namespace llvm { friend class DependenceInfo; }; + struct GeneralAccess { + Instruction *I = nullptr; + std::optional Loc; + unsigned OperandNum = unsigned(-1); + ModRefInfo ModRef = ModRefInfo::NoModRef; + + GeneralAccess() = default; + GeneralAccess(Instruction *I, std::optional Loc, + unsigned OperandNum, ModRefInfo MRI) + : I(I), Loc(Loc), OperandNum(OperandNum), ModRef(MRI) {} + GeneralAccess(Instruction *I, std::optional Loc, + ModRefInfo MRI) + : I(I), Loc(Loc), ModRef(MRI) {} + + bool isValid() const { + return (I && Loc); + } + const Value *getPtr() const { + if (!Loc) + return nullptr; + return Loc->Ptr; + } + bool isRef() const { + return isRefSet(ModRef); + } + bool isMod() const { + return isModSet(ModRef); + } + + inline bool operator==(const GeneralAccess &RHS) { + if (!isValid() && !RHS.isValid()) + return true; + if (!isValid() || !RHS.isValid()) + return false; + return (I == RHS.I) && (Loc == RHS.Loc) && + (OperandNum == RHS.OperandNum) && (ModRef == RHS.ModRef); + } + }; + + raw_ostream &operator<<(raw_ostream &OS, const GeneralAccess &GA); + /// DependenceInfo - This class is the main dependence-analysis driver. 
/// class DependenceInfo { @@ -354,6 +397,17 @@ namespace llvm { Function *getFunction() const { return F; } + AAResults *getAA() const { return AA; } + + /// depends - Tests for a dependence between the general accesses SrcA and + /// DstA. Returns NULL if no dependence; otherwise, returns a Dependence + /// (or a FullDependence) with as much information as can be gleaned. The + /// flag PossiblyLoopIndependent should be set by the caller if it appears + /// that control flow can reach from Src to Dst without traversing a loop + /// back edge. + std::unique_ptr depends(GeneralAccess *SrcA, + GeneralAccess *DstA, + bool PossiblyLoopIndependent); private: AAResults *AA; ScalarEvolution *SE; @@ -531,6 +585,7 @@ namespace llvm { const Instruction *Dst); unsigned CommonLevels, SrcLevels, MaxLevels; + const Loop *CommonLoop; /// mapSrcLoop - Given one of the loops containing the source, return /// its level index in our numbering scheme. @@ -544,6 +599,11 @@ namespace llvm { /// in LoopNest. bool isLoopInvariant(const SCEV *Expression, const Loop *LoopNest) const; + /// isTrueAtLoopEntry - Returns true if the predicate LHS `Pred` RHS is true + /// at entry of L. + bool isTrueAtLoopEntry(const Loop *L, ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) const; + /// Makes sure all subscript pairs share the same integer type by /// sign-extending as necessary. /// Sign-extending a subscript is safe because getelementptr assumes the @@ -580,7 +640,8 @@ namespace llvm { /// extensions and symbolics. 
bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X, - const SCEV *Y) const; + const SCEV *Y, + const Loop *L = nullptr) const; /// isKnownLessThan - Compare to see if S is less than Size /// Another wrapper for isKnownNegative(S - max(Size, 1)) with some extra @@ -968,6 +1029,28 @@ namespace llvm { const SCEV *DstAccessFn, SmallVectorImpl &SrcSubscripts, SmallVectorImpl &DstSubscripts); + /// Given a linear access function, tries to recover subscripts + /// for each dimension of the array element access. + bool tryDelinearize(GeneralAccess *SrcA, GeneralAccess *DstA, + SmallVectorImpl &Pair); + + /// Tries to delinearize access function for a fixed size multi-dimensional + /// array, by deriving subscripts from GEP instructions. Returns true upon + /// success and false otherwise. + bool tryDelinearizeFixedSize(GeneralAccess *SrcA, GeneralAccess *DstA, + const SCEV *SrcAccessFn, + const SCEV *DstAccessFn, + SmallVectorImpl &SrcSubscripts, + SmallVectorImpl &DstSubscripts); + + /// Tries to delinearize access function for a multi-dimensional array with + /// symbolic runtime sizes. + /// Returns true upon success and false otherwise. 
+ bool tryDelinearizeParametricSize( + GeneralAccess *SrcA, GeneralAccess *DstA, const SCEV *SrcAccessFn, + const SCEV *DstAccessFn, SmallVectorImpl &SrcSubscripts, + SmallVectorImpl &DstSubscripts); + /// checkSubscript - Helper function for checkSrcSubscript and /// checkDstSubscript to avoid duplicate code bool checkSubscript(const SCEV *Expr, const Loop *LoopNest, diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index eb35ef515a1fdd..43369197b573f5 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -169,8 +169,9 @@ class MemoryDepChecker { const SmallVectorImpl &Instrs) const; }; - MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L) - : PSE(PSE), InnermostLoop(L) {} + MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L, + TaskInfo *TI = nullptr) + : PSE(PSE), InnermostLoop(L), TI(TI) {} /// Register the location (instructions are given increasing numbers) /// of a write access. @@ -302,6 +303,9 @@ class MemoryDepChecker { /// RecordDependences is true. SmallVector Dependences; + /// Optional TaskInfo + TaskInfo *TI; + /// Check whether there is a plausible dependence between the two /// accesses. /// @@ -562,7 +566,8 @@ class RuntimePointerChecking { class LoopAccessInfo { public: LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, - AAResults *AA, DominatorTree *DT, LoopInfo *LI); + AAResults *AA, DominatorTree *DT, LoopInfo *LI, + TaskInfo *TI = nullptr); /// Return true we can analyze the memory accesses in the loop and there are /// no memory dependence cycles. @@ -639,7 +644,8 @@ class LoopAccessInfo { private: /// Analyze the loop. void analyzeLoop(AAResults *AA, LoopInfo *LI, - const TargetLibraryInfo *TLI, DominatorTree *DT); + const TargetLibraryInfo *TLI, DominatorTree *DT, + TaskInfo *TI); /// Check if the structure of the loop allows it to be analyzed by this /// pass. 
@@ -773,12 +779,14 @@ class LoopAccessInfoManager { AAResults &AA; DominatorTree &DT; LoopInfo &LI; + TaskInfo &TI; const TargetLibraryInfo *TLI = nullptr; public: LoopAccessInfoManager(ScalarEvolution &SE, AAResults &AA, DominatorTree &DT, - LoopInfo &LI, const TargetLibraryInfo *TLI) - : SE(SE), AA(AA), DT(DT), LI(LI), TLI(TLI) {} + LoopInfo &LI, TaskInfo &TI, + const TargetLibraryInfo *TLI) + : SE(SE), AA(AA), DT(DT), LI(LI), TI(TI), TLI(TLI) {} const LoopAccessInfo &getInfo(Loop &L); diff --git a/llvm/include/llvm/Analysis/LoopAnalysisManager.h b/llvm/include/llvm/Analysis/LoopAnalysisManager.h index d22675a308aac7..a0170c3653d882 100644 --- a/llvm/include/llvm/Analysis/LoopAnalysisManager.h +++ b/llvm/include/llvm/Analysis/LoopAnalysisManager.h @@ -43,6 +43,7 @@ class MemorySSA; class ScalarEvolution; class TargetLibraryInfo; class TargetTransformInfo; +class TaskInfo; /// The adaptor from a function pass to a loop pass computes these analyses and /// makes them available to the loop passes "for free". Each loop pass is @@ -56,6 +57,7 @@ struct LoopStandardAnalysisResults { ScalarEvolution &SE; TargetLibraryInfo &TLI; TargetTransformInfo &TTI; + TaskInfo &TI; BlockFrequencyInfo *BFI; BranchProbabilityInfo *BPI; MemorySSA *MSSA; diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h index 3434630c27cfe7..a8ca19677bd480 100644 --- a/llvm/include/llvm/Analysis/LoopInfo.h +++ b/llvm/include/llvm/Analysis/LoopInfo.h @@ -65,6 +65,51 @@ class LLVM_EXTERNAL_VISIBILITY Loop : public LoopBase { explicit operator bool() const { return Start && End; } }; + /// Return all blocks inside the loop that have successors outside of the + /// loop. These are the blocks _inside of the current loop_ which branch out. + /// The returned list is always unique. + void getExitingBlocks(SmallVectorImpl &ExitingBlocks, + bool IgnoreDetachUnwind = false) const; + + /// If getExitingBlocks would return exactly one block, return that block. 
+ /// Otherwise return null. + BasicBlock *getExitingBlock(bool IgnoreDetachUnwind = false) const; + + /// Get basic blocks that are outside of the loop, based on CFG analysis, but + /// inside tasks created within the loop. Many analyses and optimizations + /// should treat these blocks as part of the loop. + void getTaskExits(SmallPtrSetImpl &TaskExits) const; + + /// Return all of the successor blocks of this loop. These are the blocks + /// _outside of the current loop_ which are branched to. + void getExitBlocks(SmallVectorImpl &ExitBlocks) const; + + /// If getExitBlocks would return exactly one block, return that block. + /// Otherwise return null. + BasicBlock *getExitBlock() const; + + /// Return true if no exit block for the loop has a predecessor that is + /// outside the loop. + bool hasDedicatedExits() const; + + /// Return all unique successor blocks of this loop. + /// These are the blocks _outside of the current loop_ which are branched to. + void getUniqueExitBlocks(SmallVectorImpl &ExitBlocks) const; + + /// Return all unique successor blocks of this loop except successors from + /// Latch block are not considered. If the exit comes from Latch has also + /// non Latch predecessor in a loop it will be added to ExitBlocks. + /// These are the blocks _outside of the current loop_ which are branched to. + void + getUniqueNonLatchExitBlocks(SmallVectorImpl &ExitBlocks) const; + + /// If getUniqueExitBlocks would return exactly one block, return that block. + /// Otherwise return null. + BasicBlock *getUniqueExitBlock() const; + + /// Return all pairs of (_inside_block_,_outside_block_). + void getExitEdges(SmallVectorImpl &ExitEdges) const; + /// Return true if the specified value is loop invariant. bool isLoopInvariant(const Value *V) const; @@ -375,6 +420,14 @@ class LLVM_EXTERNAL_VISIBILITY Loop : public LoopBase { /// Add llvm.loop.mustprogress to this loop's loop id metadata. 
void setLoopMustProgress(); + /// Add llvm.loop.from.tapir.loop to this loop's loop id metadata, to indicate + /// that this loop was derived from a Tapir loop. + void setDerivedFromTapirLoop(); + + /// Returns true if the loop was derived from a Tapir loop, according to its + /// metadata. + bool wasDerivedFromTapirLoop() const; + void dump() const; void dumpVerbose() const; diff --git a/llvm/include/llvm/Analysis/LoopIterator.h b/llvm/include/llvm/Analysis/LoopIterator.h index 360f196a80daf4..bf686e3f280f96 100644 --- a/llvm/include/llvm/Analysis/LoopIterator.h +++ b/llvm/include/llvm/Analysis/LoopIterator.h @@ -104,6 +104,7 @@ class LoopBlocksDFS { private: Loop *L; + SmallPtrSet TaskExitBlocks; /// Map each block to its postorder number. A block is only mapped after it is /// preorder visited by DFS. It's postorder number is initially zero and set @@ -112,8 +113,14 @@ class LoopBlocksDFS { std::vector PostBlocks; public: - LoopBlocksDFS(Loop *Container) : - L(Container), PostNumbers(NextPowerOf2(Container->getNumBlocks())) { + LoopBlocksDFS(Loop *Container) + : L(Container), PostNumbers(NextPowerOf2(Container->getNumBlocks())) { + PostBlocks.reserve(Container->getNumBlocks()); + } + LoopBlocksDFS(Loop *Container, bool IncludeTaskExits) + : L(Container), PostNumbers(NextPowerOf2(Container->getNumBlocks())) { + if (IncludeTaskExits) + L->getTaskExits(TaskExitBlocks); PostBlocks.reserve(Container->getNumBlocks()); } @@ -123,7 +130,9 @@ class LoopBlocksDFS { void perform(LoopInfo *LI); /// Return true if postorder numbers are assigned to all loop blocks. - bool isComplete() const { return PostBlocks.size() == L->getNumBlocks(); } + bool isComplete() const { + return PostBlocks.size() == (L->getNumBlocks() + TaskExitBlocks.size()); + } /// Iterate over the cached postorder blocks. 
POIterator beginPostorder() const { @@ -175,6 +184,8 @@ class LoopBlocksRPO { public: LoopBlocksRPO(Loop *Container) : DFS(Container) {} + LoopBlocksRPO(Loop *Container, bool IncludeTaskExits) + : DFS(Container, IncludeTaskExits) {} /// Traverse the loop blocks and store the DFS result. void perform(LoopInfo *LI) { @@ -229,7 +240,7 @@ class LoopBlocksTraversal { /// /// TODO: If anyone is interested, we could record preorder numbers here. bool visitPreorder(BasicBlock *BB) { - if (!DFS.L->contains(LI->getLoopFor(BB))) + if (!DFS.L->contains(LI->getLoopFor(BB)) && !DFS.TaskExitBlocks.count(BB)) return false; return DFS.PostNumbers.insert(std::make_pair(BB, 0)).second; diff --git a/llvm/include/llvm/Analysis/MemoryBuiltins.h b/llvm/include/llvm/Analysis/MemoryBuiltins.h index 711bbf6a0afe5f..f09405391ebd74 100644 --- a/llvm/include/llvm/Analysis/MemoryBuiltins.h +++ b/llvm/include/llvm/Analysis/MemoryBuiltins.h @@ -118,6 +118,10 @@ std::optional getAllocSize( return V; }); +/// Gets the size arguments for the requested allocation. +std::pair getAllocSizeArgs(const CallBase *CB, + const TargetLibraryInfo *TLI); + /// If this is a call to an allocation function that initializes memory to a /// fixed value, return said value in the requested type. Otherwise, return /// nullptr. diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h index 27185aa9942e4e..a4b269a0c2a6f6 100644 --- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -32,6 +32,7 @@ class AssumptionCache; class BatchAAResults; class DominatorTree; class PHITransAddr; +class TaskInfo; /// A memory dependence query can return one of three different answers. 
class MemDepResult { @@ -356,6 +357,7 @@ class MemoryDependenceResults { const TargetLibraryInfo &TLI; DominatorTree &DT; PredIteratorCache PredCache; + TaskInfo *TI; unsigned DefaultBlockScanLimit; @@ -366,8 +368,9 @@ class MemoryDependenceResults { public: MemoryDependenceResults(AAResults &AA, AssumptionCache &AC, const TargetLibraryInfo &TLI, DominatorTree &DT, - unsigned DefaultBlockScanLimit) - : AA(AA), AC(AC), TLI(TLI), DT(DT), + unsigned DefaultBlockScanLimit, + TaskInfo *TI = nullptr) + : AA(AA), AC(AC), TLI(TLI), DT(DT), TI(TI), DefaultBlockScanLimit(DefaultBlockScanLimit) {} /// Handle invalidation in the new PM. diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h index 94d7f1a78b8470..99293fc4ac41b1 100644 --- a/llvm/include/llvm/Analysis/MemorySSA.h +++ b/llvm/include/llvm/Analysis/MemorySSA.h @@ -115,6 +115,7 @@ class LLVMContext; class MemoryAccess; class MemorySSAWalker; class Module; +class TaskInfo; class Use; class Value; class raw_ostream; @@ -699,7 +700,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryPhi, MemoryAccess) /// accesses. class MemorySSA { public: - MemorySSA(Function &, AliasAnalysis *, DominatorTree *); + MemorySSA(Function &, AliasAnalysis *, DominatorTree *, TaskInfo * = nullptr); // MemorySSA must remain where it's constructed; Walkers it creates store // pointers to it. @@ -871,6 +872,7 @@ class MemorySSA { void renumberBlock(const BasicBlock *) const; AliasAnalysis *AA = nullptr; DominatorTree *DT; + TaskInfo *TI; Function &F; // Memory SSA mappings @@ -915,7 +917,7 @@ class MemorySSAUtil { // This function should not be used by new passes. static bool defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU, - AliasAnalysis &AA); + AliasAnalysis &AA, TaskInfo *TI = nullptr); }; /// An analysis that produces \c MemorySSA for a function. 
diff --git a/llvm/include/llvm/Analysis/MustExecute.h b/llvm/include/llvm/Analysis/MustExecute.h index 9c97bd1725ac2c..51e506b89e45e0 100644 --- a/llvm/include/llvm/Analysis/MustExecute.h +++ b/llvm/include/llvm/Analysis/MustExecute.h @@ -42,6 +42,7 @@ class Loop; class LoopInfo; class PostDominatorTree; class raw_ostream; +class TaskInfo; /// Captures loop safety information. /// It keep information for loop blocks may throw exception or otherwise @@ -96,6 +97,7 @@ class LoopSafetyInfo { /// least once (under the assumption that the loop is entered). virtual bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, + const TaskInfo *TI, const Loop *CurLoop) const = 0; LoopSafetyInfo() = default; @@ -121,6 +123,7 @@ class SimpleLoopSafetyInfo: public LoopSafetyInfo { bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, + const TaskInfo *TI, const Loop *CurLoop) const override; }; @@ -146,6 +149,7 @@ class ICFLoopSafetyInfo: public LoopSafetyInfo { bool isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, + const TaskInfo *TI, const Loop *CurLoop) const override; /// Returns true if we could not execute a memory-modifying instruction before diff --git a/llvm/include/llvm/Analysis/SparsePropagation.h b/llvm/include/llvm/Analysis/SparsePropagation.h index d5805a7314757f..7cee58b38aa848 100644 --- a/llvm/include/llvm/Analysis/SparsePropagation.h +++ b/llvm/include/llvm/Analysis/SparsePropagation.h @@ -337,6 +337,11 @@ void SparseSolver::getFeasibleSuccessors( return; } + if (isa(TI) || isa(TI) || isa(TI)) { + Succs.assign(Succs.size(), true); + return; + } + SwitchInst &SI = cast(TI); LatticeVal SCValue; if (AggressiveUndef) diff --git a/llvm/include/llvm/Analysis/TapirRaceDetect.h b/llvm/include/llvm/Analysis/TapirRaceDetect.h new file mode 100644 index 00000000000000..9def6b92307125 --- /dev/null +++ b/llvm/include/llvm/Analysis/TapirRaceDetect.h @@ -0,0 +1,365 @@ +//===-- TapirRaceDetect.h - Tapir 
determinacy-race detection ----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// TapirRaceDetect is an LLVM pass that analyses Tapir tasks and dependences +// between memory accesses to find accesses that might race. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_TAPIRRACEDETECT_H +#define LLVM_ANALYSIS_TAPIRRACEDETECT_H + +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Pass.h" + +namespace llvm { + +class Loop; +class LoopInfo; +class RuntimePointerChecking; +class ScalarEvolution; +class StratABIList; +class TargetLibraryInfo; +class TaskInfo; + +/// RaceInfo +class RaceInfo { +public: + // Possible conditions for a race: + // + // 1) Within the function, two instructions that might execute in parallel + // access aliasing locations, and at least one performs a write. + // + // 2) An instruction reads or writes a location that might alias a global + // variable or function argument. In this case, the race would occur via an + // ancestor of the invocation of this function. + enum RaceType + { + None = 0, + Local = 1, // Possible race via local pointer or control flow. 
+ ViaAncestorRef = 2, // Possible race with ref in caller (e.g., via function + // parameter or global) + ViaAncestorMod = 4, // Possible race with mod in caller (e.g., via function + // parameter or global) + Opaque = 8, // Possible race via unknown program state (e.g., global data) + }; + + static RaceType setLocalRace(const RaceType RT) { + return RaceType(static_cast(RT) | static_cast(Local)); + } + static RaceType setRaceViaAncestorRef(const RaceType RT) { + return RaceType(static_cast(RT) | + static_cast(ViaAncestorRef)); + } + static RaceType setRaceViaAncestorMod(const RaceType RT) { + return RaceType(static_cast(RT) | + static_cast(ViaAncestorMod)); + } + static RaceType setOpaqueRace(const RaceType RT) { + return RaceType(static_cast(RT) | static_cast(Opaque)); + } + static RaceType clearOpaqueRace(const RaceType RT) { + return RaceType(static_cast(RT) & ~static_cast(Opaque)); + } + static RaceType unionRaceTypes(const RaceType RT1, const RaceType RT2) { + return RaceType(static_cast(RT1) | static_cast(RT2)); + } + + static bool isRace(const RaceType RT) { + return (RaceType::None != RT); + } + static bool isLocalRace(const RaceType RT) { + return (static_cast(RT) & static_cast(RaceType::Local)) == + static_cast(RaceType::Local); + } + static bool isRaceViaAncestor(const RaceType RT) { + return isRaceViaAncestorRef(RT) || isRaceViaAncestorMod(RT); + } + static bool isRaceViaAncestorRef(const RaceType RT) { + return (static_cast(RT) & + static_cast(RaceType::ViaAncestorRef)) == + static_cast(RaceType::ViaAncestorRef); + } + static bool isRaceViaAncestorMod(const RaceType RT) { + return (static_cast(RT) & + static_cast(RaceType::ViaAncestorMod)) == + static_cast(RaceType::ViaAncestorMod); + } + static bool isOpaqueRace(const RaceType RT) { + return (static_cast(RT) & static_cast(RaceType::Opaque)) == + static_cast(RaceType::Opaque); + } + static void printRaceType(RaceInfo::RaceType RT, raw_ostream &OS) { + if (RaceInfo::isLocalRace(RT)) + OS << "Local"; + 
if (RaceInfo::isRaceViaAncestor(RT)) { + if (RaceInfo::isLocalRace(RT)) + OS << ", "; + OS << "Via Ancestor"; + if (RaceInfo::isRaceViaAncestorMod(RT)) + OS << " Mod"; + if (RaceInfo::isRaceViaAncestorRef(RT)) + OS << " Ref"; + } + if (RaceInfo::isOpaqueRace(RT)) { + if (RaceInfo::isLocalRace(RT) || RaceInfo::isRaceViaAncestor(RT)) + OS << ", "; + OS << "Opaque"; + } + } + + using MemAccessInfo = PointerIntPair; + + // Struct to store data about a race. + struct RaceData { + MemAccessInfo Access = { nullptr, false }; + unsigned OperandNum = static_cast(-1); + RaceType Type = RaceType::None; + GeneralAccess Racer; + + RaceData() = default; + RaceData(MemAccessInfo Access, unsigned OperandNum, const RaceType RT, + GeneralAccess Racer = GeneralAccess()) + : Access(Access), OperandNum(OperandNum), Type(RT), + Racer(Racer) {} + + const Value *getPtr() const { return Access.getPointer(); } + }; + + // Map to store race results. + struct ResultTy : + public DenseMap> { + + void recordRace(const Instruction *I, MemAccessInfo Access, + unsigned OperandNum, const RaceType RT, + const GeneralAccess &Racer) { + if (!count(I)) { + (*this)[I].push_back(RaceData(Access, OperandNum, RT, Racer)); + return; + } + for (RaceData &RD : (*this)[I]) + if ((RD.Access == Access) && (RD.OperandNum == OperandNum) && + (RD.Racer == Racer)) { + RD.Type = unionRaceTypes(RD.Type, RT); + return; + } + (*this)[I].push_back(RaceData(Access, OperandNum, RT, Racer)); + } + void recordLocalRace(const GeneralAccess &GA, + const GeneralAccess &Racer) { + recordRace(GA.I, MemAccessInfo(GA.getPtr(), GA.isMod()), GA.OperandNum, + RaceType::Local, Racer); + } + void recordRaceViaAncestorRef(const GeneralAccess &GA, + const GeneralAccess &Racer) { + recordRace(GA.I, MemAccessInfo(GA.getPtr(), GA.isMod()), GA.OperandNum, + RaceType::ViaAncestorRef, Racer); + } + void recordRaceViaAncestorMod(const GeneralAccess &GA, + const GeneralAccess &Racer) { + recordRace(GA.I, MemAccessInfo(GA.getPtr(), GA.isMod()), 
GA.OperandNum, + RaceType::ViaAncestorMod, Racer); + } + void recordOpaqueRace(const GeneralAccess &GA, + const GeneralAccess &Racer) { + recordRace(GA.I, MemAccessInfo(GA.getPtr(), GA.isMod()), GA.OperandNum, + RaceType::Opaque, Racer); + } + + RaceType getRaceType(const Instruction *I, + const SmallPtrSetImpl *Filter = nullptr) const { + if (!count(I)) + return RaceType::None; + RaceType RT = RaceType::None; + + // Union the recorded race types + for (RaceData &RD : lookup(I)) { + if (Filter && RD.Racer.isValid() && Filter->count(RD.Racer.I)) + continue; + RT = unionRaceTypes(RD.Type, RT); + } + return RT; + } + + ModRefInfo getLocalRaceModRef( + const Instruction *I, + const SmallPtrSetImpl *Filter = nullptr) const { + if (!count(I)) + return ModRefInfo::NoModRef; + + ModRefInfo MRI = ModRefInfo::NoModRef; + // Union the recorded local race mod-ref info + for (RaceData &RD : lookup(I)) { + if (RaceType::Local != RD.Type) + continue; + if (Filter && RD.Racer.isValid() && Filter->count(RD.Racer.I)) + continue; + if (!RD.Racer.isValid()) + return ModRefInfo::ModRef; + if (RD.Racer.isMod()) + MRI |= ModRefInfo::Mod; + if (RD.Racer.isRef()) + MRI |= ModRefInfo::Ref; + } + return MRI; + } + }; + using ObjectMRTy = DenseMap; + using PtrChecksTy = + DenseMap>; + using AccessToUnderlyingObjMap = + DenseMap>; + + using FilterTy = const SmallPtrSetImpl; + + RaceInfo(Function *F, DominatorTree &DT, LoopInfo &LI, TaskInfo &TI, + DependenceInfo &DI, ScalarEvolution &SE, + const TargetLibraryInfo *TLI); + + const SmallVectorImpl &getRaceData(const Instruction *I) { + return Result[I]; + } + + RaceType getRaceType(const Instruction *I, FilterTy *Filter = nullptr) const { + return Result.getRaceType(I, Filter); + } + bool mightRace(const Instruction *I, FilterTy *Filter = nullptr) const { + return isRace(getRaceType(I, Filter)); + } + bool mightRaceLocally(const Instruction *I, + FilterTy *Filter = nullptr) const { + return isLocalRace(getRaceType(I, Filter)); + } + bool 
mightRaceViaAncestor(const Instruction *I, + FilterTy *Filter = nullptr) const { + return isRaceViaAncestor(getRaceType(I, Filter)); + } + bool mightRaceViaAncestorRef(const Instruction *I, + FilterTy *Filter = nullptr) const { + return isRaceViaAncestorRef(getRaceType(I, Filter)); + } + bool mightRaceViaAncestorMod(const Instruction *I, + FilterTy *Filter = nullptr) const { + return isRaceViaAncestorMod(getRaceType(I, Filter)); + } + bool mightRaceOpaquely(const Instruction *I, + FilterTy *Filter = nullptr) const { + return isOpaqueRace(getRaceType(I, Filter)); + } + + const ObjectMRTy &getObjectMRForRace() const { + return ObjectMRForRace; + } + bool ObjectInvolvedInRace(const Value *V) const { + return ObjectMRForRace.count(V); + } + ModRefInfo GetObjectMRForRace(const Value *V) const { + if (!ObjectInvolvedInRace(V)) + return ModRefInfo::NoModRef; + return ObjectMRForRace.lookup(V); + } + + RaceType getOverallRaceType() const { + RaceType RT = RaceType::None; + for (auto Res : Result) + for (auto &RD : Res.second) + RT = unionRaceTypes(RT, RD.Type); + return RT; + } + + ModRefInfo getLocalRaceModRef( + const Instruction *I, + const SmallPtrSetImpl *Filter = nullptr) const { + return Result.getLocalRaceModRef(I, Filter); + } + + void getObjectsFor(Instruction *I, SmallPtrSetImpl &Objects); + void getObjectsFor(MemAccessInfo Access, + SmallPtrSetImpl &Objects); + + bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &); + + void print(raw_ostream &) const; + + AAResults *getAA() const { return DI.getAA(); } + ScalarEvolution *getSE() const { return &SE; } + +private: + void analyzeFunction(); + + Function *F; + + // Analyses + DominatorTree &DT; + LoopInfo &LI; + TaskInfo &TI; + DependenceInfo &DI; + ScalarEvolution &SE; + const TargetLibraryInfo *TLI; + + ResultTy Result; + // Map from underlying objects to mod/ref behavior necessary for potential + // race. 
+ ObjectMRTy ObjectMRForRace; + PtrChecksTy AllPtrRtChecks; + + AccessToUnderlyingObjMap AccessToObjs; +}; + +// AnalysisPass +class TapirRaceDetect : public AnalysisInfoMixin { +public: + using Result = RaceInfo; + Result run(Function &F, FunctionAnalysisManager &FAM); + +private: + static AnalysisKey Key; + friend struct AnalysisInfoMixin; +}; // class TapirRaceDetect + +// Printer pass +class TapirRaceDetectPrinterPass + : public PassInfoMixin { +public: + TapirRaceDetectPrinterPass(raw_ostream &OS) : OS(OS) {} + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); + +private: + raw_ostream &OS; +}; // class TapirRaceDetectPrinterPass + +// Legacy pass manager pass +class TapirRaceDetectWrapperPass : public FunctionPass { +public: + static char ID; + + TapirRaceDetectWrapperPass(); + + bool runOnFunction(Function &F) override; + void releaseMemory() override; + void getAnalysisUsage(AnalysisUsage &) const override; + void print(raw_ostream &, const Module * = nullptr) const override; + RaceInfo &getRaceInfo() const; + +private: + std::unique_ptr Info; +}; // class TapirRaceDetectWrapperPass + +// createTapirRaceDetectWrapperPass - This creates an instance of the +// TapirRaceDetect wrapper pass. +FunctionPass *createTapirRaceDetectWrapperPass(); + +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Analysis/TapirTargetFuncs.def b/llvm/include/llvm/Analysis/TapirTargetFuncs.def new file mode 100644 index 00000000000000..2fb2bd867eb7a5 --- /dev/null +++ b/llvm/include/llvm/Analysis/TapirTargetFuncs.def @@ -0,0 +1,29 @@ +//===-- TapirTargetFuncs.def - Library information ----*- C++ -*-----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// This .def file will either fill in the enum definition or fill in the +// string representation array definition for TargetLibraryInfo. +// Which is defined depends on whether TLI_DEFINE_ENUM is defined or +// TLI_DEFINE_STRING is defined. Only one should be defined at a time. + +#define TLI_DEFINE_STRING_INTERNAL(string_repr) string_repr, + +#if defined(TLI_DEFINE_CILK_LIBS) +/// unsigned __cilkrts_get_nworkers(void); +TLI_DEFINE_STRING_INTERNAL("__cilkrts_get_nworkers") +/// unsigned __cilkrts_get_worker_number(void); +TLI_DEFINE_STRING_INTERNAL("__cilkrts_get_worker_number") +/// void __cilkrts_hyper_create(__cilkrts_hyperobject_base *key); +TLI_DEFINE_STRING_INTERNAL("__cilkrts_hyper_create") +/// void __cilkrts_hyper_destroy(__cilkrts_hyperobject_base *key); +TLI_DEFINE_STRING_INTERNAL("__cilkrts_hyper_destroy") +/// void *__cilkrts_hyper_lookup(__cilkrts_hyperobject_base *key); +TLI_DEFINE_STRING_INTERNAL("__cilkrts_hyper_lookup") +#endif + +#undef TLI_DEFINE_STRING_INTERNAL diff --git a/llvm/include/llvm/Analysis/TapirTaskInfo.h b/llvm/include/llvm/Analysis/TapirTaskInfo.h new file mode 100644 index 00000000000000..88066904dce84a --- /dev/null +++ b/llvm/include/llvm/Analysis/TapirTaskInfo.h @@ -0,0 +1,1545 @@ +//===- TapirTaskInfo.h - Tapir task calculator ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the TapirTaskInfo class that is used to identify parallel +// tasks as represented in Tapir. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_TAPIRTASKINFO_H +#define LLVM_ANALYSIS_TAPIRTASKINFO_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Support/Allocator.h" +#include +#include + +namespace llvm { + +class PHINode; +class Loop; +class raw_ostream; +class Spindle; +class Task; +class TaskInfo; + +//===----------------------------------------------------------------------===// +/// In Tapir, the basic blocks in a function can be partitioned into +/// spindles. A spindle is a connected set of basic blocks with a +/// single entry point for parallel control flow. When executed, all +/// blocks within a spindle are guaranteed to execute sequentially on +/// one worker. +/// +class Spindle { +public: + enum SPType { Entry, Detach, Sync, Phi }; + using SpindleEdge = std::pair; + +private: + SPType Ty; + + Task *ParentTask = nullptr; + + // The list of basic blocks in this spindle. The first entry is the entry + // block of the spindle. + std::vector Blocks; + + SmallPtrSet DenseBlockSet; + + // Predecessor and successor spindles. + SmallVector Incoming; + SmallVector Outgoing; + + // If this spindle starts with a taskframe.create, TaskFrameUser points to the + // task that uses that created taskframe. 
+ Task *TaskFrameUser = nullptr; + Spindle *TaskFrameParent = nullptr; + SetVector SubTaskFrames; + SetVector TaskFrameSubtasks; + SetVector TaskFrameSpindles; + + Spindle(const Spindle &) = delete; + const Spindle &operator=(const Spindle &) = delete; + +public: + BasicBlock *getEntry() const { return getBlocks().front(); } + bool isEntry(const BasicBlock *B) const { return (getBlocks().front() == B); } + Task *getParentTask() const { return ParentTask; } + + void setParentTask(Task *T) { ParentTask = T; } + + SPType getType() const { return Ty; } + bool isSync() const { return Sync == Ty; } + bool isPhi() const { return Phi == Ty; } + + Value *getTaskFrameCreate() const; + Task *getTaskFrameUser() const { return TaskFrameUser; } + Spindle *getTaskFrameParent() const { return TaskFrameParent; } + BasicBlock *getTaskFrameContinuation() const; + /// Return the nesting level of this taskframe. + unsigned getTaskFrameDepth() const { + unsigned D = 0; + for (const Spindle *CurTF = TaskFrameParent; CurTF; + CurTF = CurTF->TaskFrameParent) + ++D; + return D; + } + + Task *getTaskFromTaskFrame() const; + + /// Return true if the specified basic block is in this spindle. + bool contains(const BasicBlock *BB) const { + return DenseBlockSet.count(BB); + } + + /// Return true if the specified instruction is in this spindle. + bool contains(const Instruction *Inst) const { + return contains(Inst->getParent()); + } + + /// Returns true if the given spindle \p S is in the set of this spindle's + /// taskframe spindles. Returns false if this is not a taskframe.create + /// spindle or if \p S is not in the set. + bool taskFrameContains(Spindle *S) const { + return TaskFrameSpindles.count(S); + } + + /// Return true if this spindle is a shared EH spindle. + bool isSharedEH() const; + + /// Return true if this spindle is the continuation of a detached task. 
+ bool isTaskContinuation() const; + + /// Return true if the predecessor spindle Pred is part of a different task + /// from this spindle. + bool predInDifferentTask(const Spindle *Pred) const { + return (getParentTask() != Pred->getParentTask()) && !isSharedEH(); + } + /// Return true if the successor spindle Succ is part of the same task as this + /// spindle. + bool succInSameTask(const Spindle *Succ) const; + + /// Return true if the successor spindle Succ is part of the same task as this + /// spindle. + bool succInSubTask(const Spindle *Succ) const; + + /// Get a list of the basic blocks which make up this task. + ArrayRef getBlocks() const { + return Blocks; + } + using iterator = typename ArrayRef::const_iterator; + iterator block_begin() const { return getBlocks().begin(); } + iterator block_end() const { return getBlocks().end(); } + inline iterator_range blocks() const { + return make_range(block_begin(), block_end()); + } + + /// Get the number of blocks in this task in constant time. + unsigned getNumBlocks() const { + return Blocks.size(); + } + + /// Return a direct, mutable handle to the blocks vector so that we can + /// mutate it efficiently with techniques like `std::remove`. + std::vector &getBlocksVector() { + return Blocks; + } + /// Return a direct, mutable handle to the blocks set so that we can + /// mutate it efficiently. + SmallPtrSetImpl &getBlocksSet() { + return DenseBlockSet; + } + + /// True if terminator in the block can branch to another block that is + /// outside of this spindle. + bool isSpindleExiting(const BasicBlock *BB) const { + if (BB->getTerminator()->getNumSuccessors() == 0) + return true; + for (const auto *Succ : children(BB)) + if (!contains(Succ)) + return true; + return false; + } + + /// Helper class for iterator to walk just the exiting basic blocks of the + /// spindle. 
+ class SpindleExitingFilter { + const Spindle *S = nullptr; + public: + SpindleExitingFilter() {} + SpindleExitingFilter(const Spindle *S) : S(S) {} + bool operator()(const BasicBlock *B) const { + return S->isSpindleExiting(B); + } + }; + inline iterator_range< + filter_iterator::iterator, + SpindleExitingFilter>> spindle_exits() { + return make_filter_range(blocks(), SpindleExitingFilter(this)); + } + inline iterator_range< + filter_iterator::const_iterator, + SpindleExitingFilter>> spindle_exits() const { + return make_filter_range(blocks(), SpindleExitingFilter(this)); + } + + // Iterators for the incoming and outgoing edges of this spindle. + using spedge_iterator = typename SmallVectorImpl::iterator; + using spedge_const_iterator = + typename SmallVectorImpl::const_iterator; + using spedge_range = iterator_range; + using spedge_const_range = iterator_range; + + inline spedge_iterator in_begin() { return Incoming.begin(); } + inline spedge_const_iterator in_begin() const { + return Incoming.begin(); + } + inline spedge_iterator in_end() { return Incoming.end(); } + inline spedge_const_iterator in_end() const { + return Incoming.end(); + } + inline spedge_range in_edges() { + return make_range(in_begin(), in_end()); + } + inline spedge_const_range in_edges() const { + return make_range(in_begin(), in_end()); + } + + inline spedge_iterator out_begin() { return Outgoing.begin(); } + inline spedge_const_iterator out_begin() const { + return Outgoing.begin(); + } + inline spedge_iterator out_end() { return Outgoing.end(); } + inline spedge_const_iterator out_end() const { + return Outgoing.end(); + } + inline spedge_range out_edges() { + return make_range(out_begin(), out_end()); + } + inline spedge_const_range out_edges() const { + return make_range(out_begin(), out_end()); + } + + template + class adj_iterator_impl + : public iterator_adaptor_base< + adj_iterator_impl, SPEdgeIt, + typename std::iterator_traits::iterator_category, + SpindleT, std::ptrdiff_t, 
SpindleT *, SpindleT> { + + using BaseT = iterator_adaptor_base< + adj_iterator_impl, SPEdgeIt, + typename std::iterator_traits::iterator_category, + SpindleT, std::ptrdiff_t, SpindleT *, SpindleT>; + + public: + adj_iterator_impl(SPEdgeIt Begin) : BaseT(Begin) {} + inline SpindleT operator*() const { return BaseT::I->first; } + }; + + using adj_iterator = adj_iterator_impl<>; + using adj_const_iterator = + adj_iterator_impl; + using adj_range = iterator_range; + using adj_const_range = iterator_range; + + using tf_subtask_iterator = typename SetVector::const_iterator; + using tf_subtask_const_iterator = tf_subtask_iterator; + inline tf_subtask_iterator tf_subtask_begin() const { + return TaskFrameSubtasks.begin(); + } + inline tf_subtask_iterator tf_subtask_end() const { + return TaskFrameSubtasks.end(); + } + inline iterator_range taskframe_subtasks() const { + return make_range(tf_subtask_begin(), tf_subtask_end()); + } + + using subtaskframe_iterator = typename SetVector::const_iterator; + using subtaskframe_const_iterator = subtaskframe_iterator; + inline subtaskframe_iterator subtaskframe_begin() const { + return SubTaskFrames.begin(); + } + inline subtaskframe_iterator subtaskframe_end() const { + return SubTaskFrames.end(); + } + inline iterator_range subtaskframes() const { + return make_range(subtaskframe_begin(), subtaskframe_end()); + } + + using tf_spindle_iterator = typename SetVector::const_iterator; + using tf_spindle_const_iterator = tf_spindle_iterator; + inline tf_spindle_iterator tf_spindle_begin() const { + return TaskFrameSpindles.begin(); + } + inline tf_spindle_iterator tf_spindle_end() const { + return TaskFrameSpindles.end(); + } + inline iterator_range taskframe_spindles() const { + return make_range(tf_spindle_begin(), tf_spindle_end()); + } + + /// Print spindle with all the BBs inside it. + void print(raw_ostream &OS, bool Verbose = false) const; + + /// Raw method to add block B to this spindle. 
+ void addBlock(BasicBlock &B) { + Blocks.push_back(&B); + DenseBlockSet.insert(&B); + } + + // Returns true if the basic block B predeces this spindle. + bool blockPrecedesSpindle(const BasicBlock *B) const { + for (const BasicBlock *SB : successors(B)) + if (SB == getEntry()) + return true; + return false; + } + + // Raw method to add spindle S as a predecessor of this spindle. + void addSpindleEdgeTo(Spindle *Succ, BasicBlock *FromExit) { + assert(contains(FromExit) && + "Cannot add spindle edge from block not in this spindle"); + assert(Succ->blockPrecedesSpindle(FromExit) && + "FromExit must precede successor spindle"); + Outgoing.push_back(SpindleEdge(Succ, FromExit)); + Succ->Incoming.push_back(SpindleEdge(this, FromExit)); + } + +protected: + friend class Task; + friend class TaskInfo; + + /// This creates an empty spindle. + Spindle() = default; + + explicit Spindle(BasicBlock *BB, SPType Ty) : Ty(Ty) { + Blocks.push_back(BB); + DenseBlockSet.insert(BB); + } + + // To allow passes like SCEV to key analysis results off of `Task` pointers, + // we disallow re-use of pointers within a task pass manager. This means task + // passes should not be `delete` ing `Task` objects directly (and risk a later + // `Task` allocation re-using the address of a previous one) but should be + // using TaskInfo::markAsRemoved, which keeps around the `Task` pointer till + // the end of the lifetime of the `TaskInfo` object. + // + // To make it easier to follow this rule, we mark the destructor as + // non-public. + ~Spindle() { + Blocks.clear(); + DenseBlockSet.clear(); + Incoming.clear(); + Outgoing.clear(); + ParentTask = nullptr; + TaskFrameUser = nullptr; + TaskFrameParent = nullptr; + SubTaskFrames.clear(); + TaskFrameSubtasks.clear(); + TaskFrameSpindles.clear(); + } +}; + +raw_ostream &operator<<(raw_ostream &OS, const Spindle &S); + +// Iterators for the predecessors of a Spindle, using the Spindle edges. 
+using pred_spindle_iterator = typename Spindle::adj_iterator; +using pred_spindle_const_iterator = typename Spindle::adj_const_iterator; +using pred_spindle_range = iterator_range; +using pred_spindle_const_range = iterator_range; + +inline pred_spindle_iterator pred_begin(Spindle *S) { + return pred_spindle_iterator(S->in_begin()); +} +inline pred_spindle_const_iterator pred_begin(const Spindle *S) { + return pred_spindle_const_iterator(S->in_begin()); +} +inline pred_spindle_iterator pred_end(Spindle *S) { + return pred_spindle_iterator(S->in_end()); +} +inline pred_spindle_const_iterator pred_end(const Spindle *S) { + return pred_spindle_const_iterator(S->in_end()); +} +inline pred_spindle_range predecessors(Spindle *S) { + return pred_spindle_range(pred_begin(S), pred_end(S)); +} +inline pred_spindle_const_range predecessors(const Spindle *S) { + return pred_spindle_const_range(pred_begin(S), pred_end(S)); +} + +// Iterators for the successors of a Spindle, using the Spindle edges. +using succ_spindle_iterator = typename Spindle::adj_iterator; +using succ_spindle_const_iterator = typename Spindle::adj_const_iterator; +using succ_spindle_range = iterator_range; +using succ_spindle_const_range = iterator_range; + +inline succ_spindle_iterator succ_begin(Spindle *S) { + return succ_spindle_iterator(S->out_begin()); +} +inline succ_spindle_const_iterator succ_begin(const Spindle *S) { + return succ_spindle_const_iterator(S->out_begin()); +} +inline succ_spindle_iterator succ_end(Spindle *S) { + return succ_spindle_iterator(S->out_end()); +} +inline succ_spindle_const_iterator succ_end(const Spindle *S) { + return succ_spindle_const_iterator(S->out_end()); +} +inline succ_spindle_range successors(Spindle *S) { + return succ_spindle_range(succ_begin(S), succ_end(S)); +} +inline succ_spindle_const_range successors(const Spindle *S) { + return succ_spindle_const_range(succ_begin(S), succ_end(S)); +} + +// Helper class for iterating over spindles within the same task. 
+class InTaskFilter { + const Spindle *S = nullptr; +public: + InTaskFilter() {} + InTaskFilter(const Spindle *S) : S(S) {} + bool operator()(const Spindle *Succ) const { + return S->succInSameTask(Succ); + } +}; + +//===--------------------------------------------------------------------===// +// GraphTraits specializations for spindle graphs +//===--------------------------------------------------------------------===// + +// Provide specializations of GraphTraits to be able to treat a function +// as a graph of spindles. + +template <> struct GraphTraits { + using NodeRef = Spindle *; + using ChildIteratorType = succ_spindle_iterator; + + static NodeRef getEntryNode(Spindle *S) { return S; } + static ChildIteratorType child_begin(NodeRef N) { return succ_begin(N); } + static ChildIteratorType child_end(NodeRef N) { return succ_end(N); } +}; + +template <> struct GraphTraits { + using NodeRef = const Spindle *; + using ChildIteratorType = succ_spindle_const_iterator; + + static NodeRef getEntryNode(const Spindle *S) { return S; } + static ChildIteratorType child_begin(NodeRef N) { return succ_begin(N); } + static ChildIteratorType child_end(NodeRef N) { return succ_end(N); } +}; + +// Provide specializations of GraphTrais to be able to treat a function as a +// graph of spindles and walk it in inverse order. Inverse order in this case +// is considered to be when traversing the predecessor edges of a spindle +// instead of the successor edges. 
+ +template <> struct GraphTraits> { + using NodeRef = Spindle *; + using ChildIteratorType = pred_spindle_iterator; + + static NodeRef getEntryNode(Inverse G) { return G.Graph; } + static ChildIteratorType child_begin(NodeRef N) { return pred_begin(N); } + static ChildIteratorType child_end(NodeRef N) { return pred_end(N); } +}; + +template <> struct GraphTraits> { + using NodeRef = const Spindle *; + using ChildIteratorType = pred_spindle_const_iterator; + + static NodeRef getEntryNode(Inverse G) { return G.Graph; } + static ChildIteratorType child_begin(NodeRef N) { return pred_begin(N); } + static ChildIteratorType child_end(NodeRef N) { return pred_end(N); } +}; + +// Special type of GraphTrait that uses a filter on the successors of a spindle. +// This GraphTrait is used to build the InTask and UnderTask GraphTraits. + +template +using FilteredSuccessorSpindles = std::pair; + +template +struct GraphTraits> { + using NodeRef = Spindle *; + using ChildIteratorType = filter_iterator; + + static NodeRef getEntryNode(FilteredSuccessorSpindles S) { + return S.first; + } + static ChildIteratorType child_begin(NodeRef N) { + return make_filter_range(successors(N), Filter(N)).begin(); + } + static ChildIteratorType child_end(NodeRef N) { + return make_filter_range(successors(N), Filter(N)).end(); + } +}; + +template +struct GraphTraits> { + using NodeRef = const Spindle *; + using ChildIteratorType = + filter_iterator; + + static NodeRef getEntryNode( + FilteredSuccessorSpindles S) { + return S.first; + } + static ChildIteratorType child_begin(NodeRef N) { + return make_filter_range(successors(N), Filter(N)).begin(); + } + static ChildIteratorType child_end(NodeRef N) { + return make_filter_range(successors(N), Filter(N)).end(); + } +}; + +// Wrapper to allow traversal of only those spindles within a task, excluding +// all subtasks of that task. 
+template +struct InTask + : public FilteredSuccessorSpindles { + inline InTask(SpindlePtrT S) + : FilteredSuccessorSpindles + (S, InTaskFilter(S)) {} +}; + +template<> struct GraphTraits> : + public GraphTraits> { + using NodeRef = Spindle *; + static NodeRef getEntryNode(InTask G) { + return G.first; + } +}; +template<> struct GraphTraits> : + public GraphTraits> { + using NodeRef = const Spindle *; + static NodeRef getEntryNode(InTask G) { + return G.first; + } +}; + +// Wrapper to traversal of taskframe tree. +template +struct TaskFrames { + const GraphType &Graph; + + inline TaskFrames(const GraphType &G) : Graph(G) {} +}; + +template <> struct GraphTraits> { + using NodeRef = Spindle *; + using ChildIteratorType = Spindle::subtaskframe_iterator; + + static NodeRef getEntryNode(TaskFrames G) { return G.Graph; } + static ChildIteratorType child_begin(NodeRef N) { + return N->subtaskframe_begin(); + } + static ChildIteratorType child_end(NodeRef N) { + return N->subtaskframe_end(); + } +}; + +template <> struct GraphTraits> { + using NodeRef = const Spindle *; + using ChildIteratorType = Spindle::subtaskframe_iterator; + + static NodeRef getEntryNode(TaskFrames G) { return G.Graph; } + static ChildIteratorType child_begin(NodeRef N) { + return N->subtaskframe_begin(); + } + static ChildIteratorType child_end(NodeRef N) { + return N->subtaskframe_end(); + } +}; + +//===----------------------------------------------------------------------===// +/// Instances of this class are used to represent Tapir tasks that are detected +/// in the flow graph. +/// +class Task { + Task *ParentTask; + // Dominator tree + DominatorTree &DomTree; + // Tasks contained entirely within this one. + std::vector SubTasks; + + // List of spindles that make up this task. + std::vector Spindles; + SmallPtrSet DenseSpindleSet; + + // List of shared exception-handling spindles associated with this task. 
+ SmallVector SharedSubTaskEH; + SmallPtrSet DenseEHSpindleSet; + + // Pointers to the continuation and exceptional-continuation spindles for this + // task. + Spindle *Continuation = nullptr; + Spindle *EHContinuation = nullptr; + // The exceptional continuation of the task might not be a landingpad, due to + // transformations on exception-handling code. Hence we keep track of the + // value of landingpad at the exceptional continuation. + Value *LPadValueInEHContinuation = nullptr; + + // Spindle that creates the taskframe this task uses. + Spindle *TaskFrameCreateSpindle = nullptr; + + // Set of taskframe.create spindles that are children of this task. + SmallVector TaskFrameCreates; + + // Set of root taskframe.create spindles that are children of this task. + SmallVector TaskFrameRoots; + + Task(const Task &) = delete; + const Task &operator=(const Task &) = delete; + +public: + /// Return the nesting level of this task. An outer-most task has depth 1, + /// for consistency with task depth values used for basic blocks, where depth + /// 0 is used for blocks not inside any tasks. + unsigned getTaskDepth() const { + unsigned D = 0; + for (const Task *CurTask = ParentTask; CurTask; + CurTask = CurTask->ParentTask) + ++D; + return D; + } + Spindle *getEntrySpindle() const { + return getSpindles().front(); + } + BasicBlock *getEntry() const { + return getEntrySpindle()->getEntry(); + } + Task *getParentTask() const { return ParentTask; } + void setParentTask(Task *T) { ParentTask = T; } + + /// Return true if this task is "serial," meaning it does not itself perform a + /// detach. This method does not preclude functions called by this task from + /// performing a detach. + bool isSerial() const { return SubTasks.empty(); } + + /// Return true if this task is a "root" task, meaning that it has no parent task. + bool isRootTask() const { return nullptr == ParentTask; } + + /// Return true if the analysis found child taskframes of this task. 
This + /// method assumes that taskframes are in canonical form and that + /// findTaskFrameTree() has run. + bool foundChildTaskFrames() const { return !TaskFrameRoots.empty(); } + + /// Return the detach instruction that created this task, or nullptr if this + /// task is a root task. + DetachInst *getDetach() const { + if (isRootTask()) return nullptr; + BasicBlock *Detacher = getEntry()->getSinglePredecessor(); + assert(Detacher && + "Entry block of non-root task should have a single predecessor"); + assert(isa(Detacher->getTerminator()) && + "Single predecessor of a task should be terminated by a detach"); + return dyn_cast(Detacher->getTerminator()); + } + + /// Get the taskframe that this task uses. + Value *getTaskFrameUsed() const { + // Scan the entry block for a taskframe.use intrinsic. If we find one, + // return its argument. + for (const Instruction &I : *getEntry()) + if (const IntrinsicInst *II = dyn_cast(&I)) + if (Intrinsic::taskframe_use == II->getIntrinsicID()) + return II->getArgOperand(0); + return nullptr; + } + + // Get the spindle that creates the taskframe this task uses. + Spindle *getTaskFrameCreateSpindle() const { return TaskFrameCreateSpindle; } + + /// Get the spindle for the continuation of this task. Returns nullptr if + /// this task is a root task, meaning it has no continuation spindle. + Spindle *getContinuationSpindle() const { + assert(((isRootTask() && !Continuation) || (!isRootTask() && Continuation)) + && "Task should have a continuation spindle iff not a root task."); + return Continuation; + } + + /// Get the spindle for the exceptional continuation o fthis task. Returns + /// nullptr if this task is a root task or the detach for this task does not + /// have an unwind destination. 
+ Spindle *getEHContinuationSpindle() const { + assert(((isRootTask() && !EHContinuation) || + (!isRootTask() && + ((getDetach()->hasUnwindDest() && EHContinuation) || + (!getDetach()->hasUnwindDest() && !EHContinuation)))) && + "Task should have a EH continuation spindle iff not a root task and " + "detach has an unwind destination."); + return EHContinuation; + } + + /// Get the spindle for the exceptional continuation o fthis task. Returns + /// nullptr if this task is a root task or the detach for this task does not + /// have an unwind destination. + Value *getLPadValueInEHContinuationSpindle() const { + assert(((isRootTask() && !LPadValueInEHContinuation) || + (!isRootTask() && + ((getDetach()->hasUnwindDest() && LPadValueInEHContinuation) || + (!getDetach()->hasUnwindDest() && + !LPadValueInEHContinuation)))) && + "Task should have a EH continuation spindle iff not a root task and " + "detach has an unwind destination."); + return LPadValueInEHContinuation; + } + + /// Return true if spindle S is in this task. + bool contains(const Spindle *S) const { + return DenseSpindleSet.count(S); + } + + /// Return true if spindle S is a shared EH spindle dominated by this task. + bool containsSharedEH(const Spindle *S) const { + return DenseEHSpindleSet.count(S); + } + + /// Return true if basic block B is in a shared EH spindle dominated by this + /// task. + bool containsSharedEH(const BasicBlock *B) const { + for (const Spindle *S : SharedSubTaskEH) + if (S->contains(B)) + return true; + return false; + } + + /// Return the tasks contained entirely within this task. 
+ ArrayRef getSubTasks() const { + return SubTasks; + } + std::vector &getSubTasksVector() { + return SubTasks; + } + using iterator = typename std::vector::const_iterator; + using const_iterator = iterator; + using reverse_iterator = + typename std::vector::const_reverse_iterator; + using const_reverse_iterator = reverse_iterator; + inline iterator begin() const { return SubTasks.begin(); } + inline iterator end() const { return SubTasks.end(); } + inline reverse_iterator rbegin() const { return SubTasks.rbegin(); } + inline reverse_iterator rend() const { return SubTasks.rend(); } + inline bool empty() const { return SubTasks.empty(); } + inline iterator_range subtasks() const { + return make_range(begin(), end()); + } + + using tf_iterator = typename SmallVectorImpl::const_iterator; + using tf_const_iterator = tf_iterator; + inline tf_iterator tf_begin() const { return TaskFrameCreates.begin(); } + inline tf_iterator tf_end() const { return TaskFrameCreates.end(); } + inline iterator_range taskframe_creates() const { + return make_range(tf_begin(), tf_end()); + } + inline tf_iterator tf_roots_begin() const { + return TaskFrameRoots.begin(); + } + inline tf_iterator tf_roots_end() const { return TaskFrameRoots.end(); } + inline iterator_range taskframe_roots() const { + return make_range(tf_roots_begin(), tf_roots_end()); + } + + /// Get the number of spindles in this task in constant time. + unsigned getNumSpindles() const { + return Spindles.size(); + } + + /// Return the spindles contained within this task and no subtask. 
+ ArrayRef getSpindles() const { + return Spindles; + } + std::vector &getSpindlesVector() { + return Spindles; + } + SmallPtrSetImpl &getSpindlesSet() { + return DenseSpindleSet; + } + + using spindle_iterator = typename std::vector::const_iterator; + inline spindle_iterator spindle_begin() const { + return Spindles.begin(); + } + inline spindle_iterator spindle_end() const { + return Spindles.end(); + } + inline iterator_range spindles() const { + return make_range(spindle_begin(), spindle_end()); + } + + /// Returns true if this task exits to a shared EH spindle. + bool hasSharedEHExit() const { + if (isRootTask()) return false; + if (!getParentTask()->tracksSharedEHSpindles()) return false; + + for (Spindle *S : getSpindles()) + for (Spindle *Succ : successors(S)) + if (getParentTask()->containsSharedEH(Succ)) + return true; + + return false; + } + + /// Returns true if SharedEH is a shared EH exit of this task. + bool isSharedEHExit(const Spindle *SharedEH) const; + + /// Get the shared EH spindles that this task can exit to and append them to + /// SpindleVec. + void getSharedEHExits(SmallVectorImpl &SpindleVec) const; + + /// Returns true if this task tracks any shared EH spindles for its subtasks. + bool tracksSharedEHSpindles() const { + return !SharedSubTaskEH.empty(); + } + /// Get the number of shared EH spindles in this task in constant time. + unsigned getNumSharedEHSpindles() const { + return SharedSubTaskEH.size(); + } + + /// Return the shared EH spindles contained within this task. + const SmallVectorImpl &getSharedEHSpindles() const { + return SharedSubTaskEH; + } + SmallVectorImpl &getSharedEHSpindles() { + return SharedSubTaskEH; + } + /// Get the shared EH spindle containing basic block B, if it exists. 
+ const Spindle *getSharedEHContaining(const BasicBlock *B) const { + for (const Spindle *S : SharedSubTaskEH) + if (S->contains(B)) + return S; + return nullptr; + } + Spindle *getSharedEHContaining(BasicBlock *B) const { + for (Spindle *S : SharedSubTaskEH) + if (S->contains(B)) + return S; + return nullptr; + } + + using shared_eh_spindle_iterator = + typename SmallVectorImpl::const_iterator; + shared_eh_spindle_iterator shared_eh_spindle_begin() const { + return getSharedEHSpindles().begin(); + } + shared_eh_spindle_iterator shared_eh_spindle_end() const { + return getSharedEHSpindles().end(); + } + inline iterator_range + shared_eh_spindles() const { + return make_range(shared_eh_spindle_begin(), shared_eh_spindle_end()); + } + + /// Get a list of all basic blocks in this task, including blocks in + /// descendant tasks. + void getDominatedBlocks(SmallVectorImpl &Blocks) const { + DomTree.getDescendants(getEntry(), Blocks); + } + + /// Returns true if this task encloses basic block BB simply, that is, without + /// checking any shared EH exits of this task. + bool simplyEncloses(const BasicBlock *BB) const { + return DomTree.dominates(getEntry(), BB); + } + + /// Return true if specified task encloses basic block BB. + bool encloses(const BasicBlock *BB) const { + if (simplyEncloses(BB)) + return true; + if (ParentTask && ParentTask->tracksSharedEHSpindles()) + if (const Spindle *SharedEH = ParentTask->getSharedEHContaining(BB)) + return isSharedEHExit(SharedEH); + return false; + } + + /// Returns either the representative subtask of this task that encloses basic + /// block B or the this task itself if no subtask encloses B. This task must + /// enclose B. + /// + /// These representatives are useful for studying series-parallel + /// relationships between basic blocks in a function when those basic blocks + /// might appear in nested subtasks. 
+ const Task *getSubTaskEnclosing(const BasicBlock *BB) const { + assert(encloses(BB) && "Task does not enclose given BasicBlock"); + for (Task *SubT : subtasks()) + if (SubT->encloses(BB)) + return SubT; + return this; + } + + /// True if terminator in the block can branch to another block that is + /// outside of the current task. + bool isTaskExiting(const BasicBlock *BB) const { + if (BB->getTerminator()->getNumSuccessors() == 0) + return true; + for (const auto *Succ : children(BB)) { + if (isa(Succ->getFirstNonPHIOrDbgOrLifetime())) + continue; + if (!encloses(Succ)) + return true; + } + return false; + } + + /// True if the spindle can exit to a block that is outside of the current + /// task. + bool isTaskExiting(const Spindle *S) const { + for (const BasicBlock *Exit : S->spindle_exits()) + if (isTaskExiting(Exit)) + return true; + return false; + } + + // Returns true if the specified value is defined in the parent of this task. + bool definedInParent(const Value *V) const { + if (isa(V)) return true; + if (const Instruction *I = dyn_cast(V)) + return !encloses(I->getParent()); + return false; + } + + /// Verify task structure + void verify(const TaskInfo *TI, const BasicBlock *Entry, + const DominatorTree &DT) const; + + /// Print task with all the BBs inside it. + void print(raw_ostream &OS, unsigned Depth = 0, bool Verbose = false) const; + + void dump() const; + void dumpVerbose() const; + + /// Raw method to add spindle S to this task. + void addSpindle(Spindle &S) { + Spindles.push_back(&S); + DenseSpindleSet.insert(&S); + } + + /// Raw method to add a shared exception-handling spindle S to this task. + void addEHSpindle(Spindle &S) { + SharedSubTaskEH.push_back(&S); + DenseEHSpindleSet.insert(&S); + } + + // Add task ST as a subtask of this task. 
+ void addSubTask(Task *ST) { + assert(!ST->ParentTask && "SubTask already has a parent task."); + ST->setParentTask(this); + SubTasks.push_back(ST); + } + + // Set Spindle S to be the continuation spindle of this task. + void setContinuationSpindle(Spindle *S) { + assert(!isRootTask() && "Root task cannot have a continuation spindle."); + Continuation = S; + } + + // Set S to be the exceptional continuation spindle of this task. + void setEHContinuationSpindle(Spindle *S, Value *LPadVal) { + assert((!isRootTask() || getDetach()->hasUnwindDest()) && + "Task should not have an exceptional continuation."); + EHContinuation = S; + LPadValueInEHContinuation = LPadVal; + } + +protected: + friend class TaskInfo; + + explicit Task(Spindle &Entry, DominatorTree &DomTree) + : ParentTask(nullptr), DomTree(DomTree) { + Spindles.push_back(&Entry); + DenseSpindleSet.insert(&Entry); + } + + // To allow passes like SCEV to key analysis results off of `Task` pointers, + // we disallow re-use of pointers within a task pass manager. This means task + // passes should not be `delete` ing `Task` objects directly (and risk a later + // `Task` allocation re-using the address of a previous one) but should be + // using TaskInfo::markAsRemoved, which keeps around the `Task` pointer till + // the end of the lifetime of the `TaskInfo` object. + // + // To make it easier to follow this rule, we mark the destructor as + // non-public. 
+ ~Task() { + for (auto *SubTask : SubTasks) + SubTask->~Task(); + + for (auto *Spindle : Spindles) + Spindle->~Spindle(); + + for (auto *SharedEH : SharedSubTaskEH) + SharedEH->~Spindle(); + + SubTasks.clear(); + Spindles.clear(); + SharedSubTaskEH.clear(); + DenseSpindleSet.clear(); + DenseEHSpindleSet.clear(); + ParentTask = nullptr; + Continuation = nullptr; + EHContinuation = nullptr; + LPadValueInEHContinuation = nullptr; + TaskFrameCreateSpindle = nullptr; + TaskFrameCreates.clear(); + TaskFrameRoots.clear(); + } +}; + +raw_ostream &operator<<(raw_ostream &OS, const Task &T); + +//===--------------------------------------------------------------------===// +// GraphTraits specializations for task spindle graphs +//===--------------------------------------------------------------------===// + +// Allow clients to walk the list of nested tasks. +template <> struct GraphTraits { + using NodeRef = const Task *; + using ChildIteratorType = Task::const_iterator; + + static NodeRef getEntryNode(const Task *T) { return T; } + static ChildIteratorType child_begin(NodeRef N) { return N->begin(); } + static ChildIteratorType child_end(NodeRef N) { return N->end(); } +}; + +template <> struct GraphTraits { + using NodeRef = Task *; + using ChildIteratorType = Task::iterator; + + static NodeRef getEntryNode(Task *T) { return T; } + static ChildIteratorType child_begin(NodeRef N) { return N->begin(); } + static ChildIteratorType child_end(NodeRef N) { return N->end(); } +}; + +// Filter for spindle successors in the same task or a subtask. +class UnderTaskFilter { + const Spindle *S = nullptr; +public: + UnderTaskFilter() {} + UnderTaskFilter(const Spindle *S) : S(S) {} + bool operator()(const Spindle *Succ) const { + return S->succInSameTask(Succ) || + (Succ->getParentTask()->getParentTask() == S->getParentTask()); + } +}; + +// Wrapper to allow traversal of only those spindles within a task, including +// all subtasks of that task. 
+template +struct UnderTask + : public FilteredSuccessorSpindles { + inline UnderTask(SpindlePtrT S) + : FilteredSuccessorSpindles + (S, UnderTaskFilter(S)) {} +}; + +template<> struct GraphTraits> : + public GraphTraits> { + using NodeRef = Spindle *; + static NodeRef getEntryNode(UnderTask G) { + return G.first; + } +}; +template<> struct GraphTraits> : + public GraphTraits> { + using NodeRef = const Spindle *; + static NodeRef getEntryNode(UnderTask G) { + return G.first; + } +}; + +// Structure to record the synced state of each spindle. +struct IsSyncedState { + enum class SyncInfo { + Unsynced = 0, + Synced = 1, + TaskEntry = 2, + NoUnsync = Synced | TaskEntry, + Incomplete = 4, + }; + + static inline bool isUnsynced(const SyncInfo SyncI) { + return (static_cast(SyncI) & static_cast(SyncInfo::NoUnsync)) == + static_cast(SyncInfo::Unsynced); + } + static inline bool isSynced(const SyncInfo SyncI) { + return !isUnsynced(SyncI); + } + static inline bool isIncomplete(const SyncInfo SyncI) { + return (static_cast(SyncI) & static_cast(SyncInfo::Incomplete)) == + static_cast(SyncInfo::Incomplete); + } + static inline SyncInfo setUnsynced(const SyncInfo SyncI) { + // Once a sync state is set to unsynced, it's complete. + return SyncInfo(static_cast(SyncI) & + static_cast(SyncInfo::Unsynced)); + } + static inline SyncInfo setIncomplete(const SyncInfo SyncI) { + return SyncInfo(static_cast(SyncI) | + static_cast(SyncInfo::Incomplete)); + } + static inline SyncInfo setComplete(const SyncInfo SyncI) { + return SyncInfo(static_cast(SyncI) & + ~static_cast(SyncInfo::Incomplete)); + } + + DenseMap SyncedState; + + bool markDefiningSpindle(const Spindle *S); + bool evaluate(const Spindle *S, unsigned EvalNum); +}; + +using MPTaskListTy = DenseMap>; + +// Structure to record the set of child tasks that might be in parallel with +// this spindle. +struct MaybeParallelTasks { + MPTaskListTy TaskList; + + // TODO: Use a bitvector representation to perform the analysis. 
+ + bool markDefiningSpindle(const Spindle *S); + bool evaluate(const Spindle *S, unsigned EvalNum); +}; + +//===----------------------------------------------------------------------===// +/// This class builds and contains all of the top-level task structures in the +/// specified function. +/// +class TaskInfo { + // BBMap - Mapping of basic blocks to the innermost spindle they occur in + DenseMap BBMap; + // SpindleMap - Mapping of spindles to the innermost task they occur in + DenseMap SpindleMap; + // Pointer to the root task for the function. All tasks detached within this + // function body are descendants of this root task. + Task *RootTask = nullptr; + + // Cache storing maybe-parallel-task state. This cache is initialized lazily + // by calls to the mayHappenInParallel method. + mutable std::unique_ptr MPTasks; + + // Flag to indicate whether the taskframe tree has been computed. + mutable bool ComputedTaskFrameTree = false; + + BumpPtrAllocator TaskAllocator; + + void operator=(const TaskInfo &) = delete; + TaskInfo(const TaskInfo &) = delete; + + // Helper for computing the spindles and subtasks contained in all taskframes. 
+ void findTaskFrameTreeHelper(Spindle *TFSpindle, + SmallVectorImpl &ParentWorkList, + SmallPtrSetImpl &SubTFVisited); + +public: + TaskInfo() = default; + ~TaskInfo() { releaseMemory(); } + + TaskInfo(TaskInfo &&Arg) + : BBMap(std::move(Arg.BBMap)), + SpindleMap(std::move(Arg.SpindleMap)), + RootTask(std::move(Arg.RootTask)), + MPTasks(std::move(Arg.MPTasks)), + TaskAllocator(std::move(Arg.TaskAllocator)) { + Arg.RootTask = nullptr; + } + TaskInfo &operator=(TaskInfo &&RHS) { + BBMap = std::move(RHS.BBMap); + SpindleMap = std::move(RHS.SpindleMap); + if (RootTask) + RootTask->~Task(); + RootTask = std::move(RHS.RootTask); + MPTasks = std::move(RHS.MPTasks); + TaskAllocator = std::move(RHS.TaskAllocator); + RHS.RootTask = nullptr; + return *this; + } + + void releaseMemory() { + for (auto BBToSpindle : BBMap) + if (!BBToSpindle.getSecond()->getParentTask()) + BBToSpindle.getSecond()->~Spindle(); + for (auto SpindleToTask : SpindleMap) + if (RootTask != SpindleToTask.getSecond() && + !SpindleToTask.getSecond()->getParentTask()) + SpindleToTask.getSecond()->~Task(); + + BBMap.clear(); + SpindleMap.clear(); + if (RootTask) + RootTask->~Task(); + RootTask = nullptr; + if (MPTasks) { + MPTasks->TaskList.clear(); + MPTasks.reset(); + } + ComputedTaskFrameTree = false; + TaskAllocator.Reset(); + } + + template Spindle *AllocateSpindle(ArgsTy &&... Args) { + Spindle *Storage = TaskAllocator.Allocate(); + return new (Storage) Spindle(std::forward(Args)...); + } + template Task *AllocateTask(ArgsTy &&... Args) { + Task *Storage = TaskAllocator.Allocate(); + return new (Storage) Task(std::forward(Args)...); + } + + Task *getRootTask() const { return RootTask; } + + /// Return true if this function is "serial," meaning it does not itself + /// perform a detach. This method does not preclude functions called by this + /// function from performing a detach. 
+ bool isSerial() const { + assert(getRootTask() && "Null root task\n"); + return getRootTask()->isSerial(); + } + + /// Return true if the analysis found child taskframes of this task. + bool foundChildTaskFrames() const { + assert(getRootTask() && "Null root task\n"); + return getRootTask()->foundChildTaskFrames(); + } + + /// iterator/begin/end - The interface to the top-level tasks in the current + /// function. + /// + using iterator = typename Task::iterator; + using const_iterator = typename Task::const_iterator; + using reverse_iterator = typename Task::reverse_iterator; + using const_reverse_iterator = typename Task::const_reverse_iterator; + inline iterator begin() const { return getRootTask()->begin(); } + inline iterator end() const { return getRootTask()->end(); } + inline reverse_iterator rbegin() const { return getRootTask()->rbegin(); } + inline reverse_iterator rend() const { return getRootTask()->rend(); } + inline bool empty() const { return getRootTask()->empty(); } + + /// Return the innermost spindle that BB lives in. + Spindle *getSpindleFor(const BasicBlock *BB) const { + return BBMap.lookup(BB); + } + + /// Return the innermost task that spindle F lives in. + Task *getTaskFor(const Spindle *S) const { return SpindleMap.lookup(S); } + /// Same as getTaskFor(S). + const Task *operator[](const Spindle *S) const { return getTaskFor(S); } + + /// Return the innermost task that BB lives in. + Task *getTaskFor(const BasicBlock *BB) const { + return getTaskFor(getSpindleFor(BB)); + } + /// Same as getTaskFor(BB). + const Task *operator[](const BasicBlock *BB) const { return getTaskFor(BB); } + + /// Return the taskframe spindle for the given task T. + Spindle *getTaskFrameSpindleFor(const Task *T) const { + Instruction *TaskFrame = + dyn_cast_or_null(T->getTaskFrameUsed()); + if (!TaskFrame) + return nullptr; + return getSpindleFor(TaskFrame->getParent()); + } + + /// Return the innermost task that encompases both basic blocks BB1 and BB2. 
+ Task *getEnclosingTask(const BasicBlock *BB1, const BasicBlock *BB2) const { + return getTaskFor( + getRootTask()->DomTree.findNearestCommonDominator(BB1, BB2)); + } + + /// Return the innermost task that encompases both spindles S1 and S2. + Task *getEnclosingTask(const Spindle *S1, const Spindle *S2) const { + return getEnclosingTask(S1->getEntry(), S2->getEntry()); + } + + /// Return true if task T1 encloses task T2. + bool encloses(const Task *T1, const Task *T2) const { + if (!T1 || !T2) return false; + return getRootTask()->DomTree.dominates(T1->getEntry(), T2->getEntry()); + } + + /// Return true if task T encloses basic block BB. + bool encloses(const Task *T, const BasicBlock *BB) const { + if (!T) return false; + return T->encloses(BB); + } + + /// Return true if the task T encloses instruction Inst. + bool encloses(const Task *T, const Instruction *Inst) const { + return encloses(T, Inst->getParent()); + } + + /// Return the task nesting level of basic block BB. A depth of 0 means the + /// block is in the root task. + unsigned getTaskDepth(const BasicBlock *BB) const { + return getTaskFor(BB)->getTaskDepth(); + } + + /// True if basic block BB is a task entry block + bool isTaskEntry(const BasicBlock *BB) const { + return getTaskFor(BB)->getEntry() == BB; + } + + /// Traverse the graph of spindles to evaluate some parallel state. + template + void evaluateParallelState(StateT &State) const { + SetVector ToProcess; + + // This method performs the work-list algorithm for data-flow analysis on + // spindles. + + // First mark all defining spindles and spindles whose state is eagerly + // updated. + { + // Get the spindles in post order, so we can traverse them in RPO. 
+ SmallVector POSpindles; + for (Spindle *S : post_order(getRootTask()->getEntrySpindle())) + POSpindles.push_back(S); + // SetVector DefSpindles; + for (Spindle *S : llvm::reverse(POSpindles)) + // If we find a defining spindle (or a spindle with an eagerly-updated + // state), add its successors for processing. + if (State.markDefiningSpindle(S)) + for (Spindle *Succ : successors(S)) + ToProcess.insert(Succ); + } + + // Perform the work-list algorithm to propagate data-flow information among + // the spindles. + { + SmallVector NextToProcess; + unsigned EvalNum = 0; + while (!ToProcess.empty()) { + // Process all spindles that need processing. + for (Spindle *Curr : ToProcess) + if (!State.evaluate(Curr, EvalNum)) + // If the state of this spindle changed, add its successors for + // future processing. + for (Spindle *Succ : successors(Curr)) + NextToProcess.push_back(Succ); + + // Get ready to Process the next set of spindles. + ToProcess.clear(); + ToProcess.insert(NextToProcess.begin(), NextToProcess.end()); + NextToProcess.clear(); + ++EvalNum; + } + } + } + + /// Check if a alloca AI is promotable based on task structure. + bool isAllocaParallelPromotable(const AllocaInst *AI) const; + + /// Check if the two basic blocks B1 and B2 may execute in parallel. + bool mayHappenInParallel(const BasicBlock *B1, const BasicBlock *B2) const { + // Common case: No blocks execute in parallel in a serial function. + if (isSerial()) + return false; + + // if (getTaskFor(B1) == getTaskFor(B2)) + // return false; + + // If necessary, compute which tasks may execute in parallel. + if (!MPTasks) { + MPTasks.reset(new MaybeParallelTasks()); + evaluateParallelState(*MPTasks); + } + + // Get the task Encl that encloses both basic blocks. + const Task *Encl = getEnclosingTask(B1, B2); + + // For each basic block, get the representative subtask of Encl that + // encloses that basic block. 
+ const Task *B1Task = Encl->getSubTaskEnclosing(B1); + const Task *B2Task = Encl->getSubTaskEnclosing(B2); + + // Translate these representative tasks into spindles. + const Spindle *B1Spindle = getSpindleFor(B1); + const Spindle *B2Spindle = getSpindleFor(B2); + if (B1Task != Encl) + B1Spindle = getSpindleFor(B1Task->getDetach()->getParent()); + if (B2Task != Encl) + B2Spindle = getSpindleFor(B2Task->getDetach()->getParent()); + + // Evaluate the maybe-parallel task lists for the two representative + // spindles to determine if the blocks may execute in parallel. + return MPTasks->TaskList[B1Spindle].count(B2Task) || + MPTasks->TaskList[B2Spindle].count(B1Task); + } + + /// Create the task forest using a stable algorithm. + void analyze(Function &F, DominatorTree &DomTree); + + /// Compute the spindles and subtasks contained in all taskframes. + void findTaskFrameTree(); + + /// Handle invalidation explicitly. + bool invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &); + + // Debugging + void print(raw_ostream &OS) const; + + void verify(const DominatorTree &DomTree) const; + + /// Destroy a task that has been removed from the `TaskInfo` nest. + /// + /// This runs the destructor of the task object making it invalid to + /// reference afterward. The memory is retained so that the *pointer* to the + /// task remains valid. + /// + /// The caller is responsible for removing this task from the task nest and + /// otherwise disconnecting it from the broader `TaskInfo` data structures. + /// Callers that don't naturally handle this themselves should probably call + /// `erase' instead. + void destroy(Task *T) { + assert(T && "Cannot destroy a null task."); + T->~Task(); + + // Since TaskAllocator is a BumpPtrAllocator, this Deallocate only poisons + // \c T, but the pointer remains valid for non-dereferencing uses. + TaskAllocator.Deallocate(T); + } + + // Manually recalculate TaskInfo from the given dominator tree. 
+ void recalculate(Function &F, DominatorTree &DomTree) { + releaseMemory(); + analyze(F, DomTree); + } + + // Create a spindle with entry block B and type Ty. + Spindle *createSpindleWithEntry(BasicBlock *B, Spindle::SPType Ty) { + Spindle *S = AllocateSpindle(B, Ty); + assert(!BBMap.count(B) && "BasicBlock already in a spindle!"); + BBMap[B] = S; + return S; + } + + // Create a task with spindle entry S. + Task *createTaskWithEntry(Spindle *S, DominatorTree &DomTree) { + Task *T = AllocateTask(*S, DomTree); + S->setParentTask(T); + assert(!SpindleMap.count(S) && "Spindle already in a task!"); + SpindleMap[S] = T; + return T; + } + + // Add spindle S to task T. + void addSpindleToTask(Spindle *S, Task *T) { + assert(!SpindleMap.count(S) && "Spindle already mapped to a task."); + T->addSpindle(*S); + S->setParentTask(T); + SpindleMap[S] = T; + } + + // Add spindle S to task T, where S is a shared exception-handling spindle + // among subtasks of T. + void addEHSpindleToTask(Spindle *S, Task *T) { + assert(!SpindleMap.count(S) && "Spindle already mapped to a task."); + T->addEHSpindle(*S); + S->setParentTask(T); + SpindleMap[S] = T; + } + + // Add basic block B to spindle S. + void addBlockToSpindle(BasicBlock &B, Spindle *S) { + assert(!BBMap.count(&B) && "Block already mapped to a spindle."); + S->addBlock(B); + BBMap[&B] = S; + } + + // Associate a task T with the spindle TFSpindle that creates its taskframe. + void AssociateTaskFrameWithUser(Task *T, Spindle *TFSpindle) { + TFSpindle->TaskFrameUser = T; + T->TaskFrameCreateSpindle = TFSpindle; + } +}; + +/// Enable verification of Tapir task info. +/// +/// The flag enables checks which are expensive and are disabled by default +/// unless the `EXPENSIVE_CHECKS` macro is defined. The `-verify-task-info` +/// flag allows the checks to be enabled selectively without re-compilation. +extern bool VerifyTaskInfo; + +/// Analysis pass that exposes the \c TaskInfo for a function. 
+class TaskAnalysis : public AnalysisInfoMixin { + friend AnalysisInfoMixin; + static AnalysisKey Key; + +public: + using Result = TaskInfo; + + TaskInfo run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Printer pass for the \c TaskAnalysis results. +class TaskPrinterPass : public PassInfoMixin { + raw_ostream &OS; + +public: + explicit TaskPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Verifier pass for the \c TaskAnalysis results. +struct TaskVerifierPass : public PassInfoMixin { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +/// The legacy pass manager's analysis pass to compute task information. +class TaskInfoWrapperPass : public FunctionPass { + TaskInfo TI; + +public: + static char ID; // Pass identification, replacement for typeid + + TaskInfoWrapperPass(); + + TaskInfo &getTaskInfo() { return TI; } + const TaskInfo &getTaskInfo() const { return TI; } + + /// Calculate the natural task information for a given function. + bool runOnFunction(Function &F) override; + + void verifyAnalysis() const override; + + void releaseMemory() override { TI.releaseMemory(); } + + void print(raw_ostream &O, const Module *M = nullptr) const override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; +}; + +/// Function to print a task's contents as LLVM's text IR assembly. +void printTask(Task &T, raw_ostream &OS, const std::string &Banner = ""); + +/// Examine a given loop to determine if it is structurally a Tapir loop. +/// Returns the Task that encodes the loop body if so, or nullptr if not. 
+Task *getTaskIfTapirLoopStructure(const Loop *L, TaskInfo *TI); + +} // End llvm namespace + +#endif diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h index 5d62e837c1f3d5..dfb432928c04cd 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -15,6 +15,7 @@ #include "llvm/IR/PassManager.h" #include "llvm/Pass.h" #include "llvm/TargetParser/Triple.h" +#include "llvm/Transforms/Tapir/TapirTargetIDs.h" #include namespace llvm { @@ -56,6 +57,8 @@ class TargetLibraryInfoImpl { static StringLiteral const StandardNames[NumLibFuncs]; bool ShouldExtI32Param, ShouldExtI32Return, ShouldSignExtI32Param, ShouldSignExtI32Return; unsigned SizeOfInt; + TapirTargetID TapirTarget = TapirTargetID::Last_TapirTargetID; + std::unique_ptr TTOptions = nullptr; enum AvailabilityState { StandardName = 3, // (memset to all ones) @@ -76,6 +79,9 @@ class TargetLibraryInfoImpl { /// on VectorFnName rather than ScalarFnName. std::vector ScalarDescs; + /// Tapir target standard functions + std::vector TapirTargetFuncs; + /// Return true if the function type FTy is valid for the library function /// F, regardless of whether the function is available. bool isValidProtoForLibFunc(const FunctionType &FTy, LibFunc F, @@ -228,6 +234,47 @@ class TargetLibraryInfoImpl { /// conventions. static bool isCallingConvCCompatible(CallBase *CI); static bool isCallingConvCCompatible(Function *Callee); + + /// Set the target for Tapir lowering. + void setTapirTarget(TapirTargetID TargetID) { + TapirTarget = TargetID; + } + + /// Return the ID of the target for Tapir lowering. + TapirTargetID getTapirTarget() const { + return TapirTarget; + } + + /// Return true if we have a nontrivial target for Tapir lowering. + bool hasTapirTarget() const { + return (TapirTarget != TapirTargetID::Last_TapirTargetID) && + (TapirTarget != TapirTargetID::None); + } + + /// Set options for Tapir lowering. 
+ void setTapirTargetOptions(std::unique_ptr Options) { + std::swap(TTOptions, Options); + } + + /// Return any options for Tapir lowering. + TapirTargetOptions *getTapirTargetOptions() const { + return TTOptions.get(); + } + + /// Records known library functions associated with the specified Tapir + /// target. + void addTapirTargetLibraryFunctions() { + addTapirTargetLibraryFunctions(TapirTarget); + } + void addTapirTargetLibraryFunctions(TapirTargetID TargetID); + + /// Searches for a particular function name among known Tapir-target library + /// functions, also checking that its type is valid for the library function + /// matching that name. + /// + /// Return true if it is one of the known tapir-target library functions. + bool isTapirTargetLibFunc(StringRef funcName) const; + bool isTapirTargetLibFunc(const Function &FDecl) const; }; /// Provides information about what library functions are available for @@ -514,6 +561,29 @@ class TargetLibraryInfo { return Impl->getIntSize(); } + /// \copydoc TargetLibraryInfoImpl::getTapirTarget() + TapirTargetID getTapirTarget() const { + return Impl->getTapirTarget(); + } + + /// \copydoc TargetLibraryInfoImpl::hasTapirTarget() + bool hasTapirTarget() const { + return Impl->hasTapirTarget(); + } + + /// \copydoc TargetLibraryInfoImpl::getTapirTarget() + TapirTargetOptions *getTapirTargetOptions() const { + return Impl->getTapirTargetOptions(); + } + + /// \copydoc TargetLibraryInfoImpl::isTapirTargetLibFunc() + bool isTapirTargetLibFunc(StringRef funcName) const { + return Impl->isTapirTargetLibFunc(funcName); + } + bool isTapirTargetLibFunc(const Function &FDecl) const { + return Impl->isTapirTargetLibFunc(FDecl); + } + /// Handle invalidation from the pass manager. /// /// If we try to invalidate this info, just return false. 
It cannot become diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index 1ae595d2110457..42974aa180cca0 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -625,6 +625,29 @@ class TargetTransformInfo { TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const; + /// Parameters that control the generic loop stripmining transformation. + struct StripMiningPreferences { + /// A forced stripmining factor (the number of iterations of the original + /// loop in the stripmined inner-loop body). When set to 0, the stripmining + /// transformation will select an stripmining factor based on the current + /// cost threshold and other factors. + unsigned Count; + /// Allow emitting expensive instructions (such as divisions) when computing + /// the trip count of a loop for runtime unrolling. + bool AllowExpensiveTripCount; + /// Default factor for coarsening a task to amortize the cost of creating + /// it. + unsigned DefaultCoarseningFactor; + /// Allow unrolling of all the iterations of the runtime loop remainder. + bool UnrollRemainder; + }; + + /// Get target-customized preferences for the generic Tapir loop stripmining + /// transformation. The caller will initialize SMP with the current + /// target-independent defaults. 
+ void getStripMiningPreferences(Loop *L, ScalarEvolution &, + StripMiningPreferences &SMP) const; + // Parameters that control the loop peeling transformation struct PeelingPreferences { /// A forced peeling factor (the number of bodied of the original loop @@ -1741,6 +1764,8 @@ class TargetTransformInfo::Concept { virtual bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) = 0; virtual TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) = 0; + virtual void getStripMiningPreferences(Loop *L, ScalarEvolution &, + StripMiningPreferences &SMP) = 0; virtual std::optional instCombineIntrinsic( InstCombiner &IC, IntrinsicInst &II) = 0; virtual std::optional simplifyDemandedUseBitsIntrinsic( @@ -2166,6 +2191,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override { return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow); } + void getStripMiningPreferences(Loop *L, ScalarEvolution &SE, + StripMiningPreferences &SMP) override { + return Impl.getStripMiningPreferences(L, SE, SMP); + } std::optional instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override { return Impl.instCombineIntrinsic(IC, II); diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index 4ab33995618213..910c9f68556dd6 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -210,6 +210,9 @@ class TargetTransformInfoImplBase { void getPeelingPreferences(Loop *, ScalarEvolution &, TTI::PeelingPreferences &) const {} + void getStripMiningPreferences(Loop *, ScalarEvolution &, + TTI::StripMiningPreferences &) const {} + bool isLegalAddImmediate(int64_t Imm) const { return false; } bool isLegalICmpImmediate(int64_t Imm) const { return false; } @@ -703,6 +706,12 @@ class TargetTransformInfoImplBase { case Intrinsic::coro_subfn_addr: 
case Intrinsic::threadlocal_address: case Intrinsic::experimental_widenable_condition: + case Intrinsic::syncregion_start: + case Intrinsic::taskframe_create: + case Intrinsic::taskframe_use: + case Intrinsic::taskframe_end: + case Intrinsic::taskframe_load_guard: + case Intrinsic::sync_unwind: // These intrinsics don't actually represent code after lowering. return 0; } @@ -1366,6 +1375,11 @@ class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { Type *DstTy = Operands[0]->getType(); return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx); } + case Instruction::Detach: + // Ideally, we'd determine the number of arguments of the detached task. + // But because that computation is expensive, we settle for 30x the basic + // cost of a function call. + return 30 * TTI::TCC_Basic; } // By default, just classify everything as 'basic' or -1 to represent that diff --git a/llvm/include/llvm/Analysis/WorkSpanAnalysis.h b/llvm/include/llvm/Analysis/WorkSpanAnalysis.h new file mode 100644 index 00000000000000..3763917bf4bad2 --- /dev/null +++ b/llvm/include/llvm/Analysis/WorkSpanAnalysis.h @@ -0,0 +1,57 @@ +//===- WorkSpanAnalysis.h - Analysis to estimate work and span --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements an analysis pass to estimate the work and span of the +// program. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_WORKSPANANALYSIS_H_ +#define LLVM_ANALYSIS_WORKSPANANALYSIS_H_ + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Support/InstructionCost.h" + +// TODO: Build a CGSCC pass based on these analyses to efficiently estimate the +// work and span of all the functions in a module. + +// TODO: Use BlockFrequencyInfo to improve how this analysis evaluates code with +// control flow. Specifically, the analysis should weight the work and span of +// a block based on the probabilities of its incoming edges, with special care +// given to detach, reattach, and continue edges. + +// TODO: Connect these analyses with a scalability profiler to implement PGO for +// Tapir. + +namespace llvm { +class Loop; +class LoopInfo; +class ScalarEvolution; +class TargetLibraryInfo; +class TargetTransformInfo; + +struct WSCost { + InstructionCost Work = 0; + InstructionCost Span = 0; + + bool UnknownCost = false; + + CodeMetrics Metrics; +}; + +// Get a constant trip count for the given loop. 
+unsigned getConstTripCount(const Loop *L, ScalarEvolution &SE); + +void estimateLoopCost(WSCost &LoopCost, const Loop *L, LoopInfo *LI, + ScalarEvolution *SE, const TargetTransformInfo &TTI, + TargetLibraryInfo *TLI, + const SmallPtrSetImpl &EphValues); +} + +#endif // LLVM_ANALYSIS_WORKSPANANALYSIS_H_ diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index eca908a24aac7b..1405f91427d60d 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -607,6 +607,9 @@ namespace llvm { bool parseCatchPad(Instruction *&Inst, PerFunctionState &PFS); bool parseCleanupPad(Instruction *&Inst, PerFunctionState &PFS); bool parseCallBr(Instruction *&Inst, PerFunctionState &PFS); + bool parseDetach(Instruction *&Inst, PerFunctionState &PFS); + bool parseReattach(Instruction *&Inst, PerFunctionState &PFS); + bool parseSync(Instruction *&Inst, PerFunctionState &PFS); bool parseUnaryOp(Instruction *&Inst, PerFunctionState &PFS, unsigned Opc, bool IsFP); diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h index 673dc58ce6451e..b83144b58ab8d9 100644 --- a/llvm/include/llvm/AsmParser/LLToken.h +++ b/llvm/include/llvm/AsmParser/LLToken.h @@ -340,6 +340,12 @@ enum Kind { kw_freeze, + // Tapir types + kw_detach, + kw_reattach, + // NOTE: kw_sync is already defined for a different context. + // kw_tsync, + // Metadata types. kw_distinct, diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index 52e76356a892e4..403a03d7449aa1 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -617,6 +617,9 @@ enum FunctionCodes { // operation, align, vol, // ordering, synchscope] FUNC_CODE_BLOCKADDR_USERS = 60, // BLOCKADDR_USERS: [value...] 
+ FUNC_CODE_INST_DETACH = 61, // DETACH: [bb#,bb#] or [bb#,bb#,bb#] + FUNC_CODE_INST_REATTACH = 62, // REATTACH: [bb#] + FUNC_CODE_INST_SYNC = 63, // SYNC: [bb#] }; enum UseListCodes { @@ -713,6 +716,14 @@ enum AttributeKindCodes { ATTR_KIND_SKIP_PROFILE = 85, ATTR_KIND_MEMORY = 86, ATTR_KIND_NOFPCLASS = 87, + ATTR_KIND_SANITIZE_CILK = 88, + ATTR_KIND_STEALABLE = 89, + ATTR_KIND_STRAND_PURE = 90, + ATTR_KIND_STRAND_NO_ALIAS = 91, + ATTR_KIND_INJECTIVE = 92, + ATTR_KIND_HYPER_VIEW = 93, + ATTR_KIND_REDUCER_REGISTER = 94, + ATTR_KIND_REDUCER_UNREGISTER = 95, }; enum ComdatSelectionKindCodes { diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h index 4d26af3e3e6d22..ed23177b166fb3 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -399,6 +399,12 @@ class IRTranslator : public MachineFunctionPass { bool translateIndirectBr(const User &U, MachineIRBuilder &MIRBuilder); + bool translateDetach(const User &U, MachineIRBuilder &MIRBuilder); + + bool translateReattach(const User &U, MachineIRBuilder &MIRBuilder); + + bool translateSync(const User &U, MachineIRBuilder &MIRBuilder); + bool translateExtractValue(const User &U, MachineIRBuilder &MIRBuilder); bool translateInsertValue(const User &U, MachineIRBuilder &MIRBuilder); diff --git a/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/llvm/include/llvm/CodeGen/MIRYamlMapping.h index 16e773c1864181..a571cc64889639 100644 --- a/llvm/include/llvm/CodeGen/MIRYamlMapping.h +++ b/llvm/include/llvm/CodeGen/MIRYamlMapping.h @@ -715,6 +715,7 @@ struct MachineFunction { StringRef Name; MaybeAlign Alignment = std::nullopt; bool ExposesReturnsTwice = false; + bool ExposesOpaqueReturnsTwice = false; // GISel MachineFunctionProperties. 
bool Legalized = false; bool RegBankSelected = false; @@ -757,6 +758,7 @@ template <> struct MappingTraits { YamlIO.mapRequired("name", MF.Name); YamlIO.mapOptional("alignment", MF.Alignment, std::nullopt); YamlIO.mapOptional("exposesReturnsTwice", MF.ExposesReturnsTwice, false); + YamlIO.mapOptional("exposesOpaqueReturnsTwice", MF.ExposesOpaqueReturnsTwice, false); YamlIO.mapOptional("legalized", MF.Legalized, false); YamlIO.mapOptional("regBankSelected", MF.RegBankSelected, false); YamlIO.mapOptional("selected", MF.Selected, false); diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h index 7d11d63d4066f4..ca440aebbe34b7 100644 --- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h +++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h @@ -353,6 +353,7 @@ class MachineFrameInfo { /// selection is complete to determine if the stack frame for this function /// contains any variable sized objects. bool hasVarSizedObjects() const { return HasVarSizedObjects; } + void setHasVarSizedObjects(bool v = true) { HasVarSizedObjects = v; } /// Return the index for the stack protector object. int getStackProtectorIndex() const { return StackProtectorIdx; } diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h index 09f9ff60f95503..2093313bf5fa89 100644 --- a/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/llvm/include/llvm/CodeGen/MachineFunction.h @@ -327,6 +327,13 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction { /// about the control flow of such functions. bool ExposesReturnsTwice = false; + /// ExposesOpaqueReturnsTwice - True if the function calls setjmp or related + /// functions with attribute "returns twice", other than LLVM's builtin + /// setjmp, but doesn't have the attribute itself. + /// This is used to limit optimizations which cannot reason + /// about the control flow of such functions. 
+ bool ExposesOpaqueReturnsTwice = false; + /// True if the function includes any inline assembly. bool HasInlineAsm = false; @@ -777,6 +784,19 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction { ExposesReturnsTwice = B; } + /// exposesOpaqueReturnsTwice - Returns true if the function calls a function with + /// attribute "returns twice" other than LLVM's builtin setjmp without having + /// the attribute itself. + bool exposesOpaqueReturnsTwice() const { + return ExposesOpaqueReturnsTwice; + } + + /// setExposesOpaqueReturnsTwice - Set a flag that indicates if there's a call + /// to a "returns twice" function other than LLVM's builtin setjmp. + void setExposesOpaqueReturnsTwice(bool B) { + ExposesOpaqueReturnsTwice = B; + } + /// Returns true if the function contains any inline assembly. bool hasInlineAsm() const { return HasInlineAsm; diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 11bc1d48a93d7c..6a16bfc66f8862 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -572,6 +572,11 @@ namespace llvm { /// Creates MIR Check Debug pass. \see MachineCheckDebugify.cpp ModulePass *createCheckDebugMachineModulePass(); + /// Clean up any remaining Tapir instructions. Typically, this pass should + /// have no effect, because Tapir instructions should have been lowered + /// already to a particular parallel runtime. + FunctionPass *createTapirCleanupPass(); + /// The pass fixups statepoint machine instruction to replace usage of /// caller saved registers with stack slots.
extern char &FixupStatepointCallerSavedID; diff --git a/llvm/include/llvm/CodeGen/TailDuplicator.h b/llvm/include/llvm/CodeGen/TailDuplicator.h index 8fdce301c0ccb1..125aa1be3c3e83 100644 --- a/llvm/include/llvm/CodeGen/TailDuplicator.h +++ b/llvm/include/llvm/CodeGen/TailDuplicator.h @@ -76,7 +76,8 @@ class TailDuplicator { bool tailDuplicateBlocks(); static bool isSimpleBB(MachineBasicBlock *TailBB); - bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB); + BlockDesc getBlockDesc(MachineBasicBlock *TailBB); + bool shouldTailDuplicate(BlockDesc const &Desc, MachineBasicBlock &TailBB); /// Returns true if TailBB can successfully be duplicated into PredBB bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB); @@ -89,7 +90,7 @@ class TailDuplicator { /// deleted. /// If \p CandidatePtr is not null, duplicate into these blocks only. bool tailDuplicateAndUpdate( - bool IsSimple, MachineBasicBlock *MBB, + const BlockDesc &Desc, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl *DuplicatedPreds = nullptr, function_ref *RemovalCallback = nullptr, @@ -116,7 +117,7 @@ class TailDuplicator { bool duplicateSimpleBB(MachineBasicBlock *TailBB, SmallVectorImpl &TDBBs, const DenseSet &RegsUsedByPhi); - bool tailDuplicate(bool IsSimple, + bool tailDuplicate(const BlockDesc &Desc, MachineBasicBlock *TailBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl &TDBBs, diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 93dfcfc399247e..182e46ea51580a 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -92,6 +92,21 @@ struct ExtAddrMode { int64_t Displacement; }; +struct BlockBRNZ { + // If true, the registers below are dead + bool IsKill = false; + // The register (or set of registers feeding into a PHI) that + // is tested against zero to determine the branch. 
+ SmallVector Regs; + MachineBasicBlock *Zero = nullptr; // Target if register is zero + MachineBasicBlock *Nonzero = nullptr; // Target if register is not zero +}; + +struct BlockDesc { + bool IsSimple = false; + std::optional BRNZ; +}; + //--------------------------------------------------------------------------- /// /// TargetInstrInfo - Interface to description of machine instruction set @@ -697,6 +712,13 @@ class TargetInstrInfo : public MCInstrInfo { llvm_unreachable("Target didn't implement TargetInstrInfo::removeBranch!"); } + /// Remove the branches at the end of the block and any compare + /// instructions used only by the branches. + virtual unsigned removeBranchAndFlags(MachineBasicBlock &MBB, + int *BytesRemoved = nullptr) const { + return removeBranch(MBB, BytesRemoved); + } + /// Insert branch code into the end of the specified MachineBasicBlock. The /// operands to this method are the same as those returned by analyzeBranch. /// This is only invoked in cases where analyzeBranch returns success. It @@ -1630,6 +1652,19 @@ class TargetInstrInfo : public MCInstrInfo { } virtual bool optimizeCondBranch(MachineInstr &MI) const { return false; } + /// Return a descriptor if this block branches depending on whether a register + /// is nonzero. + virtual std::optional isZeroTest(MachineBasicBlock &MBB) const { + return std::optional(); + } + + /// If this instruction sets a register to a constant integer value, + /// return true, the register, and the value. + virtual bool isSetConstant(const MachineInstr &MI, Register &Reg, + int64_t &Value) const { + return false; + } + /// Try to remove the load by folding it to a register operand at the use. /// We fold the load instructions if and only if the /// def and use are in the same BB. 
We only look at one load and see diff --git a/llvm/include/llvm/Config/llvm-config.h.cmake b/llvm/include/llvm/Config/llvm-config.h.cmake index 012ae2174cc2a1..ffe7843248a0a4 100644 --- a/llvm/include/llvm/Config/llvm-config.h.cmake +++ b/llvm/include/llvm/Config/llvm-config.h.cmake @@ -78,6 +78,15 @@ /* LLVM version string */ #define LLVM_VERSION_STRING "${PACKAGE_VERSION}" +/* Major version of the Tapir API */ +#define TAPIR_VERSION_MAJOR ${TAPIR_VERSION_MAJOR} + +/* Minor version of the Tapir API */ +#define TAPIR_VERSION_MINOR ${TAPIR_VERSION_MINOR} + +/* Patch version of the Tapir API */ +#define TAPIR_VERSION_PATCH ${TAPIR_VERSION_PATCH} + /* Whether LLVM records statistics for use with GetStatistics(), * PrintStatistics() or PrintStatisticsJSON() */ diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index 550e1699b22841..0d2bfbb4e61e2a 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -633,6 +633,36 @@ class PointerType final : public Node { } }; +class HyperobjectType final : public Node { + const Node *View; + +public: + HyperobjectType(const Node *View_) + : Node(KHyperobjectType, View_->RHSComponentCache), + View(View_) {} + + template void match(Fn F) const { F(View); } + + bool hasRHSComponentSlow(OutputBuffer &OB) const override { + return View->hasRHSComponent(OB); + } + + void printLeft(OutputBuffer &OB) const override { + View->printLeft(OB); + if (View->hasArray(OB)) + OB += " "; + if (View->hasArray(OB) || View->hasFunction(OB)) + OB += "("; + OB += " _Hyperobject"; + } + + void printRight(OutputBuffer &OB) const override { + if (View->hasArray(OB) || View->hasFunction(OB)) + OB += ")"; + View->printRight(OB); + } +}; + enum class ReferenceKind { LValue, RValue, @@ -4045,6 +4075,14 @@ Node *AbstractManglingParser::parseType() { Result = make(Ptr); break; } + case 'H': { + ++First; + Node *Ptr = getDerived().parseType(); + if (Ptr == 
nullptr) + return nullptr; + Result = make(Ptr); + break; + } // ::= R # l-value reference case 'R': { ++First; diff --git a/llvm/include/llvm/Demangle/ItaniumNodes.def b/llvm/include/llvm/Demangle/ItaniumNodes.def index c0e277d554ccfa..a5e4afc45646bd 100644 --- a/llvm/include/llvm/Demangle/ItaniumNodes.def +++ b/llvm/include/llvm/Demangle/ItaniumNodes.def @@ -26,6 +26,7 @@ NODE(ObjCProtoName) NODE(PointerType) NODE(ReferenceType) NODE(PointerToMemberType) +NODE(HyperobjectType) NODE(ArrayType) NODE(FunctionType) NODE(NoexceptSpec) diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td index aba1d718f7f72f..51da9f0e79dfa1 100644 --- a/llvm/include/llvm/IR/Attributes.td +++ b/llvm/include/llvm/IR/Attributes.td @@ -109,6 +109,9 @@ def FnRetThunkExtern : EnumAttr<"fn_ret_thunk_extern", [FnAttr]>; /// Pass structure in an alloca. def InAlloca : TypeAttr<"inalloca", [ParamAttr]>; +/// Distinct arguments to this function yield distinct return values. +def Injective : EnumAttr<"injective", [FnAttr]>; + /// Source said inlining was desirable. def InlineHint : EnumAttr<"inlinehint", [FnAttr]>; @@ -218,6 +221,11 @@ def ReadNone : EnumAttr<"readnone", [ParamAttr]>; /// Function only reads from memory. def ReadOnly : EnumAttr<"readonly", [ParamAttr]>; +/// Tapir reducer-related attributes. +def HyperView : EnumAttr<"hyper_view", [FnAttr]>; +def ReducerRegister : EnumAttr<"reducer_register", [FnAttr]>; +def ReducerUnregister : EnumAttr<"reducer_unregister", [FnAttr]>; + /// Return value is always equal to this argument. def Returned : EnumAttr<"returned", [ParamAttr]>; @@ -252,6 +260,13 @@ def StackProtectReq : EnumAttr<"sspreq", [FnAttr]>; /// Strong Stack protection. def StackProtectStrong : EnumAttr<"sspstrong", [FnAttr]>; +/// Multiple calls to this function in a strand return the same result. 
+def StrandPure : EnumAttr<"strand_pure", [FnAttr]>; + +/// This function acts like a system memory allocation function from +/// the perspective of memory accesses within the same strand. +def StrandNoAlias : EnumAttr<"strand_noalias", [ParamAttr, RetAttr]>; + /// Function was called in a scope requiring strict floating point semantics. def StrictFP : EnumAttr<"strictfp", [FnAttr]>; @@ -282,6 +297,14 @@ def SanitizeMemTag : EnumAttr<"sanitize_memtag", [FnAttr]>; def SpeculativeLoadHardening : EnumAttr<"speculative_load_hardening", [FnAttr]>; +/// CilkSanitizer is on. +def SanitizeCilk : EnumAttr<"sanitize_cilk", [FnAttr]>; + +/// From the Cilk perspective, a continuation in the function can be +/// stolen. This attribute is used to ensure correct code generation for +/// such functions. +def Stealable : EnumAttr<"stealable", [FnAttr]>; + /// Argument is swift error. def SwiftError : EnumAttr<"swifterror", [ParamAttr]>; @@ -368,6 +391,7 @@ def : MergeRule<"setOR">; def : MergeRule<"setOR">; def : MergeRule<"setOR">; def : MergeRule<"setOR">; +def : MergeRule<"setOR">; def : MergeRule<"adjustCallerSSPLevel">; def : MergeRule<"adjustCallerStackProbes">; def : MergeRule<"adjustCallerStackProbeSize">; diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h index 19bf9549a8caec..386380305d5b1a 100644 --- a/llvm/include/llvm/IR/BasicBlock.h +++ b/llvm/include/llvm/IR/BasicBlock.h @@ -194,6 +194,16 @@ class BasicBlock final : public Value, // Basic blocks are data objects also SkipPseudoOp)); } + /// Returns a pointer to the first instruction in this block that is not a + /// PHINode, a debug intrinsic, or a sync.unwind intrinsic.
+ const Instruction * + getFirstNonPHIOrDbgOrSyncUnwind(bool SkipPseudoOp = false) const; + Instruction *getFirstNonPHIOrDbgOrSyncUnwind(bool SkipPseudoOp = false) { + return const_cast<Instruction *>( + static_cast<const BasicBlock *>(this)->getFirstNonPHIOrDbgOrSyncUnwind( + SkipPseudoOp)); + } + /// Returns an iterator to the first instruction in this block that is /// suitable for inserting a non-PHI instruction. /// diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h index 203a73067edc7b..1a43307cae692b 100644 --- a/llvm/include/llvm/IR/DerivedTypes.h +++ b/llvm/include/llvm/IR/DerivedTypes.h @@ -257,6 +257,10 @@ class StructType : public Type { /// Create an empty structure type. static StructType *get(LLVMContext &Context, bool isPacked = false); + /// Try to lookup a structure type by name, and create one if one does not + /// exist. + static StructType *lookupOrCreate(LLVMContext &Context, StringRef Name); + /// This static method is a convenience method for creating structure types by /// specifying the elements as arguments. Note that this method always returns /// a non-packed struct, and requires at least one element type. diff --git a/llvm/include/llvm/IR/EHPersonalities.h b/llvm/include/llvm/IR/EHPersonalities.h index bd768440bfb9a6..db57a2d26c08c2 100644 --- a/llvm/include/llvm/IR/EHPersonalities.h +++ b/llvm/include/llvm/IR/EHPersonalities.h @@ -32,7 +32,8 @@ enum class EHPersonality { CoreCLR, Rust, Wasm_CXX, - XL_CXX + XL_CXX, + Cilk_CXX }; /// See if the given exception handling personality function is one diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h index 93cf0d27e9a73e..4a3dbfea469f01 100644 --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -567,6 +567,14 @@ class LLVM_EXTERNAL_VISIBILITY Function : public GlobalObject, addFnAttr(Attribute::Speculatable); } + /// Determine if the call is pure within a strand.
+ bool isStrandPure() const { + return hasFnAttribute(Attribute::StrandPure); + } + void setStrandPure() { + addFnAttr(Attribute::StrandPure); + } + /// Determine if the call might deallocate memory. bool doesNotFreeMemory() const { return onlyReadsMemory() || hasFnAttribute(Attribute::NoFree); diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index f86ce845d1915f..4b192241801d89 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -1217,6 +1217,37 @@ class IRBuilderBase { return Insert(new UnreachableInst(Context)); } + /// \brief Create a detach instruction, + /// 'detach within SyncRegion, Detached, Continue'. + DetachInst *CreateDetach(BasicBlock *Detached, BasicBlock *Continue, + Value *SyncRegion, MDNode *BranchWeights = nullptr) { + return Insert(addBranchMetadata( + DetachInst::Create(Detached, Continue, SyncRegion), + BranchWeights, nullptr)); + } + + /// \brief Create a detach instruction, + /// 'detach within SyncRegion, Detached, Continue, Unwind'. + DetachInst *CreateDetach(BasicBlock *Detached, BasicBlock *Continue, + BasicBlock *Unwind, Value *SyncRegion, + MDNode *BranchWeights = nullptr) { + return Insert(addBranchMetadata( + DetachInst::Create(Detached, Continue, Unwind, + SyncRegion), + BranchWeights, nullptr)); + } + + /// \brief Create a reattach instruction, 'reattach within SyncRegion, + /// DetachContinue'. + ReattachInst *CreateReattach(BasicBlock *DetachContinue, Value *SyncRegion) { + return Insert(ReattachInst::Create(DetachContinue, SyncRegion)); + } + + /// \brief Create a sync instruction, 'sync within SyncRegion, Continue'. 
+ SyncInst *CreateSync(BasicBlock *Continue, Value *SyncRegion) { + return Insert(SyncInst::Create(Continue, SyncRegion)); + } + //===--------------------------------------------------------------------===// // Instruction creation methods: Binary Operators //===--------------------------------------------------------------------===// diff --git a/llvm/include/llvm/IR/InstVisitor.h b/llvm/include/llvm/IR/InstVisitor.h index 311e0ac47ddfad..16be5682661ea6 100644 --- a/llvm/include/llvm/IR/InstVisitor.h +++ b/llvm/include/llvm/IR/InstVisitor.h @@ -250,6 +250,15 @@ class InstVisitor { RetTy visitCatchSwitchInst(CatchSwitchInst &I) { return static_cast<SubClass *>(this)->visitTerminator(I); } + RetTy visitDetachInst(DetachInst &I) { + return static_cast<SubClass *>(this)->visitTerminator(I); + } + RetTy visitReattachInst(ReattachInst &I) { + return static_cast<SubClass *>(this)->visitTerminator(I); + } + RetTy visitSyncInst(SyncInst &I) { + return static_cast<SubClass *>(this)->visitTerminator(I); + } RetTy visitTerminator(Instruction &I) { DELEGATE(Instruction);} // Next level propagators: If the user does not overload a specific diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index 6095b0a1be69cb..076bc3ce9039fc 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -1939,6 +1939,11 @@ class CallBase : public Instruction { void setConvergent() { addFnAttr(Attribute::Convergent); } void setNotConvergent() { removeFnAttr(Attribute::Convergent); } + /// Determine if the call or invoke is strand-pure. + bool isStrandPure() const { return hasFnAttr(Attribute::StrandPure); } + void setStrandPure() { addFnAttr(Attribute::StrandPure); } + void setNotStrandPure() { removeFnAttr(Attribute::StrandPure); } + /// Determine if the call returns a structure through first /// pointer argument.
bool hasStructRetAttr() const { diff --git a/llvm/include/llvm/IR/Instruction.def b/llvm/include/llvm/IR/Instruction.def index a5ad92f58f94e3..1a594807c4b229 100644 --- a/llvm/include/llvm/IR/Instruction.def +++ b/llvm/include/llvm/IR/Instruction.def @@ -135,90 +135,93 @@ HANDLE_TERM_INST ( 8, CleanupRet , CleanupReturnInst) HANDLE_TERM_INST ( 9, CatchRet , CatchReturnInst) HANDLE_TERM_INST (10, CatchSwitch , CatchSwitchInst) HANDLE_TERM_INST (11, CallBr , CallBrInst) // A call-site terminator - LAST_TERM_INST (11) +HANDLE_TERM_INST (12, Detach , DetachInst) +HANDLE_TERM_INST (13, Reattach , ReattachInst) +HANDLE_TERM_INST (14, Sync , SyncInst) + LAST_TERM_INST (14) // Standard unary operators... - FIRST_UNARY_INST(12) -HANDLE_UNARY_INST(12, FNeg , UnaryOperator) - LAST_UNARY_INST(12) + FIRST_UNARY_INST(15) +HANDLE_UNARY_INST(15, FNeg , UnaryOperator) + LAST_UNARY_INST(15) // Standard binary operators... - FIRST_BINARY_INST(13) -HANDLE_BINARY_INST(13, Add , BinaryOperator) -HANDLE_BINARY_INST(14, FAdd , BinaryOperator) -HANDLE_BINARY_INST(15, Sub , BinaryOperator) -HANDLE_BINARY_INST(16, FSub , BinaryOperator) -HANDLE_BINARY_INST(17, Mul , BinaryOperator) -HANDLE_BINARY_INST(18, FMul , BinaryOperator) -HANDLE_BINARY_INST(19, UDiv , BinaryOperator) -HANDLE_BINARY_INST(20, SDiv , BinaryOperator) -HANDLE_BINARY_INST(21, FDiv , BinaryOperator) -HANDLE_BINARY_INST(22, URem , BinaryOperator) -HANDLE_BINARY_INST(23, SRem , BinaryOperator) -HANDLE_BINARY_INST(24, FRem , BinaryOperator) + FIRST_BINARY_INST(16) +HANDLE_BINARY_INST(16, Add , BinaryOperator) +HANDLE_BINARY_INST(17, FAdd , BinaryOperator) +HANDLE_BINARY_INST(18, Sub , BinaryOperator) +HANDLE_BINARY_INST(19, FSub , BinaryOperator) +HANDLE_BINARY_INST(20, Mul , BinaryOperator) +HANDLE_BINARY_INST(21, FMul , BinaryOperator) +HANDLE_BINARY_INST(22, UDiv , BinaryOperator) +HANDLE_BINARY_INST(23, SDiv , BinaryOperator) +HANDLE_BINARY_INST(24, FDiv , BinaryOperator) +HANDLE_BINARY_INST(25, URem , BinaryOperator) 
+HANDLE_BINARY_INST(26, SRem , BinaryOperator) +HANDLE_BINARY_INST(27, FRem , BinaryOperator) // Logical operators (integer operands) -HANDLE_BINARY_INST(25, Shl , BinaryOperator) // Shift left (logical) -HANDLE_BINARY_INST(26, LShr , BinaryOperator) // Shift right (logical) -HANDLE_BINARY_INST(27, AShr , BinaryOperator) // Shift right (arithmetic) -HANDLE_BINARY_INST(28, And , BinaryOperator) -HANDLE_BINARY_INST(29, Or , BinaryOperator) -HANDLE_BINARY_INST(30, Xor , BinaryOperator) - LAST_BINARY_INST(30) +HANDLE_BINARY_INST(28, Shl , BinaryOperator) // Shift left (logical) +HANDLE_BINARY_INST(29, LShr , BinaryOperator) // Shift right (logical) +HANDLE_BINARY_INST(30, AShr , BinaryOperator) // Shift right (arithmetic) +HANDLE_BINARY_INST(31, And , BinaryOperator) +HANDLE_BINARY_INST(32, Or , BinaryOperator) +HANDLE_BINARY_INST(33, Xor , BinaryOperator) + LAST_BINARY_INST(33) // Memory operators... - FIRST_MEMORY_INST(31) -HANDLE_MEMORY_INST(31, Alloca, AllocaInst) // Stack management -HANDLE_MEMORY_INST(32, Load , LoadInst ) // Memory manipulation instrs -HANDLE_MEMORY_INST(33, Store , StoreInst ) -HANDLE_MEMORY_INST(34, GetElementPtr, GetElementPtrInst) -HANDLE_MEMORY_INST(35, Fence , FenceInst ) -HANDLE_MEMORY_INST(36, AtomicCmpXchg , AtomicCmpXchgInst ) -HANDLE_MEMORY_INST(37, AtomicRMW , AtomicRMWInst ) - LAST_MEMORY_INST(37) + FIRST_MEMORY_INST(34) +HANDLE_MEMORY_INST(34, Alloca, AllocaInst) // Stack management +HANDLE_MEMORY_INST(35, Load , LoadInst ) // Memory manipulation instrs +HANDLE_MEMORY_INST(36, Store , StoreInst ) +HANDLE_MEMORY_INST(37, GetElementPtr, GetElementPtrInst) +HANDLE_MEMORY_INST(38, Fence , FenceInst ) +HANDLE_MEMORY_INST(39, AtomicCmpXchg , AtomicCmpXchgInst ) +HANDLE_MEMORY_INST(40, AtomicRMW , AtomicRMWInst ) + LAST_MEMORY_INST(40) // Cast operators ... // NOTE: The order matters here because CastInst::isEliminableCastPair // NOTE: (see Instructions.cpp) encodes a table based on this ordering. 
- FIRST_CAST_INST(38) -HANDLE_CAST_INST(38, Trunc , TruncInst ) // Truncate integers -HANDLE_CAST_INST(39, ZExt , ZExtInst ) // Zero extend integers -HANDLE_CAST_INST(40, SExt , SExtInst ) // Sign extend integers -HANDLE_CAST_INST(41, FPToUI , FPToUIInst ) // floating point -> UInt -HANDLE_CAST_INST(42, FPToSI , FPToSIInst ) // floating point -> SInt -HANDLE_CAST_INST(43, UIToFP , UIToFPInst ) // UInt -> floating point -HANDLE_CAST_INST(44, SIToFP , SIToFPInst ) // SInt -> floating point -HANDLE_CAST_INST(45, FPTrunc , FPTruncInst ) // Truncate floating point -HANDLE_CAST_INST(46, FPExt , FPExtInst ) // Extend floating point -HANDLE_CAST_INST(47, PtrToInt, PtrToIntInst) // Pointer -> Integer -HANDLE_CAST_INST(48, IntToPtr, IntToPtrInst) // Integer -> Pointer -HANDLE_CAST_INST(49, BitCast , BitCastInst ) // Type cast -HANDLE_CAST_INST(50, AddrSpaceCast, AddrSpaceCastInst) // addrspace cast - LAST_CAST_INST(50) - - FIRST_FUNCLETPAD_INST(51) -HANDLE_FUNCLETPAD_INST(51, CleanupPad, CleanupPadInst) -HANDLE_FUNCLETPAD_INST(52, CatchPad , CatchPadInst) - LAST_FUNCLETPAD_INST(52) + FIRST_CAST_INST(41) +HANDLE_CAST_INST(41, Trunc , TruncInst ) // Truncate integers +HANDLE_CAST_INST(42, ZExt , ZExtInst ) // Zero extend integers +HANDLE_CAST_INST(43, SExt , SExtInst ) // Sign extend integers +HANDLE_CAST_INST(44, FPToUI , FPToUIInst ) // floating point -> UInt +HANDLE_CAST_INST(45, FPToSI , FPToSIInst ) // floating point -> SInt +HANDLE_CAST_INST(46, UIToFP , UIToFPInst ) // UInt -> floating point +HANDLE_CAST_INST(47, SIToFP , SIToFPInst ) // SInt -> floating point +HANDLE_CAST_INST(48, FPTrunc , FPTruncInst ) // Truncate floating point +HANDLE_CAST_INST(49, FPExt , FPExtInst ) // Extend floating point +HANDLE_CAST_INST(50, PtrToInt, PtrToIntInst) // Pointer -> Integer +HANDLE_CAST_INST(51, IntToPtr, IntToPtrInst) // Integer -> Pointer +HANDLE_CAST_INST(52, BitCast , BitCastInst ) // Type cast +HANDLE_CAST_INST(53, AddrSpaceCast, AddrSpaceCastInst) // addrspace cast + 
LAST_CAST_INST(53) + + FIRST_FUNCLETPAD_INST(54) +HANDLE_FUNCLETPAD_INST(54, CleanupPad, CleanupPadInst) +HANDLE_FUNCLETPAD_INST(55, CatchPad , CatchPadInst) + LAST_FUNCLETPAD_INST(55) // Other operators... - FIRST_OTHER_INST(53) -HANDLE_OTHER_INST(53, ICmp , ICmpInst ) // Integer comparison instruction -HANDLE_OTHER_INST(54, FCmp , FCmpInst ) // Floating point comparison instr. -HANDLE_OTHER_INST(55, PHI , PHINode ) // PHI node instruction -HANDLE_OTHER_INST(56, Call , CallInst ) // Call a function -HANDLE_OTHER_INST(57, Select , SelectInst ) // select instruction -HANDLE_USER_INST (58, UserOp1, Instruction) // May be used internally in a pass -HANDLE_USER_INST (59, UserOp2, Instruction) // Internal to passes only -HANDLE_OTHER_INST(60, VAArg , VAArgInst ) // vaarg instruction -HANDLE_OTHER_INST(61, ExtractElement, ExtractElementInst)// extract from vector -HANDLE_OTHER_INST(62, InsertElement, InsertElementInst) // insert into vector -HANDLE_OTHER_INST(63, ShuffleVector, ShuffleVectorInst) // shuffle two vectors. -HANDLE_OTHER_INST(64, ExtractValue, ExtractValueInst)// extract from aggregate -HANDLE_OTHER_INST(65, InsertValue, InsertValueInst) // insert into aggregate -HANDLE_OTHER_INST(66, LandingPad, LandingPadInst) // Landing pad instruction. -HANDLE_OTHER_INST(67, Freeze, FreezeInst) // Freeze instruction. - LAST_OTHER_INST(67) + FIRST_OTHER_INST(56) +HANDLE_OTHER_INST(56, ICmp , ICmpInst ) // Integer comparison instruction +HANDLE_OTHER_INST(57, FCmp , FCmpInst ) // Floating point comparison instr. 
+HANDLE_OTHER_INST(58, PHI , PHINode ) // PHI node instruction +HANDLE_OTHER_INST(59, Call , CallInst ) // Call a function +HANDLE_OTHER_INST(60, Select , SelectInst ) // select instruction +HANDLE_USER_INST (61, UserOp1, Instruction) // May be used internally in a pass +HANDLE_USER_INST (62, UserOp2, Instruction) // Internal to passes only +HANDLE_OTHER_INST(63, VAArg , VAArgInst ) // vaarg instruction +HANDLE_OTHER_INST(64, ExtractElement, ExtractElementInst)// extract from vector +HANDLE_OTHER_INST(65, InsertElement, InsertElementInst) // insert into vector +HANDLE_OTHER_INST(66, ShuffleVector, ShuffleVectorInst) // shuffle two vectors. +HANDLE_OTHER_INST(67, ExtractValue, ExtractValueInst)// extract from aggregate +HANDLE_OTHER_INST(68, InsertValue, InsertValueInst) // insert into aggregate +HANDLE_OTHER_INST(69, LandingPad, LandingPadInst) // Landing pad instruction. +HANDLE_OTHER_INST(70, Freeze, FreezeInst) // Freeze instruction. + LAST_OTHER_INST(70) #undef FIRST_TERM_INST #undef HANDLE_TERM_INST diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h index 5fd8b27447b77d..6b6294ba9219e4 100644 --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -666,6 +666,7 @@ class Instruction : public User, // This list should be kept in sync with the list in mayWriteToMemory for // all opcodes which don't have a memory location. case Instruction::Fence: + case Instruction::Sync: // Like Instruction::Fence case Instruction::CatchPad: case Instruction::CatchRet: case Instruction::Call: @@ -723,6 +724,9 @@ class Instruction : public User, /// Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst. bool isDebugOrPseudoInst() const LLVM_READONLY; + /// Return true if the instruction is a llvm.taskframe marker. + bool isTaskFrameMarker() const; + /// Return a pointer to the next non-debug instruction in the same basic /// block as 'this', or nullptr if no such instruction exists. 
Skip any pseudo /// operations if \c SkipPseudoOp is true. diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index 8d60384e1a32fc..04bed233965904 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -4819,6 +4819,277 @@ class UnreachableInst : public Instruction { } }; +//===----------------------------------------------------------------------===// +// DetachInst Class +//===----------------------------------------------------------------------===// + +//===--------------------------------------------------------------------------- +/// DetachInst - Detach instruction +/// +class DetachInst : public Instruction { + using UnwindDestField = BoolBitfieldElementT<0>; + + /// Ops list - The operands are ordered: + /// SyncRegion, Detached, Continue[, Unwind] + DetachInst(const DetachInst &DI); + void AssertOK(); + // DetachInst constructors (where {D, C, U} are blocks and SR is a token): + // DetachInst(BB *D, BB *C, Value *SR) - 'detach SR, D, C' + // DetachInst(BB *D, BB *C, Value *SR, Inst *I) + // - 'detach SR, D, C', insert before I + // DetachInst(BB *D, BB *C, Value *SR, BB *I) + // - 'detach SR, D, C', insert at end + DetachInst(BasicBlock *Detached, BasicBlock *Continue, Value *SyncRegion, + Instruction *InsertBefore = nullptr); + DetachInst(BasicBlock *Detached, BasicBlock *Continue, Value *SyncRegion, + BasicBlock *InsertAtEnd); + // DetachInst(BB *D, BB *C, BB *U, Value *SR) - 'detach SR, D, C, U' + // DetachInst(BB *D, BB *C, BB *U, Value *SR, Inst *I) + // - 'detach SR, D, C, U', insert before I + // DetachInst(BB *D, BB *C, BB *U, Value *SR, BB *I) + // - 'detach SR, D, C, U', insert at end + DetachInst(BasicBlock *Detached, BasicBlock *Continue, BasicBlock *Unwind, + Value *SyncRegion, Instruction *InsertBefore = nullptr); + DetachInst(BasicBlock *Detached, BasicBlock *Continue, BasicBlock *Unwind, + Value *SyncRegion, BasicBlock *InsertAtEnd); + +protected: + // Note: 
Instruction needs to be a friend here to call cloneImpl. + friend class Instruction; + DetachInst *cloneImpl() const; + +public: + static DetachInst *Create(BasicBlock *Detached, BasicBlock *Continue, + Value *SyncRegion, + Instruction *InsertBefore = nullptr) { + return new(3) DetachInst(Detached, Continue, SyncRegion, InsertBefore); + } + static DetachInst *Create(BasicBlock *Detached, BasicBlock *Continue, + Value *SyncRegion, BasicBlock *InsertAtEnd) { + return new(3) DetachInst(Detached, Continue, SyncRegion, InsertAtEnd); + } + static DetachInst *Create(BasicBlock *Detached, BasicBlock *Continue, + BasicBlock *Unwind, Value *SyncRegion, + Instruction *InsertBefore = nullptr) { + return new(4) DetachInst(Detached, Continue, Unwind, SyncRegion, + InsertBefore); + } + static DetachInst *Create(BasicBlock *Detached, BasicBlock *Continue, + BasicBlock *Unwind, Value *SyncRegion, + BasicBlock *InsertAtEnd) { + return new(4) DetachInst(Detached, Continue, Unwind, SyncRegion, + InsertAtEnd); + } + + /// Provide fast operand accessors + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + Value *getSyncRegion() const { + return Op<-1>(); + } + + void setSyncRegion(Value *SyncRegion) { + Op<-1>() = SyncRegion; + } + + unsigned getNumSuccessors() const { return 2 + hasUnwindDest(); } + + BasicBlock *getSuccessor(unsigned i) const { + assert(i < getNumSuccessors() && "Successor # out of range for detach!"); + return cast((&Op<-2>() - i)->get()); + } + + void setSuccessor(unsigned idx, BasicBlock *NewSucc) { + assert(idx < getNumSuccessors() && "Successor # out of range for detach!"); + *(&Op<-2>() - idx) = reinterpret_cast(NewSucc); + } + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const Instruction *I) { + return (I->getOpcode() == Instruction::Detach); + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + + BasicBlock *getDetached() const { return getSuccessor(0); } + 
BasicBlock *getContinue() const { return getSuccessor(1); } + bool hasUnwindDest() const { return getSubclassData(); } + BasicBlock *getUnwindDest() const { + if (hasUnwindDest()) + return getSuccessor(2); + return nullptr; + } + void setUnwindDest(BasicBlock *Unwind) { + assert(hasUnwindDest() && Unwind && + "Invalid unwind destination for detach."); + setSuccessor(2, Unwind); + } + + /// Get the landingpad instruction from the landing pad + /// block (the unwind destination). + LandingPadInst *getLandingPadInst() const; + +private: + void init(Value *SyncRegion, BasicBlock *Detached, BasicBlock *Continue, + BasicBlock *Unwind = nullptr); +}; + +template <> +struct OperandTraits : public VariadicOperandTraits { +}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(DetachInst, Value) + +//===----------------------------------------------------------------------===// +// ReattachInst Class +//===----------------------------------------------------------------------===// + +//===--------------------------------------------------------------------------- +/// ReattachInst - Reattach instruction. This instruction terminates +/// a subCFG and has no successors. The DetachContinue field +/// maintains the continue block after the detach instruction +/// corresponding to this reattach. +/// +class ReattachInst : public Instruction { + ReattachInst(const ReattachInst &RI); + void AssertOK(); + // ReattachInst constructors (where C is a block and SR is a token): + // ReattachInst(BB *C, Value *SR) - 'reattach SR, C' + // ReattachInst(BB *C, Value *SR, Inst *I) - 'reattach SR, C', insert before I + // ReattachInst(BB *C, Value *SR, BB *I) - 'reattach SR, C', insert at end + explicit ReattachInst(BasicBlock *DetachContinue, Value *SyncRegion, + Instruction *InsertBefore = nullptr); + ReattachInst(BasicBlock *DetachContinue, Value *SyncRegion, + BasicBlock *InsertAtEnd); +protected: + // Note: Instruction needs to be a friend here to call cloneImpl. 
+ friend class Instruction; + ReattachInst *cloneImpl() const; + +public: + static ReattachInst *Create(BasicBlock *DetachContinue, Value *SyncRegion, + Instruction *InsertBefore = nullptr) { + return new(2) ReattachInst(DetachContinue, SyncRegion, InsertBefore); + } + + static ReattachInst *Create(BasicBlock *DetachContinue, Value *SyncRegion, + BasicBlock *InsertAtEnd) { + return new(2) ReattachInst(DetachContinue, SyncRegion, InsertAtEnd); + } + + /// Transparently provide more efficient getOperand methods. + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + Value *getSyncRegion() const { + return Op<-1>(); + } + + void setSyncRegion(Value *SyncRegion) { + Op<-1>() = SyncRegion; + } + + unsigned getNumSuccessors() const { return 1; } + + BasicBlock *getDetachContinue() const { + return getSuccessor(0); + } + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const Instruction *I) { + return I->getOpcode() == Instruction::Reattach; + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + BasicBlock *getSuccessor(unsigned i) const { + assert(i < getNumSuccessors() && "Successor # out of range for reattach!"); + return cast((&Op<-2>() - i)->get()); + } + void setSuccessor(unsigned idx, BasicBlock *NewSucc) { + assert(idx < getNumSuccessors() && + "Successor # out of range for reattach!"); + *(&Op<-2>() - idx) = reinterpret_cast(NewSucc); + } +}; + +template <> +struct OperandTraits : public VariadicOperandTraits { +}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ReattachInst, Value) + +//===----------------------------------------------------------------------===// +// SyncInst Class +//===----------------------------------------------------------------------===// + +//===--------------------------------------------------------------------------- +/// SyncInst - Sync instruction. 
+/// +class SyncInst : public Instruction { + /// Ops list - A sync looks like an unconditional branch to its continuation. + SyncInst(const SyncInst &SI); + void AssertOK(); + // SyncInst constructor (where C is a block and SR is a token): + // SyncInst(BB *C, Value *SR) - 'sync SR, C' + // SyncInst(BB *C, Value *SR, Inst *I) - 'sync SR, C' insert before I + // SyncInst(BB *C, Value *SR, BB *I) - 'sync SR, C' insert at end + explicit SyncInst(BasicBlock *Continue, Value *SyncRegion, + Instruction *InsertBefore = nullptr); + SyncInst(BasicBlock *Continue, Value *SyncRegion, + BasicBlock *InsertAtEnd); +protected: + // Note: Instruction needs to be a friend here to call cloneImpl. + friend class Instruction; + SyncInst *cloneImpl() const; + +public: + static SyncInst *Create(BasicBlock *Continue, Value *SyncRegion, + Instruction *InsertBefore = nullptr) { + return new(2) SyncInst(Continue, SyncRegion, InsertBefore); + } + static SyncInst *Create(BasicBlock *Continue, Value *SyncRegion, + BasicBlock *InsertAtEnd) { + return new(2) SyncInst(Continue, SyncRegion, InsertAtEnd); + } + + /// Transparently provide more efficient getOperand methods. 
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + + Value *getSyncRegion() const { + return Op<-1>(); + } + + void setSyncRegion(Value *SyncRegion) { + Op<-1>() = SyncRegion; + } + + unsigned getNumSuccessors() const { return 1; } + + // Methods for support type inquiry through isa, cast, and dyn_cast: + static inline bool classof(const Instruction *I) { + return I->getOpcode() == Instruction::Sync; + } + static inline bool classof(const Value *V) { + return isa(V) && classof(cast(V)); + } + + BasicBlock *getSuccessor(unsigned i) const { + assert(i < getNumSuccessors() && "Successor # out of range for sync!"); + return cast((&Op<-2>() - i)->get()); + } + void setSuccessor(unsigned idx, BasicBlock *NewSucc) { + assert(idx < getNumSuccessors() && "Successor # out of range for sync!"); + *(&Op<-2>() - idx) = reinterpret_cast(NewSucc); + } +}; + +template <> +struct OperandTraits : public VariadicOperandTraits { +}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SyncInst, Value) + //===----------------------------------------------------------------------===// // TruncInst Class //===----------------------------------------------------------------------===// diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index 62bd833198f022..f7d215b511bc0d 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -89,6 +89,7 @@ class IntrinsicInst : public CallInst { bool isAssumeLikeIntrinsic() const { switch (getIntrinsicID()) { default: break; + case Intrinsic::annotation: case Intrinsic::assume: case Intrinsic::sideeffect: case Intrinsic::pseudoprobe: @@ -98,12 +99,31 @@ class IntrinsicInst : public CallInst { case Intrinsic::dbg_label: case Intrinsic::invariant_start: case Intrinsic::invariant_end: + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: + case Intrinsic::is_constant: case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: case 
Intrinsic::experimental_noalias_scope_decl: case Intrinsic::objectsize: case Intrinsic::ptr_annotation: case Intrinsic::var_annotation: + case Intrinsic::experimental_gc_result: + case Intrinsic::experimental_gc_relocate: + case Intrinsic::coro_alloc: + case Intrinsic::coro_begin: + case Intrinsic::coro_free: + case Intrinsic::coro_end: + case Intrinsic::coro_frame: + case Intrinsic::coro_size: + case Intrinsic::coro_suspend: + case Intrinsic::coro_subfn_addr: + case Intrinsic::syncregion_start: + case Intrinsic::taskframe_create: + case Intrinsic::taskframe_use: + case Intrinsic::taskframe_end: + case Intrinsic::taskframe_load_guard: + case Intrinsic::sync_unwind: return true; } return false; diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index e51c04fbad2f42..b09a7e1ff1b577 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -60,6 +60,25 @@ def Commutative : IntrinsicProperty; // Throws - This intrinsic can throw. def Throws : IntrinsicProperty; +// Injective - This intrinsic returns different values given different +// arguments. +def IntrInjective : IntrinsicProperty; +// Strand pure - (Tapir) This intrinsic has no visible side effects and +// returns the same result given the same argument, but only within a +// single strand of execution. +def IntrStrandPure : IntrinsicProperty; +// HyperView - (Tapir) This intrinsic lowers to a runtime call to +// find the address of the current view of a hyperobject. It does not +// return the address of anything in the current frame other than its +// argument. +def IntrHyperView : IntrinsicProperty; +// ReducerRegister / ReducerUnregister - (Tapir) This intrinsic registers +// or unregisters a reducer. These calls have no side effects on visible +// memory but can not be moved past other reducer and hyperobject calls +// if the arguments may alias. 
+def IntrReducerRegister : IntrinsicProperty; +def IntrReducerUnregister : IntrinsicProperty; + // Attribute index needs to match `AttrIndex` defined `Attributes.h`. class AttrIndex { int Value = idx; @@ -1668,6 +1687,82 @@ def int_coro_subfn_addr : DefaultAttrsIntrinsic< [IntrReadMem, IntrArgMemOnly, ReadOnly>, NoCapture>]>; +///===-------------------------- Tapir Intrinsics -------------------------===// +// +def int_syncregion_start + : Intrinsic<[llvm_token_ty], [], [IntrArgMemOnly, IntrWillReturn]>; + +def int_tapir_runtime_start + : Intrinsic<[llvm_token_ty], [], [IntrArgMemOnly, IntrWillReturn]>; + +def int_tapir_runtime_end + : Intrinsic<[], [llvm_token_ty], [IntrArgMemOnly, IntrWillReturn]>; + +// Intrinsics for taskframes. + +// Marker for the start of a taskframe. +def int_taskframe_create + : Intrinsic<[llvm_token_ty], [], [IntrArgMemOnly, IntrWillReturn]>; + +// Marker placed in detached blocks (i.e., task-entry blocks) to +// identify the taskframe used by the spawned task. +def int_taskframe_use + : Intrinsic<[], [llvm_token_ty], [IntrArgMemOnly, IntrWillReturn]>; + +// Marker for the end of a taskframe. +def int_taskframe_end + : Intrinsic<[], [llvm_token_ty], [IntrArgMemOnly, IntrWillReturn]>; + +// Marker for the end of a taskframe along exception-handling unwind +// paths. +def int_taskframe_resume : Intrinsic<[], [llvm_token_ty, llvm_any_ty], + [IntrArgMemOnly, IntrWillReturn, Throws]>; + +// Guard intrinsic to prevent illegal code motion of loads from memory +// locations stored in spawned subtasks. +def int_taskframe_load_guard + : Intrinsic<[], [llvm_anyptr_ty], + [IntrArgMemOnly, IntrWillReturn, NoCapture>]>; + +// Marker for the end of a spawned task along exception-handling +// unwind paths. +def int_detached_rethrow : Intrinsic<[], [llvm_token_ty, llvm_any_ty], + [IntrArgMemOnly, IntrWillReturn, Throws]>; + +// Invokable intrinsic to keep track of a landingpad associated with a +// sync. 
+def int_sync_unwind + : Intrinsic<[], [llvm_token_ty], [IntrArgMemOnly, IntrWillReturn, Throws]>; + +// Intrinsic to get the grainsize of a Tapir loop. +def int_tapir_loop_grainsize + : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], + [IntrNoMem, IntrWillReturn, IntrSpeculatable]>; + +// Intrinsic to get the frame address of a spawned task. Tapir +// lowering transforms this intrinsic into ordinary frameaddress +// intrinsics. +def int_task_frameaddress + : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrWillReturn]>; + +// Ideally the types would be [llvm_anyptr_ty], [LLVMMatchType<0>] +// but that does not work, so rely on the front end to insert bitcasts. +def int_hyper_lookup + : Intrinsic<[llvm_ptr_ty], + [llvm_ptr_ty, llvm_anyint_ty, llvm_ptr_ty, llvm_ptr_ty], [ + IntrWillReturn, IntrReadMem, IntrInaccessibleMemOnly, + IntrStrandPure, IntrHyperView, IntrInjective + ]>; + +// TODO: Change tablegen to allow function pointer types in intrinsics. +def int_reducer_register + : Intrinsic<[], [llvm_ptr_ty, llvm_anyint_ty, llvm_ptr_ty, llvm_ptr_ty], + [IntrWillReturn, IntrInaccessibleMemOnly, IntrReducerRegister]>; + +def int_reducer_unregister + : Intrinsic<[], [llvm_ptr_ty], [IntrWillReturn, IntrInaccessibleMemOnly, + IntrReducerUnregister]>; + ///===-------------------------- Other Intrinsics --------------------------===// // def int_trap : Intrinsic<[], [], [IntrNoReturn, IntrCold]>, @@ -1732,9 +1827,11 @@ def int_ptrmask: DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm [IntrNoMem, IntrSpeculatable, IntrWillReturn]>; // Intrinsic to wrap a thread local variable. 
-def int_threadlocal_address : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>], - [NonNull, NonNull>, - IntrNoMem, IntrSpeculatable, IntrWillReturn]>; +def int_threadlocal_address + : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>], [ + NonNull, NonNull>, IntrInaccessibleMemOnly, + IntrReadMem, IntrSpeculatable, IntrWillReturn + ]>; def int_experimental_stepvector : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [], [IntrNoMem]>; diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index c6fee47b464b95..fbab73fb88301b 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -48,6 +48,9 @@ void initializeGlobalISel(PassRegistry&); /// Initialize all passes linked into the CodeGen library. void initializeTarget(PassRegistry&); +/// Initialize all passes linked into the TapirOpts library. +void initializeTapirOpts(PassRegistry&); + void initializeAAEvalLegacyPassPass(PassRegistry&); void initializeAAResultsWrapperPassPass(PassRegistry&); void initializeAlwaysInlinerLegacyPassPass(PassRegistry&); @@ -81,14 +84,18 @@ void initializeCallGraphPrinterLegacyPassPass(PassRegistry&); void initializeCallGraphViewerPass(PassRegistry&); void initializeCallGraphWrapperPassPass(PassRegistry&); void initializeCheckDebugMachineModulePass(PassRegistry &); +void initializeCilkSanitizerLegacyPassPass(PassRegistry&); void initializeCodeGenPreparePass(PassRegistry&); void initializeComplexDeinterleavingLegacyPassPass(PassRegistry&); +void initializeComprehensiveStaticInstrumentationLegacyPassPass(PassRegistry&); void initializeConstantHoistingLegacyPassPass(PassRegistry&); void initializeCostModelAnalysisPass(PassRegistry&); void initializeCycleInfoWrapperPassPass(PassRegistry &); void initializeDAEPass(PassRegistry&); void initializeDAHPass(PassRegistry&); void initializeDCELegacyPassPass(PassRegistry&); +void initializeDRFAAWrapperPassPass(PassRegistry&); +void 
initializeDRFScopedNoAliasWrapperPassPass(PassRegistry&); void initializeDeadMachineInstructionElimPass(PassRegistry&); void initializeDebugifyMachineModulePass(PassRegistry &); void initializeDelinearizationPass(PassRegistry&); @@ -182,7 +189,10 @@ void initializeLoopPredicationLegacyPassPass(PassRegistry&); void initializeLoopRotateLegacyPassPass(PassRegistry&); void initializeLoopSimplifyCFGLegacyPassPass(PassRegistry&); void initializeLoopSimplifyPass(PassRegistry&); +void initializeLoopSpawningPass(PassRegistry&); +void initializeLoopSpawningTIPass(PassRegistry&); void initializeLoopStrengthReducePass(PassRegistry&); +void initializeLoopStripMinePass(PassRegistry&); void initializeLoopUnrollPass(PassRegistry&); void initializeLowerAtomicLegacyPassPass(PassRegistry&); void initializeLowerConstantIntrinsicsPass(PassRegistry&); @@ -194,6 +204,7 @@ void initializeLowerWidenableConditionLegacyPassPass(PassRegistry&); void initializeLowerIntrinsicsPass(PassRegistry&); void initializeLowerInvokeLegacyPassPass(PassRegistry&); void initializeLowerSwitchLegacyPassPass(PassRegistry &); +void initializeLowerTapirToTargetPass(PassRegistry&); void initializeKCFIPass(PassRegistry &); void initializeMIRAddFSDiscriminatorsPass(PassRegistry &); void initializeMIRCanonicalizerPass(PassRegistry &); @@ -298,6 +309,7 @@ void initializeScalarizerLegacyPassPass(PassRegistry&); void initializeScavengerTestPass(PassRegistry&); void initializeScopedNoAliasAAWrapperPassPass(PassRegistry&); void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &); +void initializeSerializeSmallTasksPass(PassRegistry&); void initializeShadowStackGCLoweringPass(PassRegistry&); void initializeShrinkWrapPass(PassRegistry&); void initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry&); @@ -323,6 +335,11 @@ void initializeTailDuplicatePass(PassRegistry&); void initializeTargetLibraryInfoWrapperPassPass(PassRegistry&); void initializeTargetPassConfigPass(PassRegistry&); void 
initializeTargetTransformInfoWrapperPassPass(PassRegistry&); +void initializeTapirCleanupPass(PassRegistry&); +void initializeTapirRaceDetectWrapperPassPass(PassRegistry&); +void initializeTaskInfoWrapperPassPass(PassRegistry&); +void initializeTaskCanonicalizePass(PassRegistry&); +void initializeTaskSimplifyPass(PassRegistry&); void initializeTLSVariableHoistLegacyPassPass(PassRegistry &); void initializeTwoAddressInstructionPassPass(PassRegistry&); void initializeTypeBasedAAWrapperPassPass(PassRegistry&); diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h index 5c23ba4f7ac498..ad220451664089 100644 --- a/llvm/include/llvm/LTO/Config.h +++ b/llvm/include/llvm/LTO/Config.h @@ -23,6 +23,7 @@ #include "llvm/Passes/PassBuilder.h" #include "llvm/Support/CodeGen.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Tapir/TapirTargetIDs.h" #include #include @@ -91,6 +92,12 @@ struct Config { /// 'ELF' scheme. VisScheme VisibilityScheme = FromPrevailing; + /// Target for lowering Tapir constructs + TapirTargetID TapirTarget = TapirTargetID::None; + + // Path to OpenCilk runtime bitcode file. + std::string OpenCilkABIBitcodeFile; + /// If this field is set, the set of passes run in the middle-end optimizer /// will be the one specified by the string. Only works with the new pass /// manager as the old one doesn't have this ability. 
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index 7420ea64e95435..c98725949b37b4 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -19,6 +19,7 @@ #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CallPrinter.h" +#include "llvm/Analysis/DataRaceFreeAliasAnalysis.h" #include "llvm/Analysis/DomPrinter.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/IntervalPartition.h" @@ -50,6 +51,7 @@ #include "llvm/Transforms/Scalar/InstSimplifyPass.h" #include "llvm/Transforms/Scalar/Scalarizer.h" #include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Tapir.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" #include "llvm/Transforms/Vectorize.h" @@ -73,12 +75,14 @@ namespace { (void) llvm::createSCEVAAWrapperPass(); (void) llvm::createTypeBasedAAWrapperPass(); (void) llvm::createScopedNoAliasAAWrapperPass(); + (void) llvm::createDRFAAWrapperPass(); (void) llvm::createBreakCriticalEdgesPass(); (void) llvm::createCallGraphDOTPrinterPass(); (void) llvm::createCallGraphViewerPass(); (void) llvm::createCFGSimplificationPass(); (void) llvm::createStructurizeCFGPass(); (void) llvm::createCostModelAnalysisPass(); + (void) llvm::createDRFScopedNoAliasWrapperPass(); (void) llvm::createDeadArgEliminationPass(); (void) llvm::createDeadCodeEliminationPass(); (void) llvm::createDependenceAnalysisWrapperPass(); @@ -102,7 +106,9 @@ namespace { (void) llvm::createLoopPredicationPass(); (void) llvm::createLoopSimplifyPass(); (void) llvm::createLoopSimplifyCFGPass(); + (void) llvm::createLoopSpawningTIPass(); (void) llvm::createLoopStrengthReducePass(); + (void) llvm::createLoopStripMinePass(); (void) llvm::createLoopUnrollPass(); (void) llvm::createLoopRotatePass(); (void) llvm::createLowerConstantIntrinsicsPass(); @@ -110,6 +116,7 @@ namespace { (void) 
llvm::createLowerGlobalDtorsLegacyPass(); (void) llvm::createLowerInvokePass(); (void) llvm::createLowerSwitchPass(); + (void) llvm::createLowerTapirToTargetPass(); (void) llvm::createNaryReassociatePass(); (void) llvm::createObjCARCContractPass(); (void) llvm::createPromoteMemoryToRegisterPass(); @@ -160,6 +167,8 @@ namespace { (void) llvm::createUnifyLoopExitsPass(); (void) llvm::createFixIrreduciblePass(); (void)llvm::createSelectOptimizePass(); + (void) llvm::createTaskCanonicalizePass(); + (void) llvm::createTaskSimplifyPass(); (void)new llvm::IntervalPartition(); (void)new llvm::ScalarEvolutionWrapperPass(); diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index fdb407263787f6..8a2cd4ed3245dd 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -55,6 +55,10 @@ class PipelineTuningOptions { /// level. bool SLPVectorization; + /// Tuning option to enable/disable loop stripmining. Its default value + /// is that of the flag: `-stripmine-loops`. + bool LoopStripmine; + /// Tuning option to enable/disable loop unrolling. Its default value is true. bool LoopUnrolling; @@ -228,6 +232,24 @@ class PassBuilder { buildModuleOptimizationPipeline(OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase); + /// Construct the pipeline for lowering Tapir loops to a target parallel + /// runtime. + /// + /// This pipeline is intended to be used early within + /// buildTapirLoweringPipeline at Level > O0 or run on its own for debugging + /// purposes. + ModulePassManager buildTapirLoopLoweringPipeline(OptimizationLevel Level, + ThinOrFullLTOPhase Phase); + + /// Construct the pipeline for lowering Tapir constructs to a target parallel + /// runtime. + /// + /// This pipeline is intended to be used with the PerModuleDefault pipeline + /// and various LTO pipelines to lower Tapir constructs. This pipeline is + /// expected to run late in the parent pipelines. 
+ ModulePassManager buildTapirLoweringPipeline(OptimizationLevel Level, + ThinOrFullLTOPhase Phase); + /// Build a per-module default optimization pipeline. /// /// This provides a good default optimization pipeline for per-module @@ -235,7 +257,8 @@ class PassBuilder { /// typically correspond to frontend "-O[123]" options for optimization /// levels \c O1, \c O2 and \c O3 resp. ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, - bool LTOPreLink = false); + bool LTOPreLink = false, + bool LowerTapir = false); /// Build a fat object default optimization pipeline. /// @@ -266,7 +289,8 @@ class PassBuilder { /// addPreLinkLTODefaultPipeline, and the two coordinate closely. ModulePassManager buildThinLTODefaultPipeline(OptimizationLevel Level, - const ModuleSummaryIndex *ImportSummary); + const ModuleSummaryIndex *ImportSummary, + bool LowerTapir = false); /// Build a pre-link, LTO-targeting default optimization pipeline to a pass /// manager. @@ -284,13 +308,15 @@ class PassBuilder { /// when IR coming into the LTO phase was first run through \c /// addPreLinkLTODefaultPipeline, and the two coordinate closely. ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level, - ModuleSummaryIndex *ExportSummary); + ModuleSummaryIndex *ExportSummary, + bool LowerTapir = false); /// Build an O0 pipeline with the minimal semantically required passes. /// /// This should only be used for non-LTO and LTO pre-link pipelines. ModulePassManager buildO0DefaultPipeline(OptimizationLevel Level, - bool LTOPreLink = false); + bool LTOPreLink = false, + bool LowerTapir = false); /// Build the default `AAManager` with the default alias analysis pipeline /// registered. @@ -373,6 +399,11 @@ class PassBuilder { /// Print pass names. void printPassNames(raw_ostream &OS); + /// Add optimizations to run immediately after an + /// instrumentation pass, such as CilkSanitizer or CSI. 
+ ModulePassManager + buildPostCilkInstrumentationPipeline(OptimizationLevel Level); + /// Register a callback for a default optimizer pipeline extension /// point /// @@ -495,6 +526,26 @@ class PassBuilder { FullLinkTimeOptimizationLastEPCallbacks.push_back(C); } + /// Register a callback for a default optimizer pipeline extension point. + /// + /// This extension point allows adding passes after optimizations have been + /// performed on the Tapir IR, but before Tapir constructs are lowered to a + /// target runtime. + void registerTapirLateEPCallback( + const std::function &C) { + TapirLateEPCallbacks.push_back(C); + } + + /// Register a callback for a default optimizer pipeline extension point. + /// + /// This extension point allows adding passes after optimizations have been + /// performed on the Tapir IR, but before Tapir constructs are lowered to a + /// target runtime. + void registerTapirLoopEndEPCallback( + const std::function &C) { + TapirLoopEndEPCallbacks.push_back(C); + } + /// Register a callback for parsing an AliasAnalysis Name to populate /// the given AAManager \p AA void registerParseAACallback( @@ -597,6 +648,10 @@ class PassBuilder { OptimizationLevel Level); void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM, OptimizationLevel Level); + void invokeTapirLateEPCallbacks(ModulePassManager &MPM, + OptimizationLevel Level); + void invokeTapirLoopEndEPCallbacks(ModulePassManager &MPM, + OptimizationLevel Level); private: // O1 pass pipeline @@ -655,6 +710,10 @@ class PassBuilder { FullLinkTimeOptimizationEarlyEPCallbacks; SmallVector, 2> FullLinkTimeOptimizationLastEPCallbacks; + SmallVector, 2> + TapirLateEPCallbacks; + SmallVector, 2> + TapirLoopEndEPCallbacks; SmallVector, 2> PipelineStartEPCallbacks; SmallVector, 2> diff --git a/llvm/include/llvm/Support/GenericLoopInfo.h b/llvm/include/llvm/Support/GenericLoopInfo.h index ac4f2d7010b411..07b7f4eac3b470 100644 --- a/llvm/include/llvm/Support/GenericLoopInfo.h +++ 
b/llvm/include/llvm/Support/GenericLoopInfo.h @@ -272,11 +272,12 @@ template class LoopBase { /// Return all blocks inside the loop that have successors outside of the /// loop. These are the blocks _inside of the current loop_ which branch out. /// The returned list is always unique. - void getExitingBlocks(SmallVectorImpl &ExitingBlocks) const; + void getExitingBlocks(SmallVectorImpl &ExitingBlocks, + bool IgnoreDetachUnwind = false) const; /// If getExitingBlocks would return exactly one block, return that block. /// Otherwise return null. - BlockT *getExitingBlock() const; + BlockT *getExitingBlock(bool IgnoreDetachUnwind = false) const; /// Return all of the successor blocks of this loop. These are the blocks /// _outside of the current loop_ which are branched to. diff --git a/llvm/include/llvm/Support/GenericLoopInfoImpl.h b/llvm/include/llvm/Support/GenericLoopInfoImpl.h index 85233d38f0f6db..eeb3c4a8442b6a 100644 --- a/llvm/include/llvm/Support/GenericLoopInfoImpl.h +++ b/llvm/include/llvm/Support/GenericLoopInfoImpl.h @@ -31,7 +31,7 @@ namespace llvm { /// template void LoopBase::getExitingBlocks( - SmallVectorImpl &ExitingBlocks) const { + SmallVectorImpl &ExitingBlocks, bool IgnoreDetachUnwind) const { assert(!isInvalid() && "Loop not in a valid state!"); for (const auto BB : blocks()) for (auto *Succ : children(BB)) @@ -45,7 +45,8 @@ void LoopBase::getExitingBlocks( /// getExitingBlock - If getExitingBlocks would return exactly one block, /// return that block. Otherwise return null. 
template -BlockT *LoopBase::getExitingBlock() const { +BlockT * +LoopBase::getExitingBlock(bool IgnoreDetachUnwind) const { assert(!isInvalid() && "Loop not in a valid state!"); auto notInLoop = [&](BlockT *BB) { return !contains(BB); }; auto isExitBlock = [&](BlockT *BB, bool AllowRepeats) -> BlockT * { diff --git a/llvm/include/llvm/Transforms/Instrumentation.h b/llvm/include/llvm/Transforms/Instrumentation.h index 392983a1984445..3ea3a517143f3a 100644 --- a/llvm/include/llvm/Transforms/Instrumentation.h +++ b/llvm/include/llvm/Transforms/Instrumentation.h @@ -150,6 +150,29 @@ struct SanitizerCoverageOptions { SanitizerCoverageOptions() = default; }; +// Options for comprehensive static instrumentation +struct CSIOptions { + bool InstrumentFuncEntryExit = true; + bool InstrumentLoops = true; + bool InstrumentBasicBlocks = true; + bool InstrumentMemoryAccesses = true; + bool InstrumentCalls = true; + bool InstrumentAtomics = true; + bool InstrumentMemIntrinsics = true; + bool InstrumentTapir = true; + bool InstrumentAllocas = true; + bool InstrumentAllocFns = true; + bool Interpose = true; + + // TODO: With recent changes LLVM's JIT technology, the jitMode flag no longer + // seems to be necessary. + bool jitMode = false; + bool CallsMayThrow = true; + bool CallsTerminateBlocks = true; + + CSIOptions() = default; +}; + /// Calculate what to divide by to scale counts. /// /// Given the maximum count, calculate a divisor that will scale all the diff --git a/llvm/include/llvm/Transforms/Instrumentation/CSI.h b/llvm/include/llvm/Transforms/Instrumentation/CSI.h new file mode 100644 index 00000000000000..c6deb5561ff2b0 --- /dev/null +++ b/llvm/include/llvm/Transforms/Instrumentation/CSI.h @@ -0,0 +1,1619 @@ +//===- CSI.h - CSI implementation structures and hooks -------*- C++ -*----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is part of CSI, a framework that provides comprehensive static +// instrumentation. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_CSI_H +#define LLVM_TRANSFORMS_CSI_H + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ValueMap.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Instrumentation/SurgicalInstrumentationConfig.h" + +namespace llvm { + +class Loop; +class LoopInfo; +class Spindle; +class Task; +class TaskInfo; +class ScalarEvolution; + +static const char *const CsiRtUnitInitName = "__csirt_unit_init"; +static const char *const CsiRtUnitCtorName = "csirt.unit_ctor"; +static const char *const CsiFunctionBaseIdName = "__csi_unit_func_base_id"; +static const char *const CsiFunctionExitBaseIdName = + "__csi_unit_func_exit_base_id"; +static const char *const CsiBasicBlockBaseIdName = "__csi_unit_bb_base_id"; +static const char *const CsiLoopBaseIdName = "__csi_unit_loop_base_id"; +static const char *const CsiLoopExitBaseIdName = "__csi_unit_loop_exit_base_id"; +static const char *const CsiCallsiteBaseIdName = "__csi_unit_callsite_base_id"; +static const char *const CsiLoadBaseIdName = "__csi_unit_load_base_id"; +static const char *const CsiStoreBaseIdName = "__csi_unit_store_base_id"; +static const char *const CsiAllocaBaseIdName = "__csi_unit_alloca_base_id"; +static const char *const CsiDetachBaseIdName = "__csi_unit_detach_base_id"; +static const char *const CsiTaskBaseIdName = "__csi_unit_task_base_id"; +static const char *const CsiTaskExitBaseIdName = 
"__csi_unit_task_exit_base_id"; +static const char *const CsiDetachContinueBaseIdName = + "__csi_unit_detach_continue_base_id"; +static const char *const CsiSyncBaseIdName = "__csi_unit_sync_base_id"; +static const char *const CsiAllocFnBaseIdName = "__csi_unit_allocfn_base_id"; +static const char *const CsiFreeBaseIdName = "__csi_unit_free_base_id"; + +static const char *const CsiDefaultDebugNamePrefix = "__csi_unit_function_name_"; + +static const char *const CsiUnitSizeTableName = "__csi_unit_size_table"; +static const char *const CsiUnitFedTableName = "__csi_unit_fed_table"; +static const char *const CsiFuncIdVariablePrefix = "__csi_func_id_"; +static const char *const CsiUnitFedTableArrayName = "__csi_unit_fed_tables"; +static const char *const CsiUnitSizeTableArrayName = "__csi_unit_size_tables"; +static const char *const CsiInitCallsiteToFunctionName = + "__csi_init_callsite_to_function"; +static const char *const CsiDisableInstrumentationName = + "__csi_disable_instrumentation"; + +using csi_id_t = int64_t; +static const csi_id_t CsiUnknownId = -1; +static const csi_id_t CsiCallsiteUnknownTargetId = CsiUnknownId; +// See clang/lib/CodeGen/CodeGenModule.h: +static const int CsiUnitCtorPriority = 0; + +/// Maintains a mapping from CSI ID to static data for that ID. +class ForensicTable { +public: + ForensicTable() {} + ForensicTable(Module &M, StringRef BaseIdName, StringRef TableName = "", + bool UseExistingBaseId = true); + + /// The number of entries in this forensic table + uint64_t size() const { return IdCounter; } + + /// Get the local ID of the given Value. + uint64_t getId(const Value *V); + + /// The GlobalVariable holding the base ID for this forensic table. + GlobalVariable *baseId() const { return BaseId; } + + /// Converts a local to global ID conversion. + /// + /// This is done by using the given IRBuilder to insert a load to the base ID + /// global variable followed by an add of the base value and the local ID. 
+ /// + /// \returns A Value holding the global ID corresponding to the + /// given local ID. + Value *localToGlobalId(uint64_t LocalId, IRBuilder<> &IRB) const; + + /// Helper function to get or create a string for a forensic-table entry. + static Constant *getObjectStrGV(Module &M, StringRef Str, const Twine GVName); + +protected: + /// The GlobalVariable holding the base ID for this FED table. + GlobalVariable *BaseId = nullptr; + /// Counter of local IDs used so far. + uint64_t IdCounter = 0; + /// Map of Value to Local ID. + DenseMap ValueToLocalIdMap; + StringRef TableName; +}; + +/// Maintains a mapping from CSI ID to front-end data for that ID. +/// +/// The front-end data currently is the source location that a given +/// CSI ID corresponds to. +class FrontEndDataTable : public ForensicTable { +public: + FrontEndDataTable() : ForensicTable() {} + FrontEndDataTable(Module &M, StringRef BaseIdName, + StringRef TableName = CsiUnitFedTableName, + StringRef DebugNamePrefix = CsiDefaultDebugNamePrefix, + bool UseExistingBaseId = true) + : ForensicTable(M, BaseIdName, TableName, UseExistingBaseId), + DebugNamePrefix(DebugNamePrefix) {} + + /// The number of entries in this FED table + uint64_t size() const { return LocalIdToSourceLocationMap.size(); } + + /// Add the given Function to this FED table. + /// \returns The local ID of the Function. + uint64_t add(const Function &F); + + /// Add the given BasicBlock to this FED table. + /// \returns The local ID of the BasicBlock. + uint64_t add(const BasicBlock &BB); + + /// Add the given Instruction to this FED table. + /// \returns The local ID of the Instruction. + uint64_t add(const Instruction &I, const StringRef &RealName = ""); + + /// Get the Type for a pointer to a FED table entry. + /// + /// A FED table entry is just a source location. + static PointerType *getPointerType(LLVMContext &C); + + /// Insert this FED table into the given Module. 
+ /// + /// The FED table is constructed as a ConstantArray indexed by local + /// IDs. The runtime is responsible for performing the mapping that + /// allows the table to be indexed by global ID. + Constant *insertIntoModule(Module &M) const; + +private: + struct SourceLocation { + StringRef Name; + int32_t Line; + int32_t Column; + StringRef Filename; + StringRef Directory; + }; + StringRef DebugNamePrefix; + + /// Map of local ID to SourceLocation. + DenseMap LocalIdToSourceLocationMap; + + /// Create a struct type to match the "struct SourceLocation" type. + /// (and the source_loc_t type in csi.h). + static StructType *getSourceLocStructType(LLVMContext &C); + + /// Append the debug information to the table, assigning it the next + /// available ID. + /// + /// \returns The local ID of the appended information. + /// @{ + void add(uint64_t ID, const DILocation *Loc, const StringRef &RealName = ""); + void add(uint64_t ID, const DISubprogram *Subprog); + /// @} + + /// Append the line and file information to the table, assigning it + /// the next available ID. + /// + /// \returns The new local ID of the DILocation. + void add(uint64_t ID, int32_t Line = -1, int32_t Column = -1, + StringRef Filename = "", StringRef Directory = "", + StringRef Name = ""); +}; + +/// Maintains a mapping from CSI ID of a basic block to the size of that basic +/// block in LLVM IR instructions. +class SizeTable : public ForensicTable { +public: + SizeTable() : ForensicTable() {} + SizeTable(Module &M, StringRef BaseIdName) : ForensicTable(M, BaseIdName) {} + + /// The number of entries in this table + uint64_t size() const { return LocalIdToSizeMap.size(); } + + /// Add the given basic block to this table. + /// \returns The local ID of the basic block. + uint64_t add(const BasicBlock &BB, TargetTransformInfo *TTI); + + /// Get the Type for a pointer to a table entry. + /// + /// A table entry is just a source location. 
+ static PointerType *getPointerType(LLVMContext &C); + + /// Insert this table into the given Module. + /// + /// The table is constructed as a ConstantArray indexed by local IDs. The + /// runtime is responsible for performing the mapping that allows the table to + /// be indexed by global ID. + Constant *insertIntoModule(Module &M) const; + +private: + struct SizeInformation { + // This count includes every IR instruction. + int32_t FullIRSize; + // This count excludes IR instructions that don't lower to any real + // instructions, e.g., PHI instructions, debug intrinsics, and lifetime + // intrinsics. + int32_t NonEmptyIRSize; + }; + + /// Map of local ID to size. + DenseMap LocalIdToSizeMap; + + /// Create a struct type to match the "struct SourceLocation" type. + /// (and the source_loc_t type in csi.h). + static StructType *getSizeStructType(LLVMContext &C); + + /// Append the size information to the table. + void add(uint64_t ID, int32_t FullIRSize = 0, int32_t NonEmptyIRSize = 0); +}; + +/// Represents a property value passed to hooks. +class CsiProperty { +public: + CsiProperty() {} + + virtual ~CsiProperty() {} + + /// Return the coerced type of a property. + /// + /// TODO: Right now, this function simply returns a 64-bit integer. Although + /// this solution works for x86_64, it should be generalized to handle other + /// architectures in the future. + static Type *getCoercedType(LLVMContext &C, StructType *Ty) { + // Must match the definition of property type in csi.h + // return StructType::get(IntegerType::get(C, 64), + // nullptr); + // We return an integer type, rather than a struct type, to deal with x86_64 + // type coercion on struct bit fields. + return IntegerType::get(C, 64); + } + + /// Return a constant value holding this property. 
+ virtual Constant *getValueImpl(LLVMContext &C) const = 0; + + Constant *getValue(LLVMContext &C) const { + return getValueImpl(C); + } + + Constant *getValue(IRBuilder<> &IRB) const { + return getValueImpl(IRB.getContext()); + } +}; + +class CsiFuncProperty : public CsiProperty { +public: + CsiFuncProperty() { PropValue.Bits = 0; } + + /// Return the Type of a property. + static Type *getType(LLVMContext &C) { + // Must match the definition of property type in csi.h + return CsiProperty::getCoercedType( + C, StructType::get(IntegerType::get(C, PropBits.NumSyncReg), + IntegerType::get(C, PropBits.MaySpawn), + IntegerType::get(C, PropBits.Padding))); + } + /// Get the default value for this property. + static Constant *getDefaultValueImpl(LLVMContext &C) { + return Constant::getNullValue(getType(C)); + } + + /// Return a constant value holding this property. + Constant *getValueImpl(LLVMContext &C) const override { + // Must match the definition of property type in csi.h + // StructType *StructTy = getType(C); + // return ConstantStruct::get(StructTy, + // ConstantInt::get(IntegerType::get(C, 64), 0), + // nullptr); + // TODO: This solution works for x86, but should be generalized to support + // other architectures in the future. + return ConstantInt::get(getType(C), PropValue.Bits); + } + + /// Set the number of sync regions in this function. + void setNumSyncReg(unsigned v) { PropValue.Fields.NumSyncReg = v; } + + /// Set the value of the MaySpawn property. + void setMaySpawn(bool v) { PropValue.Fields.MaySpawn = v; } + + +private: + typedef union { + // Must match the definition of property type in csi.h + struct { + unsigned NumSyncReg : 8; + unsigned MaySpawn : 1; + uint64_t Padding : 55; + } Fields; + uint64_t Bits; + } Property; + + /// The underlying values of the properties. + Property PropValue; + + typedef struct { + int NumSyncReg; + int MaySpawn; + int Padding; + } PropertyBits; + + /// The number of bits representing each property. 
+ static constexpr PropertyBits PropBits = {8, 1, (64 - 8 - 1)}; +}; + +class CsiFuncExitProperty : public CsiProperty { +public: + CsiFuncExitProperty() { PropValue.Bits = 0; } + + /// Return the Type of a property. + static Type *getType(LLVMContext &C) { + // Must match the definition of property type in csi.h + return CsiProperty::getCoercedType( + C, StructType::get(IntegerType::get(C, PropBits.MaySpawn), + IntegerType::get(C, PropBits.EHReturn), + IntegerType::get(C, PropBits.Padding))); + } + /// Get the default value for this property. + static Constant *getDefaultValueImpl(LLVMContext &C) { + return Constant::getNullValue(getType(C)); + } + + /// Return a constant value holding this property. + Constant *getValueImpl(LLVMContext &C) const override { + // Must match the definition of property type in csi.h + // StructType *StructTy = getType(C); + // return ConstantStruct::get(StructTy, + // ConstantInt::get(IntegerType::get(C, 64), 0), + // nullptr); + // TODO: This solution works for x86, but should be generalized to support + // other architectures in the future. + return ConstantInt::get(getType(C), PropValue.Bits); + } + + /// Set the value of the MaySpawn property. + void setMaySpawn(bool v) { PropValue.Fields.MaySpawn = v; } + + /// Set the value of the EHReturn property. + void setEHReturn(bool v) { PropValue.Fields.EHReturn = v; } + +private: + typedef union { + // Must match the definition of property type in csi.h + struct { + unsigned MaySpawn : 1; + unsigned EHReturn : 1; + uint64_t Padding : 62; + } Fields; + uint64_t Bits; + } Property; + + /// The underlying values of the properties. + Property PropValue; + + typedef struct { + int MaySpawn; + int EHReturn; + int Padding; + } PropertyBits; + + /// The number of bits representing each property. 
+ static constexpr PropertyBits PropBits = {1, 1, (64 - 1 - 1)}; +}; + +class CsiLoopProperty : public CsiProperty { +public: + CsiLoopProperty() { PropValue.Bits = 0; } + + /// Return the Type of a property. + static StructType *getStructType(LLVMContext &C) { + // Must match the definition of property type in csi.h + return StructType::get(IntegerType::get(C, PropBits.IsTapirLoop), + IntegerType::get(C, PropBits.HasUniqueExitingBlock), + IntegerType::get(C, PropBits.Padding)); + } + static Type *getType(LLVMContext &C) { + return getCoercedType(C, getStructType(C)); + } + /// Get the default value for this property. + static Constant *getDefaultValueImpl(LLVMContext &C) { + return Constant::getNullValue(getType(C)); + } + + /// Return a constant value holding this property. + Constant *getValueImpl(LLVMContext &C) const override { + // Must match the definition of property type in csi.h + // StructType *StructTy = getType(C); + // return ConstantStruct::get(StructTy, + // ConstantInt::get(IntegerType::get(C, 64), 0), + // nullptr); + // TODO: This solution works for x86, but should be generalized to support + // other architectures in the future. + return ConstantInt::get(getType(C), PropValue.Bits); + } + + /// Set the value of the IsTapirLoop property. + void setIsTapirLoop(bool v) { PropValue.Fields.IsTapirLoop = v; } + + /// Set the value of the HasUniqueExitingBlock property. + void setHasUniqueExitingBlock(bool v) { + PropValue.Fields.HasUniqueExitingBlock = v; + } + +private: + typedef union { + // Must match the definition of property type in csi.h + struct { + unsigned IsTapirLoop : 1; + unsigned HasUniqueExitingBlock : 1; + uint64_t Padding : 62; + } Fields; + uint64_t Bits; + } Property; + + /// The underlying values of the properties. + Property PropValue; + + typedef struct { + int IsTapirLoop; + int HasUniqueExitingBlock; + int Padding; + } PropertyBits; + + /// The number of bits representing each property. 
+  static constexpr PropertyBits PropBits = {1, 1, (64 - 1 - 1)};
+};
+
+class CsiLoopExitProperty : public CsiProperty {
+public:
+  CsiLoopExitProperty() { PropValue.Bits = 0; }
+
+  /// Return the Type of a property.
+  static StructType *getStructType(LLVMContext &C) {
+    // Must match the definition of property type in csi.h
+    return StructType::get(IntegerType::get(C, PropBits.IsLatch),
+                           IntegerType::get(C, PropBits.Padding));
+  }
+  static Type *getType(LLVMContext &C) {
+    return getCoercedType(C, getStructType(C));
+  }
+  /// Get the default value for this property.
+  static Constant *getDefaultValueImpl(LLVMContext &C) {
+    return Constant::getNullValue(getType(C));
+  }
+
+  /// Return a constant value holding this property.
+  Constant *getValueImpl(LLVMContext &C) const override {
+    // Must match the definition of property type in csi.h
+    // StructType *StructTy = getType(C);
+    // return ConstantStruct::get(StructTy,
+    //                            ConstantInt::get(IntegerType::get(C, 64), 0),
+    //                            nullptr);
+    // TODO: This solution works for x86, but should be generalized to support
+    // other architectures in the future.
+    return ConstantInt::get(getType(C), PropValue.Bits);
+  }
+
+  /// Set the value of the IsLatch property.
+  void setIsLatch(bool v) { PropValue.Fields.IsLatch = v; }
+
+private:
+  typedef union {
+    // Must match the definition of property type in csi.h
+    struct {
+      unsigned IsLatch : 1;
+      uint64_t Padding : 63;
+    } Fields;
+    uint64_t Bits;
+  } Property;
+
+  /// The underlying values of the properties.
+  Property PropValue;
+
+  typedef struct {
+    int IsLatch;
+    int Padding;
+  } PropertyBits;
+
+  /// The number of bits representing each property.
+  static constexpr PropertyBits PropBits = {1, (64 - 1)};
+};
+
+class CsiBBProperty : public CsiProperty {
+public:
+  CsiBBProperty() { PropValue.Bits = 0; }
+
+  /// Return the Type of a property.
+ static Type *getType(LLVMContext &C) { + // Must match the definition of property type in csi.h + return CsiProperty::getCoercedType( + C, StructType::get(IntegerType::get(C, PropBits.IsLandingPad), + IntegerType::get(C, PropBits.IsEHPad), + IntegerType::get(C, PropBits.Padding))); + } + /// Get the default value for this property. + static Constant *getDefaultValueImpl(LLVMContext &C) { + return Constant::getNullValue(getType(C)); + } + + /// Return a constant value holding this property. + Constant *getValueImpl(LLVMContext &C) const override { + // Must match the definition of property type in csi.h + // StructType *StructTy = getType(C); + // return ConstantStruct::get(StructTy, + // ConstantInt::get(IntegerType::get(C, 64), 0), + // nullptr); + // TODO: This solution works for x86, but should be generalized to support + // other architectures in the future. + return ConstantInt::get(getType(C), PropValue.Bits); + } + + /// Set the value of the IsLandingPad property. + void setIsLandingPad(bool v) { PropValue.Fields.IsLandingPad = v; } + + /// Set the value of the IsEHPad property. + void setIsEHPad(bool v) { PropValue.Fields.IsEHPad = v; } + +private: + typedef union { + // Must match the definition of property type in csi.h + struct { + unsigned IsLandingPad : 1; + unsigned IsEHPad : 1; + uint64_t Padding : 62; + } Fields; + uint64_t Bits; + } Property; + + /// The underlying values of the properties. + Property PropValue; + + typedef struct { + int IsLandingPad; + int IsEHPad; + int Padding; + } PropertyBits; + + /// The number of bits representing each property. + static constexpr PropertyBits PropBits = {1, 1, (64 - 1 - 1)}; +}; + +class CsiDetachProperty : public CsiProperty { +public: + CsiDetachProperty() { PropValue.Bits = 0; } + + /// Return the Type of a property. 
+  static StructType *getStructType(LLVMContext &C) {
+    // Must match the definition of property type in csi.h
+    return StructType::get(IntegerType::get(C, PropBits.ForTapirLoopBody),
+                           IntegerType::get(C, PropBits.Padding));
+  }
+  static Type *getType(LLVMContext &C) {
+    return getCoercedType(C, getStructType(C));
+  }
+  /// Get the default value for this property.
+  static Constant *getDefaultValueImpl(LLVMContext &C) {
+    return Constant::getNullValue(getType(C));
+  }
+
+  /// Return a constant value holding this property.
+  Constant *getValueImpl(LLVMContext &C) const override {
+    // Must match the definition of property type in csi.h
+    // StructType *StructTy = getType(C);
+    // return ConstantStruct::get(StructTy,
+    //                            ConstantInt::get(IntegerType::get(C, 64), 0),
+    //                            nullptr);
+    // TODO: This solution works for x86, but should be generalized to support
+    // other architectures in the future.
+    return ConstantInt::get(getType(C), PropValue.Bits);
+  }
+
+  /// Set the value of the ForTapirLoopBody property.
+  void setForTapirLoopBody(bool v) { PropValue.Fields.ForTapirLoopBody = v; }
+
+private:
+  typedef union {
+    // Must match the definition of property type in csi.h
+    struct {
+      unsigned ForTapirLoopBody : 1;
+      uint64_t Padding : 63;
+    } Fields;
+    uint64_t Bits;
+  } Property;
+
+  /// The underlying values of the properties.
+  Property PropValue;
+
+  typedef struct {
+    int ForTapirLoopBody;
+    int Padding;
+  } PropertyBits;
+
+  /// The number of bits representing each property.
+  static constexpr PropertyBits PropBits = {1, (64 - 1)};
+};
+
+class CsiTaskProperty : public CsiProperty {
+public:
+  CsiTaskProperty() { PropValue.Bits = 0; }
+
+  /// Return the Type of a property.
+  static StructType *getStructType(LLVMContext &C) {
+    // Must match the definition of property type in csi.h
+    return StructType::get(IntegerType::get(C, PropBits.IsTapirLoopBody),
+                           IntegerType::get(C, PropBits.Padding));
+  }
+  static Type *getType(LLVMContext &C) {
+    return getCoercedType(C, getStructType(C));
+  }
+  /// Get the default value for this property.
+  static Constant *getDefaultValueImpl(LLVMContext &C) {
+    return Constant::getNullValue(getType(C));
+  }
+
+  /// Return a constant value holding this property.
+  Constant *getValueImpl(LLVMContext &C) const override {
+    // Must match the definition of property type in csi.h
+    // StructType *StructTy = getType(C);
+    // return ConstantStruct::get(StructTy,
+    //                            ConstantInt::get(IntegerType::get(C, 64), 0),
+    //                            nullptr);
+    // TODO: This solution works for x86, but should be generalized to support
+    // other architectures in the future.
+    return ConstantInt::get(getType(C), PropValue.Bits);
+  }
+
+  /// Set the value of the IsTapirLoopBody property.
+  void setIsTapirLoopBody(bool v) { PropValue.Fields.IsTapirLoopBody = v; }
+
+  /// Set the number of sync regions in this task.
+  void setNumSyncReg(unsigned v) { PropValue.Fields.NumSyncReg = v; }
+
+private:
+  typedef union {
+    // Must match the definition of property type in csi.h
+    struct {
+      unsigned IsTapirLoopBody : 1;
+      unsigned NumSyncReg : 8;
+      uint64_t Padding : 55;
+    } Fields;
+    uint64_t Bits;
+  } Property;
+
+  /// The underlying values of the properties.
+  Property PropValue;
+
+  typedef struct {
+    int IsTapirLoopBody;
+    int NumSyncReg;
+    int Padding;
+  } PropertyBits;
+
+  /// The number of bits representing each property.
+  static constexpr PropertyBits PropBits = {1, 8, (64 - 1 - 8)};
+};
+
+class CsiTaskExitProperty : public CsiProperty {
+public:
+  CsiTaskExitProperty() { PropValue.Bits = 0; }
+
+  /// Return the Type of a property.
+ static StructType *getStructType(LLVMContext &C) { + // Must match the definition of property type in csi.h + return StructType::get(IntegerType::get(C, PropBits.IsTapirLoopBody), + IntegerType::get(C, PropBits.Padding)); + } + static Type *getType(LLVMContext &C) { + return getCoercedType(C, getStructType(C)); + } + /// Get the default value for this property. + static Constant *getDefaultValueImpl(LLVMContext &C) { + return Constant::getNullValue(getType(C)); + } + + /// Return a constant value holding this property. + Constant *getValueImpl(LLVMContext &C) const override { + // Must match the definition of property type in csi.h + // StructType *StructTy = getType(C); + // return ConstantStruct::get(StructTy, + // ConstantInt::get(IntegerType::get(C, 64), 0), + // nullptr); + // TODO: This solution works for x86, but should be generalized to support + // other architectures in the future. + return ConstantInt::get(getType(C), PropValue.Bits); + } + + /// Set the value of the IsTapirLoopBody property. + void setIsTapirLoopBody(bool v) { PropValue.Fields.IsTapirLoopBody = v; } + +private: + typedef union { + // Must match the definition of property type in csi.h + struct { + unsigned IsTapirLoopBody : 1; + uint64_t Padding : 63; + } Fields; + uint64_t Bits; + } Property; + + /// The underlying values of the properties. + Property PropValue; + + typedef struct { + int IsTapirLoopBody; + int Padding; + } PropertyBits; + + /// The number of bits representing each property. + static constexpr PropertyBits PropBits = {1, (64 - 1)}; +}; + +class CsiDetachContinueProperty : public CsiProperty { +public: + CsiDetachContinueProperty() { PropValue.Bits = 0; } + + /// Return the Type of a property. 
+ static StructType *getStructType(LLVMContext &C) { + // Must match the definition of property type in csi.h + return StructType::get(IntegerType::get(C, PropBits.IsUnwind), + IntegerType::get(C, PropBits.ForTapirLoopBody), + IntegerType::get(C, PropBits.Padding)); + } + static Type *getType(LLVMContext &C) { + return getCoercedType(C, getStructType(C)); + } + /// Get the default value for this property. + static Constant *getDefaultValueImpl(LLVMContext &C) { + return Constant::getNullValue(getType(C)); + } + + /// Return a constant value holding this property. + Constant *getValueImpl(LLVMContext &C) const override { + // Must match the definition of property type in csi.h + // StructType *StructTy = getType(C); + // return ConstantStruct::get(StructTy, + // ConstantInt::get(IntegerType::get(C, 64), 0), + // nullptr); + // TODO: This solution works for x86, but should be generalized to support + // other architectures in the future. + return ConstantInt::get(getType(C), PropValue.Bits); + } + + /// Set the value of the IsUnwind property. + void setIsUnwind(bool v = true) { PropValue.Fields.IsUnwind = v; } + + /// Set the value of the ForTapirLoopBody property. + void setForTapirLoopBody(bool v = true) { + PropValue.Fields.ForTapirLoopBody = v; + } + +private: + typedef union { + // Must match the definition of property type in csi.h + struct { + unsigned IsUnwind : 1; + unsigned ForTapirLoopBody : 1; + uint64_t Padding : 62; + } Fields; + uint64_t Bits; + } Property; + + /// The underlying values of the properties. + Property PropValue; + + typedef struct { + int IsUnwind; + int ForTapirLoopBody; + int Padding; + } PropertyBits; + + /// The number of bits representing each property. + static constexpr PropertyBits PropBits = {1, 1, (64 - 1 - 1)}; +}; + +class CsiCallProperty : public CsiProperty { +public: + CsiCallProperty() { PropValue.Bits = 0; } + + /// Return the Type of a property. 
+  static Type *getType(LLVMContext &C) {
+    // Must match the definition of property type in csi.h
+    return CsiProperty::getCoercedType(
+        C, StructType::get(IntegerType::get(C, PropBits.IsIndirect),
+                           IntegerType::get(C, PropBits.IsUnwind),
+                           IntegerType::get(C, PropBits.Padding)));
+  }
+  /// Get the default value for this property.
+  static Constant *getDefaultValueImpl(LLVMContext &C) {
+    return Constant::getNullValue(getType(C));
+  }
+
+  /// Return a constant value holding this property.
+  Constant *getValueImpl(LLVMContext &C) const override {
+    // Must match the definition of property type in csi.h
+    // StructType *StructTy = getType(C);
+    // return ConstantStruct::get(
+    //     StructTy,
+    //     ConstantInt::get(IntegerType::get(C, PropBits.IsIndirect),
+    //                      PropValue.IsIndirect),
+    //     ConstantInt::get(IntegerType::get(C, PropBits.Padding), 0),
+    //     nullptr);
+    // TODO: This solution works for x86, but should be generalized to support
+    // other architectures in the future.
+    return ConstantInt::get(getType(C), PropValue.Bits);
+  }
+
+  /// Set the value of the IsIndirect property.
+  void setIsIndirect(bool v) { PropValue.Fields.IsIndirect = v; }
+  /// Set the value of the IsUnwind property.
+  void setIsUnwind(bool v = true) { PropValue.Fields.IsUnwind = v; }
+
+private:
+  typedef union {
+    // Must match the definition of property type in csi.h
+    struct {
+      unsigned IsIndirect : 1;
+      unsigned IsUnwind : 1;
+      uint64_t Padding : 62;
+    } Fields;
+    uint64_t Bits;
+  } Property;
+
+  /// The underlying values of the properties.
+  Property PropValue;
+
+  typedef struct {
+    int IsIndirect;
+    int IsUnwind;
+    int Padding;
+  } PropertyBits;
+
+  /// The number of bits representing each property.
+  static constexpr PropertyBits PropBits = {1, 1, (64 - 1 - 1)};
+};
+
+// This class assumes that fields in both the load and store properties appear
+// in the same bit positions.
+class CsiLoadStoreProperty : public CsiProperty { +public: + CsiLoadStoreProperty() { PropValue.Bits = 0; } + /// Return the Type of a property. + static Type *getType(LLVMContext &C) { + // Must match the definition of property type in csi.h. + return CsiProperty::getCoercedType( + C, + StructType::get(IntegerType::get(C, PropBits.Alignment), + IntegerType::get(C, PropBits.IsVtableAccess), + IntegerType::get(C, PropBits.IsConstant), + IntegerType::get(C, PropBits.IsOnStack), + IntegerType::get(C, PropBits.MayBeCaptured), + IntegerType::get(C, PropBits.IsAtomic), + IntegerType::get(C, PropBits.IsThreadLocal), + IntegerType::get(C, PropBits.LoadReadBeforeWriteInBB), + IntegerType::get(C, PropBits.Padding))); + } + /// Get the default value for this property. + static Constant *getDefaultValueImpl(LLVMContext &C) { + return Constant::getNullValue(getType(C)); + } + + /// Return a constant value holding this property. + Constant *getValueImpl(LLVMContext &C) const override { + // Must match the definition of property type in csi.h + return ConstantInt::get(getType(C), PropValue.Bits); + } + + /// Set the value of the Alignment property. + void setAlignment(const MaybeAlign A) { + if (unsigned EncAlign = encode(A)) + PropValue.Fields.Alignment = 1 << (EncAlign - 1); + else + PropValue.Fields.Alignment = 0; + } + void setAlignment(const Align A) { + if (unsigned EncAlign = encode(A)) + PropValue.Fields.Alignment = 1 << (EncAlign - 1); + else + PropValue.Fields.Alignment = 0; + } + /// Set the value of the IsVtableAccess property. + void setIsVtableAccess(bool v) { PropValue.Fields.IsVtableAccess = v; } + /// Set the value of the IsConstant property. + void setIsConstant(bool v) { PropValue.Fields.IsConstant = v; } + /// Set the value of the IsOnStack property. + void setIsOnStack(bool v) { PropValue.Fields.IsOnStack = v; } + /// Set the value of the MayBeCaptured property. 
+ void setMayBeCaptured(bool v) { PropValue.Fields.MayBeCaptured = v; } + /// Set the value of the IsAtomic property. + void setIsAtomic(bool v) { PropValue.Fields.IsAtomic = v; } + /// Set the value of the IsThreadLocal property. + void setIsThreadLocal(bool v) { PropValue.Fields.IsThreadLocal = v; } + /// Set the value of the LoadReadBeforeWriteInBB property. + void setLoadReadBeforeWriteInBB(bool v) { + PropValue.Fields.LoadReadBeforeWriteInBB = v; + } + +private: + typedef union { + // Must match the definition of property type in csi.h + struct { + unsigned Alignment : 8; + unsigned IsVtableAccess : 1; + unsigned IsConstant : 1; + unsigned IsOnStack : 1; + unsigned MayBeCaptured : 1; + unsigned IsAtomic : 1; + unsigned IsThreadLocal : 1; + unsigned LoadReadBeforeWriteInBB : 1; + uint64_t Padding : 49; + } Fields; + uint64_t Bits; + } Property; + + /// The underlying values of the properties. + Property PropValue; + + typedef struct { + int Alignment; + int IsVtableAccess; + int IsConstant; + int IsOnStack; + int MayBeCaptured; + int IsAtomic; + int IsThreadLocal; + int LoadReadBeforeWriteInBB; + int Padding; + } PropertyBits; + + /// The number of bits representing each property. + static constexpr PropertyBits PropBits = { + 8, 1, 1, 1, 1, 1, 1, 1, (64 - 8 - 1 - 1 - 1 - 1 - 1 - 1 - 1)}; +}; + +class CsiAllocaProperty : public CsiProperty { +public: + CsiAllocaProperty() { PropValue.Bits = 0; } + + /// Return the Type of a property. + static Type *getType(LLVMContext &C) { + // Must match the definition of property type in csi.h + return CsiProperty::getCoercedType( + C, StructType::get(IntegerType::get(C, PropBits.IsStatic), + IntegerType::get(C, PropBits.Padding))); + } + /// Get the default value for this property. + static Constant *getDefaultValueImpl(LLVMContext &C) { + return Constant::getNullValue(getType(C)); + } + + /// Return a constant value holding this property. 
+  Constant *getValueImpl(LLVMContext &C) const override {
+    // Must match the definition of property type in csi.h
+    // TODO: This solution works for x86, but should be generalized to support
+    // other architectures in the future.
+    return ConstantInt::get(getType(C), PropValue.Bits);
+  }
+
+  /// Set the value of the IsStatic property.
+  void setIsStatic(bool v) { PropValue.Fields.IsStatic = v; }
+
+private:
+  typedef union {
+    // Must match the definition of property type in csi.h
+    struct {
+      unsigned IsStatic : 1;
+      uint64_t Padding : 63;
+    } Fields;
+    uint64_t Bits;
+  } Property;
+
+  /// The underlying values of the properties.
+  Property PropValue;
+
+  typedef struct {
+    int IsStatic;
+    int Padding;
+  } PropertyBits;
+
+  /// The number of bits representing each property.
+  static constexpr PropertyBits PropBits = {1, (64 - 1)};
+};
+
+class CsiAllocFnProperty : public CsiProperty {
+public:
+  CsiAllocFnProperty() { PropValue.Bits = 0; }
+  /// Return the Type of a property.
+  static Type *getType(LLVMContext &C) {
+    // Must match the definition of property type in csi.h
+    return CsiProperty::getCoercedType(
+        C, StructType::get(IntegerType::get(C, PropBits.AllocFnTy),
+                           IntegerType::get(C, PropBits.Padding)));
+  }
+  /// Get the default value for this property.
+  static Constant *getDefaultValueImpl(LLVMContext &C) {
+    return Constant::getNullValue(getType(C));
+  }
+
+  /// Return a constant value holding this property.
+  Constant *getValueImpl(LLVMContext &C) const override {
+    // Must match the definition of property type in csi.h
+    return ConstantInt::get(getType(C), PropValue.Bits);
+  }
+
+  /// Set the value of the allocation function type (e.g., malloc, calloc, new).
+  void setAllocFnTy(unsigned v) { PropValue.Fields.AllocFnTy = v; }
+
+private:
+  typedef union {
+    // Must match the definition of property type in csi.h
+    struct {
+      unsigned AllocFnTy : 8;
+      uint64_t Padding : 56;
+    } Fields;
+    uint64_t Bits;
+  } Property;
+
+  /// The underlying values of the properties.
+  Property PropValue;
+
+  typedef struct {
+    int AllocFnTy;
+    int Padding;
+  } PropertyBits;
+
+  /// The number of bits representing each property.
+  static constexpr PropertyBits PropBits = {8, (64 - 8)};
+};
+
+class CsiFreeProperty : public CsiProperty {
+public:
+  CsiFreeProperty() { PropValue.Bits = 0; }
+  /// Return the Type of a property.
+  static Type *getType(LLVMContext &C) {
+    // Must match the definition of property type in csi.h
+    return CsiProperty::getCoercedType(
+        C, StructType::get(IntegerType::get(C, PropBits.FreeTy),
+                           IntegerType::get(C, PropBits.Padding)));
+  }
+  /// Get the default value for this property.
+  static Constant *getDefaultValueImpl(LLVMContext &C) {
+    return Constant::getNullValue(getType(C));
+  }
+
+  /// Return a constant value holding this property.
+  Constant *getValueImpl(LLVMContext &C) const override {
+    // Must match the definition of property type in csi.h
+    return ConstantInt::get(getType(C), PropValue.Bits);
+  }
+
+  /// Set the value of the free function type (e.g., free, delete).
+  void setFreeTy(unsigned v) { PropValue.Fields.FreeTy = v; }
+
+private:
+  typedef union {
+    // Must match the definition of property type in csi.h
+    struct {
+      unsigned FreeTy : 8;
+      uint64_t Padding : 56;
+    } Fields;
+    uint64_t Bits;
+  } Property;
+
+  /// The underlying values of the properties.
+  Property PropValue;
+
+  typedef struct {
+    int FreeTy;
+    int Padding;
+  } PropertyBits;
+
+  /// The number of bits representing each property.
+ static constexpr PropertyBits PropBits = {8, (64 - 8)}; +}; + +struct CSISetupImpl { +public: + CSISetupImpl(Module &M, const CSIOptions &Options = CSIOptions()) + : M(M), Options(Options) {} + + bool run(); + +private: + bool setupFunction(Function &F); + + Module &M; + CSIOptions Options; +}; + +struct CSIImpl { +public: + CSIImpl(Module &M, CallGraph *CG, + function_ref GetDomTree, + function_ref GetLoopInfo, + function_ref GetTaskInfo, + function_ref GetTLI, + function_ref GetSE, + function_ref GetTTI, + const CSIOptions &Options = CSIOptions()) + : M(M), DL(M.getDataLayout()), CG(CG), GetDomTree(GetDomTree), + GetLoopInfo(GetLoopInfo), GetTaskInfo(GetTaskInfo), GetTLI(GetTLI), + GetScalarEvolution(GetSE), GetTTI(GetTTI), Options(Options) { + loadConfiguration(); + } + CSIImpl(Module &M, CallGraph *CG, + function_ref GetDomTree, + function_ref GetLoopInfo, + function_ref GetTaskInfo, + function_ref GetTLI, + const CSIOptions &Options = CSIOptions()) + : M(M), DL(M.getDataLayout()), CG(CG), GetDomTree(GetDomTree), + GetLoopInfo(GetLoopInfo), GetTaskInfo(GetTaskInfo), GetTLI(GetTLI), + Options(Options) { + loadConfiguration(); + } + + virtual ~CSIImpl() {} + + bool run(); + + /// Get the number of bytes accessed via the given address. + static int getNumBytesAccessed(Type *OrigTy, const DataLayout &DL); + + /// Members to extract properties of loads/stores. 
+ static bool isVtableAccess(const Instruction *I); + static bool addrPointsToConstantData(const Value *Addr); + static bool isAtomic(const Instruction *I); + static bool isThreadLocalObject(const Value *Obj); + static bool isAllocFn(const Instruction *I, const TargetLibraryInfo *TLI); + static bool isAllocFn(const Value *V, const TargetLibraryInfo *TLI) { + if (const CallBase *CB = dyn_cast(V)) + return isAllocFn(CB, TLI); + return false; + } + static bool getAllocFnArgs(const Instruction *I, + SmallVectorImpl &AllocFnArgs, + Type *SizeTy, Type *AddrTy, + const TargetLibraryInfo &TLI); + static bool isFreeFn(const Instruction *I, const TargetLibraryInfo *TLI); + + /// Helper functions to set up the CFG for CSI instrumentation. + static void setupCalls(Function &F); + static void setupBlocks(Function &F, const TargetLibraryInfo *TLI, + DominatorTree *DT = nullptr, LoopInfo *LI = nullptr); + static void splitBlocksAtCalls(Function &F, DominatorTree *DT = nullptr, + LoopInfo *LI = nullptr); + + /// Helper function that identifies calls or invokes of placeholder functions, + /// such as debug-info intrinsics or lifetime intrinsics. + static bool callsPlaceholderFunction(const Instruction &I); + + static Constant *getDefaultID(IRBuilder<> &IRB) { + return IRB.getInt64(CsiUnknownId); + } + + static bool spawnsTapirLoopBody(DetachInst *DI, LoopInfo &LI, TaskInfo &TI); + + static BasicBlock::iterator + getFirstInsertionPtInDetachedBlock(BasicBlock *Detached); + + // Return true if BB is an entry block to a function or task, false otherwise. + static bool isEntryBlock(const BasicBlock &BB, const TaskInfo &TI); + +protected: + /// Initialize the CSI pass. + void initializeCsi(); + /// Finalize the CSI pass. + void finalizeCsi(); + + /// Initialize FunctionCallees for the CSI hooks. 
+ /// @{ + void initializeLoadStoreHooks(); + void initializeFuncHooks(); + void initializeBasicBlockHooks(); + void initializeLoopHooks(); + void initializeCallsiteHooks(); + void initializeAllocaHooks(); + void initializeMemIntrinsicsHooks(); + void initializeTapirHooks(); + void initializeAllocFnHooks(); + /// @} + + static StructType *getUnitFedTableType(LLVMContext &C, + PointerType *EntryPointerType); + static Constant *fedTableToUnitFedTable(Module &M, + StructType *UnitFedTableType, + FrontEndDataTable &FedTable); + static StructType *getUnitSizeTableType(LLVMContext &C, + PointerType *EntryPointerType); + static Constant *sizeTableToUnitSizeTable(Module &M, + StructType *UnitSizeTableType, + SizeTable &SzTable); + /// Initialize the front-end data table structures. + void initializeFEDTables(); + /// Collect unit front-end data table structures for finalization. + void collectUnitFEDTables(); + /// Initialize the front-end data table structures. + void initializeSizeTables(); + /// Collect unit front-end data table structures for finalization. + void collectUnitSizeTables(); + + virtual CallInst *createRTUnitInitCall(IRBuilder<> &IRB); + + // Get the local ID of the given function. + uint64_t getLocalFunctionID(Function &F); + /// Generate a function that stores global function IDs into a set + /// of externally-visible global variables. + void generateInitCallsiteToFunction(); + + Instruction *getEntryBBInsertPt(BasicBlock &BB); + + /// Compute CSI properties on the given ordered list of loads and stores. + void computeLoadAndStoreProperties( + SmallVectorImpl> + &LoadAndStoreProperties, + SmallVectorImpl &BBLoadsAndStores); + + /// Insert calls to the instrumentation hooks. 
+ /// @{ + void addLoadStoreInstrumentation(Instruction *I, FunctionCallee BeforeFn, + FunctionCallee AfterFn, Value *CsiId, + Type *AddrType, Value *Addr, int NumBytes, + CsiLoadStoreProperty &Prop); + void instrumentLoadOrStore(Instruction *I, CsiLoadStoreProperty &Prop); + void instrumentAtomic(Instruction *I); + bool instrumentMemIntrinsic(Instruction *I); + void instrumentCallsite(Instruction *I, DominatorTree *DT); + void instrumentBasicBlock(BasicBlock &BB, const TaskInfo &TI); + void instrumentLoop(Loop &L, TaskInfo &TI, ScalarEvolution *SE); + + void instrumentDetach(DetachInst *DI, unsigned SyncRegNum, + unsigned NumSyncRegs, DominatorTree *DT, TaskInfo &TI, + LoopInfo &LI); + void instrumentSync(SyncInst *SI, unsigned SyncRegNum); + void instrumentAlloca(Instruction *I, TaskInfo &TI); + void instrumentAllocFn(Instruction *I, DominatorTree *DT, + const TargetLibraryInfo *TLI); + void instrumentFree(Instruction *I, const TargetLibraryInfo *TLI); + + void interposeCall(Instruction *I); + + void instrumentFunction(Function &F); + /// @} + + /// Obtain the signature for the interposition function given the + /// original function that needs interpositioning. + Function *getInterpositionFunction(Function *F); + + /// Insert a call to the given hook function before the given instruction. + CallInst* insertHookCall(Instruction *I, FunctionCallee HookFunction, + ArrayRef HookArgs); + bool updateArgPHIs(BasicBlock *Succ, BasicBlock *BB, + FunctionCallee HookFunction, ArrayRef HookArgs, + ArrayRef DefaultHookArgs); + CallInst *insertHookCallInSuccessorBB(BasicBlock *Succ, BasicBlock *BB, + FunctionCallee HookFunction, + ArrayRef HookArgs, + ArrayRef DefaultHookArgs); + void insertHookCallAtSharedEHSpindleExits(Spindle *SharedEHSpindle, Task *T, + FunctionCallee HookFunction, + FrontEndDataTable &FED, + ArrayRef HookArgs, + ArrayRef DefaultArgs); + + /// Return true if the given function should not be instrumented. 
+ static bool shouldNotInstrumentFunction(Function &F); + + // Update the attributes on the instrumented function that might be + // invalidated by the inserted instrumentation. + void updateInstrumentedFnAttrs(Function &F); + // List of all allocation function types. This list needs to remain + // consistent with TargetLibraryInfo and with csi.h. + enum class AllocFnTy { + malloc = 0, + valloc, + calloc, + aligned_alloc, + realloc, + reallocf, + Znwj, + ZnwjRKSt9nothrow_t, + Znwm, + ZnwmRKSt9nothrow_t, + Znaj, + ZnajRKSt9nothrow_t, + Znam, + ZnamRKSt9nothrow_t, + msvc_new_int, + msvc_new_int_nothrow, + msvc_new_longlong, + msvc_new_longlong_nothrow, + msvc_new_array_int, + msvc_new_array_int_nothrow, + msvc_new_array_longlong, + msvc_new_array_longlong_nothrow, + ZnwjSt11align_val_t, + ZnwmSt11align_val_t, + ZnajSt11align_val_t, + ZnamSt11align_val_t, + ZnwjSt11align_val_tRKSt9nothrow_t, + ZnwmSt11align_val_tRKSt9nothrow_t, + ZnajSt11align_val_tRKSt9nothrow_t, + ZnamSt11align_val_tRKSt9nothrow_t, + posix_memalign, + strdup, + strndup, + LAST_ALLOCFNTY + }; + + static AllocFnTy getAllocFnTy(const LibFunc &F) { + switch (F) { + default: + return AllocFnTy::LAST_ALLOCFNTY; + case LibFunc_malloc: + return AllocFnTy::malloc; + case LibFunc_valloc: + return AllocFnTy::valloc; + case LibFunc_aligned_alloc: + // aligned_alloc(align_val_t, size_t) + return AllocFnTy::aligned_alloc; + case LibFunc_calloc: + return AllocFnTy::calloc; + case LibFunc_realloc: + return AllocFnTy::realloc; + case LibFunc_reallocf: + return AllocFnTy::reallocf; + case LibFunc_Znwj: + // new(unsigned int); + return AllocFnTy::Znwj; + case LibFunc_ZnwjRKSt9nothrow_t: + // new(unsigned int, nothrow); + return AllocFnTy::ZnwjRKSt9nothrow_t; + case LibFunc_Znwm: + // new(unsigned long); + return AllocFnTy::Znwm; + case LibFunc_ZnwmRKSt9nothrow_t: + // new(unsigned long, nothrow); + return AllocFnTy::ZnwmRKSt9nothrow_t; + case LibFunc_Znaj: + // new[](unsigned int); + return AllocFnTy::Znaj; + case 
LibFunc_ZnajRKSt9nothrow_t: + // new[](unsigned int, nothrow); + return AllocFnTy::ZnajRKSt9nothrow_t; + case LibFunc_Znam: + // new[](unsigned long); + return AllocFnTy::Znam; + case LibFunc_ZnamRKSt9nothrow_t: + // new[](unsigned long, nothrow); + return AllocFnTy::ZnamRKSt9nothrow_t; + case LibFunc_msvc_new_int: + // new(unsigned int); + return AllocFnTy::msvc_new_int; + case LibFunc_msvc_new_int_nothrow: + // new(unsigned int, nothrow); + return AllocFnTy::msvc_new_int_nothrow; + case LibFunc_msvc_new_longlong: + // new(unsigned long long); + return AllocFnTy::msvc_new_longlong; + case LibFunc_msvc_new_longlong_nothrow: + // new(unsigned long long, nothrow); + return AllocFnTy::msvc_new_longlong_nothrow; + case LibFunc_msvc_new_array_int: + // new[](unsigned int); + return AllocFnTy::msvc_new_array_int; + case LibFunc_msvc_new_array_int_nothrow: + // new[](unsigned int, nothrow); + return AllocFnTy::msvc_new_array_int_nothrow; + case LibFunc_msvc_new_array_longlong: + // new[](unsigned long long); + return AllocFnTy::msvc_new_array_longlong; + case LibFunc_msvc_new_array_longlong_nothrow: + // new[](unsigned long long, nothrow); + return AllocFnTy::msvc_new_array_longlong_nothrow; + case LibFunc_ZnwjSt11align_val_t: + // new(unsigned int, align_val_t) + return AllocFnTy::ZnwjSt11align_val_t; + case LibFunc_ZnwmSt11align_val_t: + // new(unsigned long, align_val_t) + return AllocFnTy::ZnwmSt11align_val_t; + case LibFunc_ZnajSt11align_val_t: + // new[](unsigned int, align_val_t) + return AllocFnTy::ZnajSt11align_val_t; + case LibFunc_ZnamSt11align_val_t: + // new[](unsigned long, align_val_t) + return AllocFnTy::ZnamSt11align_val_t; + case LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t: + // new(unsigned int, align_val_t, nothrow) + return AllocFnTy::ZnwjSt11align_val_tRKSt9nothrow_t; + case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t: + // new(unsigned long, align_val_t, nothrow) + return AllocFnTy::ZnwmSt11align_val_tRKSt9nothrow_t; + case 
LibFunc_ZnajSt11align_val_tRKSt9nothrow_t: + // new[](unsigned int, align_val_t, nothrow) + return AllocFnTy::ZnajSt11align_val_tRKSt9nothrow_t; + case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t: + // new[](unsigned long, align_val_t, nothrow) + return AllocFnTy::ZnamSt11align_val_tRKSt9nothrow_t; + case LibFunc_posix_memalign: + // posix_memalign(void **, size_t, size_t) + return AllocFnTy::posix_memalign; + case LibFunc_strdup: + // strdup(const char *) + return AllocFnTy::strdup; + case LibFunc_strndup: + // strdup(const char *, size_t) + return AllocFnTy::strndup; + } + } + + // List of all free function types. This list needs to remain consistent with + // TargetLibraryInfo and with csi.h. + enum class FreeTy { + free = 0, + ZdlPv, + ZdlPvRKSt9nothrow_t, + ZdlPvj, + ZdlPvm, + ZdaPv, + ZdaPvRKSt9nothrow_t, + ZdaPvj, + ZdaPvm, + msvc_delete_ptr32, + msvc_delete_ptr32_nothrow, + msvc_delete_ptr32_int, + msvc_delete_ptr64, + msvc_delete_ptr64_nothrow, + msvc_delete_ptr64_longlong, + msvc_delete_array_ptr32, + msvc_delete_array_ptr32_nothrow, + msvc_delete_array_ptr32_int, + msvc_delete_array_ptr64, + msvc_delete_array_ptr64_nothrow, + msvc_delete_array_ptr64_longlong, + ZdlPvSt11align_val_t, + ZdlPvSt11align_val_tRKSt9nothrow_t, + ZdaPvSt11align_val_t, + ZdaPvSt11align_val_tRKSt9nothrow_t, + LAST_FREETY + }; + + static FreeTy getFreeTy(const LibFunc &F) { + switch (F) { + default: + return FreeTy::LAST_FREETY; + case LibFunc_free: + return FreeTy::free; + case LibFunc_ZdlPv: + // void operator delete(void*); + return FreeTy::ZdlPv; + case LibFunc_ZdlPvRKSt9nothrow_t: + // void operator delete(void*, nothrow); + return FreeTy::ZdlPvRKSt9nothrow_t; + case LibFunc_ZdlPvj: + // void operator delete(void*, unsigned int); + return FreeTy::ZdlPvj; + case LibFunc_ZdlPvm: + // void operator delete(void*, unsigned long); + return FreeTy::ZdlPvm; + case LibFunc_ZdaPv: + // void operator delete[](void*); + return FreeTy::ZdaPv; + case LibFunc_ZdaPvRKSt9nothrow_t: + // void 
operator delete[](void*, nothrow); + return FreeTy::ZdaPvRKSt9nothrow_t; + case LibFunc_ZdaPvj: + // void operator delete[](void*, unsigned int); + return FreeTy::ZdaPvj; + case LibFunc_ZdaPvm: + // void operator delete[](void*, unsigned long); + return FreeTy::ZdaPvm; + case LibFunc_msvc_delete_ptr32: + // void operator delete(void*); + return FreeTy::msvc_delete_ptr32; + case LibFunc_msvc_delete_ptr32_nothrow: + // void operator delete(void*, nothrow); + return FreeTy::msvc_delete_ptr32_nothrow; + case LibFunc_msvc_delete_ptr32_int: + // void operator delete(void*, unsigned int); + return FreeTy::msvc_delete_ptr32_int; + case LibFunc_msvc_delete_ptr64: + // void operator delete(void*); + return FreeTy::msvc_delete_ptr64; + case LibFunc_msvc_delete_ptr64_nothrow: + // void operator delete(void*, nothrow); + return FreeTy::msvc_delete_ptr64_nothrow; + case LibFunc_msvc_delete_ptr64_longlong: + // void operator delete(void*, unsigned long long); + return FreeTy::msvc_delete_ptr64_longlong; + case LibFunc_msvc_delete_array_ptr32: + // void operator delete[](void*); + return FreeTy::msvc_delete_array_ptr32; + case LibFunc_msvc_delete_array_ptr32_nothrow: + // void operator delete[](void*, nothrow); + return FreeTy::msvc_delete_array_ptr32_nothrow; + case LibFunc_msvc_delete_array_ptr32_int: + // void operator delete[](void*, unsigned int); + return FreeTy::msvc_delete_array_ptr32_int; + case LibFunc_msvc_delete_array_ptr64: + // void operator delete[](void*); + return FreeTy::msvc_delete_array_ptr64; + case LibFunc_msvc_delete_array_ptr64_nothrow: + // void operator delete[](void*, nothrow); + return FreeTy::msvc_delete_array_ptr64_nothrow; + case LibFunc_msvc_delete_array_ptr64_longlong: + // void operator delete[](void*, unsigned long long); + return FreeTy::msvc_delete_array_ptr64_longlong; + case LibFunc_ZdlPvSt11align_val_t: + // void operator delete(void*, align_val_t) + return FreeTy::ZdlPvSt11align_val_t; + case LibFunc_ZdlPvSt11align_val_tRKSt9nothrow_t: + // 
void operator delete(void*, align_val_t, nothrow) + return FreeTy::ZdlPvSt11align_val_tRKSt9nothrow_t; + case LibFunc_ZdaPvSt11align_val_t: + // void operator delete[](void*, align_val_t) + return FreeTy::ZdaPvSt11align_val_t; + case LibFunc_ZdaPvSt11align_val_tRKSt9nothrow_t: + // void operator delete[](void*, align_val_t, nothrow) + return FreeTy::ZdaPvSt11align_val_tRKSt9nothrow_t; + } + } + + void linkInToolFromBitcode(const std::string &BitcodePath); + void loadConfiguration(); + + Module &M; + const DataLayout &DL; + CallGraph *CG; + function_ref GetDomTree; + function_ref GetLoopInfo; + function_ref GetTaskInfo; + function_ref GetTLI; + std::optional> GetScalarEvolution; + std::optional> GetTTI; + CSIOptions Options; + + FrontEndDataTable FunctionFED, FunctionExitFED, BasicBlockFED, LoopFED, + LoopExitFED, CallsiteFED, LoadFED, StoreFED, AllocaFED, DetachFED, + TaskFED, TaskExitFED, DetachContinueFED, SyncFED, AllocFnFED, FreeFED; + + SmallVector UnitFedTables; + + SizeTable BBSize; + SmallVector UnitSizeTables; + + // Instrumentation hooks + FunctionCallee CsiFuncEntry = nullptr, CsiFuncExit = nullptr; + FunctionCallee CsiBBEntry = nullptr, CsiBBExit = nullptr; + FunctionCallee CsiBeforeCallsite = nullptr, CsiAfterCallsite = nullptr; + FunctionCallee CsiBeforeLoop = nullptr, CsiAfterLoop = nullptr; + FunctionCallee CsiLoopBodyEntry = nullptr, CsiLoopBodyExit = nullptr; + FunctionCallee CsiBeforeRead = nullptr, CsiAfterRead = nullptr; + FunctionCallee CsiBeforeWrite = nullptr, CsiAfterWrite = nullptr; + FunctionCallee CsiAfterAlloca = nullptr; + FunctionCallee CsiDetach = nullptr, CsiDetachContinue = nullptr; + FunctionCallee CsiTaskEntry = nullptr, CsiTaskExit = nullptr; + FunctionCallee CsiBeforeSync = nullptr, CsiAfterSync = nullptr; + FunctionCallee CsiBeforeAllocFn = nullptr, CsiAfterAllocFn = nullptr; + FunctionCallee CsiBeforeFree = nullptr, CsiAfterFree = nullptr; + + FunctionCallee MemmoveFn = nullptr, MemcpyFn = nullptr, MemsetFn = nullptr; + 
Function *InitCallsiteToFunction = nullptr; + // GlobalVariable *DisableInstrGV; + + // Runtime unit initialization + FunctionCallee RTUnitInit = nullptr; + + Type *IntptrTy; + DenseMap FuncOffsetMap; + + DenseMap, + SmallVector> ArgPHIs; + SmallPtrSet SyncsWithUnwinds; + DenseMap EntryBBInsertPt; + + std::unique_ptr Config; + + // Declarations of interposition functions. + DenseMap InterpositionFunctions; + + bool LinkedBitcode = false; + SmallSet FunctionsInBitcode; + SmallPtrSet LinkedFromBitcode; + + // // Cached results of calls to GetUnderlyingObject. + // using UnderlyingObjMapTy = DenseMap; + // mutable UnderlyingObjMapTy UnderlyingObject; + Value *lookupUnderlyingObject(Value *Addr) const; + + friend struct CSISetupImpl; +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_CSI_H diff --git a/llvm/include/llvm/Transforms/Instrumentation/CilkSanitizer.h b/llvm/include/llvm/Transforms/Instrumentation/CilkSanitizer.h new file mode 100644 index 00000000000000..f53ace2c9c3f89 --- /dev/null +++ b/llvm/include/llvm/Transforms/Instrumentation/CilkSanitizer.h @@ -0,0 +1,34 @@ +//===- CilkSanitizer.h ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file is part of CilkSan, a determinacy-race detector for Cilk and Tapir +/// programs. +/// +/// This instrumentation pass inserts calls to the CilkSan runtime library +/// before appropriate memory accesses. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_CILKSANITIZER_H +#define LLVM_TRANSFORMS_CILKSANITIZER_H + +#include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Instrumentation.h" + +namespace llvm { + +/// CilkSanitizer pass for new pass manager. +class CilkSanitizerPass : public PassInfoMixin { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_CILKSANITIZER_H diff --git a/llvm/include/llvm/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.h b/llvm/include/llvm/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.h new file mode 100644 index 00000000000000..0c81c35a36e887 --- /dev/null +++ b/llvm/include/llvm/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.h @@ -0,0 +1,48 @@ +//===- ComprehensiveStaticInstrumentation.h ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file is part of CSI, a framework that provides comprehensive static +/// instrumentation. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_COMPREHENSIVESTATICINSTRUMENTATION_H +#define LLVM_TRANSFORMS_COMPREHENSIVESTATICINSTRUMENTATION_H + +#include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Instrumentation.h" + +namespace llvm { + +/// CSISetup pass for new pass manager. 
+class CSISetupPass : public PassInfoMixin { +public: + CSISetupPass(); + CSISetupPass(const CSIOptions &Options); + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + +private: + CSIOptions Options; +}; + +/// ComprehensiveStaticInstrumentation pass for new pass manager. +class ComprehensiveStaticInstrumentationPass : + public PassInfoMixin { +public: + ComprehensiveStaticInstrumentationPass(); + ComprehensiveStaticInstrumentationPass(const CSIOptions &Options); + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + +private: + CSIOptions Options; +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_COMPREHENSIVESTATICINSTRUMENTATION_H diff --git a/llvm/include/llvm/Transforms/Instrumentation/SurgicalInstrumentationConfig.h b/llvm/include/llvm/Transforms/Instrumentation/SurgicalInstrumentationConfig.h new file mode 100644 index 00000000000000..403c320aee9dde --- /dev/null +++ b/llvm/include/llvm/Transforms/Instrumentation/SurgicalInstrumentationConfig.h @@ -0,0 +1,156 @@ +//===-- SurgicalInstrumentationConfig.h -- Surgical CSI ------*- C++ -*----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is part of CSI, a framework that provides comprehensive static +// instrumentation. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_SURGICALINSTRUMENTATIONCONFIG_H +#define LLVM_TRANSFORMS_INSTRUMENTATION_SURGICALINSTRUMENTATIONCONFIG_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +enum InstrumentationConfigMode { WHITELIST = 0, BLACKLIST = 1 }; + +enum InstrumentationPoint : int { + INSTR_INVALID_POINT = 0x0, + INSTR_FUNCTION_ENTRY = 0x1, + INSTR_FUNCTION_EXIT = 0x1 << 1, + INSTR_BEFORE_CALL = 0x1 << 2, + INSTR_AFTER_CALL = 0x1 << 3, + INSTR_TAPIR_DETACH = 0x1 << 4, + INSTR_TAPIR_SYNC = 0x1 << 5, +}; + +#define INSTR_ALL_POINTS InstrumentationPoint::INSTR_INVALID_POINT + +inline InstrumentationPoint operator|(const InstrumentationPoint &a, + const InstrumentationPoint &b) { + return static_cast(static_cast(a) | + static_cast(b)); +} + +inline InstrumentationPoint operator&(const InstrumentationPoint &a, + const InstrumentationPoint &b) { + return static_cast(static_cast(a) & + static_cast(b)); +} + +inline bool operator==(InstrumentationPoint a, InstrumentationPoint b) { + return static_cast(a) == static_cast(b); +} + +inline InstrumentationPoint &operator|=(InstrumentationPoint &a, + InstrumentationPoint b) { + return a = a | b; +} + +static StringMap SurgicalInstrumentationPoints = { + {"FunctionEntry", INSTR_FUNCTION_ENTRY}, + { + "FunctionExit", + INSTR_FUNCTION_EXIT, + }, + { + "BeforeCall", + INSTR_BEFORE_CALL, + }, + { + "AfterCall", + INSTR_AFTER_CALL, + }, + { + "TapirDetach", + INSTR_TAPIR_DETACH, + }, + { + "TapirSync", + INSTR_TAPIR_SYNC, + }, +}; + +InstrumentationPoint +ParseInstrumentationPoint(const StringRef &instrPointString); + +class InstrumentationConfig { +public: + virtual ~InstrumentationConfig() {} + + void SetConfigMode(InstrumentationConfigMode mode) { this->mode = mode; } + + static std::unique_ptr GetDefault(); + + 
static std::unique_ptr + ReadFromConfigurationFile(const std::string &filename); + + virtual bool DoesFunctionRequireInterposition(const StringRef &functionName) { + return interposedFunctions.find(functionName) != interposedFunctions.end(); + } + + virtual bool DoesAnyFunctionRequireInterposition() { + return interposedFunctions.size() > 0; + } + + virtual bool DoesFunctionRequireInstrumentationForPoint( + const StringRef &functionName, const InstrumentationPoint &point) { + if (targetFunctions.size() == 0) + return true; + + bool found = targetFunctions.find(functionName) != targetFunctions.end(); + + if (found) // The function is in the configuration. Does it specify this + // instrumentation point? + { + InstrumentationPoint &functionPoints = targetFunctions[functionName]; + + if (functionPoints != INSTR_ALL_POINTS) { + if ((targetFunctions[functionName] & point) != point) + found = false; + } + } + + return mode == InstrumentationConfigMode::WHITELIST ? found : !found; + } + +protected: + InstrumentationConfig(){}; + InstrumentationConfig(const StringMap &targetFunctions, + const StringSet<> &interposedFunctions) + : targetFunctions(targetFunctions), + interposedFunctions(interposedFunctions) {} + + StringMap targetFunctions; + + StringSet<> interposedFunctions; + + InstrumentationConfigMode mode = InstrumentationConfigMode::WHITELIST; +}; + +class DefaultInstrumentationConfig : public InstrumentationConfig { +public: + virtual bool DoesFunctionRequireInstrumentationForPoint( + const StringRef &functionName, + const InstrumentationPoint &point) override { + return true; + } + + virtual bool DoesAnyFunctionRequireInterposition() override { return false; } + + virtual bool + DoesFunctionRequireInterposition(const StringRef &functionName) override { + return false; + } +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_INSTRUMENTATION_SURGICALINSTRUMENTATIONCONFIG_H diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h 
b/llvm/include/llvm/Transforms/Scalar/GVN.h index 0a00e3af03d2fd..4008574eb5e97f 100644 --- a/llvm/include/llvm/Transforms/Scalar/GVN.h +++ b/llvm/include/llvm/Transforms/Scalar/GVN.h @@ -53,6 +53,7 @@ class NonLocalDepResult; class OptimizationRemarkEmitter; class PHINode; class TargetLibraryInfo; +class TaskInfo; class Value; /// A private "module" namespace for types and utilities used by GVN. These /// are implementation details and should not be used by clients. @@ -262,7 +263,8 @@ class GVNPass : public PassInfoMixin { bool runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, const TargetLibraryInfo &RunTLI, AAResults &RunAA, MemoryDependenceResults *RunMD, LoopInfo *LI, - OptimizationRemarkEmitter *ORE, MemorySSA *MSSA = nullptr); + OptimizationRemarkEmitter *ORE, TaskInfo *TI = nullptr, + MemorySSA *MSSA = nullptr); /// Push a new Value to the LeaderTable onto the list for its value number. void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) { diff --git a/llvm/include/llvm/Transforms/Scalar/IndVarSimplify.h b/llvm/include/llvm/Transforms/Scalar/IndVarSimplify.h index b5d544f1149c6d..0870101a306cd2 100644 --- a/llvm/include/llvm/Transforms/Scalar/IndVarSimplify.h +++ b/llvm/include/llvm/Transforms/Scalar/IndVarSimplify.h @@ -32,6 +32,17 @@ class IndVarSimplifyPass : public PassInfoMixin { LoopStandardAnalysisResults &AR, LPMUpdater &U); }; +class TapirIndVarSimplifyPass : public PassInfoMixin { + /// Perform IV widening during the pass. 
+ bool WidenIndVars; + +public: + TapirIndVarSimplifyPass(bool WidenIndVars = true) + : WidenIndVars(WidenIndVars) {} + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); +}; + } // end namespace llvm #endif // LLVM_TRANSFORMS_SCALAR_INDVARSIMPLIFY_H diff --git a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h index 3364d7eaee4247..8c358350e239a0 100644 --- a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h +++ b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h @@ -92,6 +92,8 @@ class JumpThreadingPass : public PassInfoMixin { #else SmallSet, 16> LoopHeaders; #endif + DenseMap, SmallPtrSet> + TapirTasks; unsigned BBDupThreshold; unsigned DefaultBBDupThreshold; @@ -111,6 +113,7 @@ class JumpThreadingPass : public PassInfoMixin { DomTreeUpdater *getDomTreeUpdater() const { return DTU.get(); } void findLoopHeaders(Function &F); + void findTapirTasks(Function &F, DominatorTree &DT); bool processBlock(BasicBlock *BB); bool maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB); void updateSSA(BasicBlock *BB, BasicBlock *NewBB, diff --git a/llvm/include/llvm/Transforms/Scalar/SROA.h b/llvm/include/llvm/Transforms/Scalar/SROA.h index b18e3054ef3ae4..fd9d3181c4a20e 100644 --- a/llvm/include/llvm/Transforms/Scalar/SROA.h +++ b/llvm/include/llvm/Transforms/Scalar/SROA.h @@ -36,6 +36,7 @@ class Function; class LLVMContext; class PHINode; class SelectInst; +class TaskInfo; class Use; /// A private "module" namespace for types and utilities used by SROA. These @@ -97,6 +98,7 @@ class SROAPass : public PassInfoMixin { LLVMContext *C = nullptr; DomTreeUpdater *DTU = nullptr; AssumptionCache *AC = nullptr; + TaskInfo *TI = nullptr; const bool PreserveCFG; /// Worklist of alloca instructions to simplify. @@ -173,9 +175,9 @@ class SROAPass : public PassInfoMixin { /// Helper used by both the public run method and by the legacy pass. 
PreservedAnalyses runImpl(Function &F, DomTreeUpdater &RunDTU, - AssumptionCache &RunAC); + AssumptionCache &RunAC, TaskInfo &RunTI); PreservedAnalyses runImpl(Function &F, DominatorTree &RunDT, - AssumptionCache &RunAC); + AssumptionCache &RunAC, TaskInfo &RunTI); bool presplitLoadsAndStores(AllocaInst &AI, sroa::AllocaSlices &AS); AllocaInst *rewritePartition(AllocaInst &AI, sroa::AllocaSlices &AS, diff --git a/llvm/include/llvm/Transforms/Tapir.h b/llvm/include/llvm/Transforms/Tapir.h new file mode 100644 index 00000000000000..17a1ec26af12f9 --- /dev/null +++ b/llvm/include/llvm/Transforms/Tapir.h @@ -0,0 +1,67 @@ +//===- Tapir.h - Tapir Transformations --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header file defines prototypes for accessor functions that expose passes +// in the Tapir transformations library. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_TAPIR_H +#define LLVM_TRANSFORMS_TAPIR_H + +namespace llvm { +class Pass; +class ModulePass; +class FunctionPass; +enum class TapirTargetID; + +//===----------------------------------------------------------------------===// +// +// LoopSpawningTI - Create a loop spawning pass that uses Task Info. +// +Pass *createLoopSpawningTIPass(); + +//===----------------------------------------------------------------------===// +// +// LowerTapirToTarget - Lower Tapir constructs to a specified parallel runtime. 
+// +ModulePass *createLowerTapirToTargetPass(); + +//===----------------------------------------------------------------------===// +// +// TaskCanonicalize - Canonicalize Tapir tasks +// +FunctionPass *createTaskCanonicalizePass(); + +//===----------------------------------------------------------------------===// +// +// TaskSimplify - Simplify Tapir tasks +// +FunctionPass *createTaskSimplifyPass(); + +//===----------------------------------------------------------------------===// +// +// DRFScopedNoAlias - Add scoped-noalias information based on DRF assumption +// +FunctionPass *createDRFScopedNoAliasWrapperPass(); + +//===----------------------------------------------------------------------===// +// +// LoopStripMinePass - Stripmine Tapir loops +// +Pass *createLoopStripMinePass(int Count = -1); + +//===----------------------------------------------------------------------===// +// +// SerializeSmallTasksPass - Serialize small Tapir tasks +// +FunctionPass *createSerializeSmallTasksPass(); + +} // End llvm namespace + +#endif diff --git a/llvm/include/llvm/Transforms/Tapir/CilkABI.h b/llvm/include/llvm/Transforms/Tapir/CilkABI.h new file mode 100644 index 00000000000000..a803d7c2597f02 --- /dev/null +++ b/llvm/include/llvm/Transforms/Tapir/CilkABI.h @@ -0,0 +1,134 @@ +//===- CilkABI.h - Interface to the Intel Cilk Plus runtime ----*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Cilk ABI to converts Tapir instructions to calls +// into the Cilk runtime system. 
+// +//===----------------------------------------------------------------------===// +#ifndef CILK_ABI_H_ +#define CILK_ABI_H_ + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Transforms/Tapir/LoweringUtils.h" + +namespace llvm { +class Value; +class TapirLoopInfo; + +class CilkABI : public TapirTarget { + ValueToValueMapTy DetachCtxToStackFrame; + SmallPtrSet CallsToInline; + + // Cilk RTS data types + StructType *PedigreeTy = nullptr; + enum PedigreeFields { rank = 0, next }; + StructType *StackFrameTy = nullptr; + enum StackFrameFields + { + flags = 0, + size, + call_parent, + worker, + except_data, + ctx, + mxcsr, + fpcsr, + reserved, + parent_pedigree + }; + StructType *WorkerTy = nullptr; + enum WorkerFields + { + tail = 0, + head, + exc, + protected_tail, + ltq_limit, + self, + g, + l, + reducer_map, + current_stack_frame, + saved_protected_tail, + sysdep, + pedigree + }; + + // Opaque Cilk RTS functions + FunctionCallee CilkRTSInit = nullptr; + FunctionCallee CilkRTSLeaveFrame = nullptr; + FunctionCallee CilkRTSRethrow = nullptr; + FunctionCallee CilkRTSSync = nullptr; + FunctionCallee CilkRTSGetNworkers = nullptr; + FunctionCallee CilkRTSGetTLSWorker = nullptr; + FunctionCallee CilkRTSGetTLSWorkerFast = nullptr; + FunctionCallee CilkRTSBindThread1 = nullptr; + + // Accessors for opaque Cilk RTS functions + FunctionCallee Get__cilkrts_init(); + FunctionCallee Get__cilkrts_leave_frame(); + FunctionCallee Get__cilkrts_rethrow(); + FunctionCallee Get__cilkrts_sync(); + FunctionCallee Get__cilkrts_get_nworkers(); + FunctionCallee Get__cilkrts_get_tls_worker(); + FunctionCallee Get__cilkrts_get_tls_worker_fast(); + FunctionCallee Get__cilkrts_bind_thread_1(); + // Accessors for compiler-generated Cilk RTS functions + Function *Get__cilkrts_enter_frame_1(); + Function *Get__cilkrts_enter_frame_fast_1(); + Function *Get__cilkrts_detach(); + Function *Get__cilkrts_pop_frame(); + + // Helper functions for implementing the Cilk 
ABI protocol + Function *GetCilkSyncFn(bool instrument = false); + Function *GetCilkSyncNothrowFn(bool instrument = false); + Function *GetCilkCatchExceptionFn(Type *ExnTy); + Function *GetCilkParentEpilogueFn(bool instrument = false); + void EmitSaveFloatingPointState(IRBuilder<> &B, Value *SF); + AllocaInst *CreateStackFrame(Function &F); + Value *GetOrInitCilkStackFrame(Function &F, bool Helper, + bool instrumet = false); + CallInst *EmitCilkSetJmp(IRBuilder<> &B, Value *SF); + bool makeFunctionDetachable(Function &Extracted, Instruction *DetachPt, + Instruction *TaskFrameCreate, + bool instrument = false); + +public: + CilkABI(Module &M); + ~CilkABI() { DetachCtxToStackFrame.clear(); } + void prepareModule() override final; + Value *lowerGrainsizeCall(CallInst *GrainsizeCall) override final; + void lowerSync(SyncInst &SI) override final; + + ArgStructMode getArgStructMode() const override final; + void addHelperAttributes(Function &F) override final; + + bool preProcessFunction(Function &F, TaskInfo &TI, + bool ProcessingTapirLoops) override final; + void postProcessFunction(Function &F, + bool ProcessingTapirLoops) override final; + void postProcessHelper(Function &F) override final; + + void preProcessOutlinedTask(Function &F, Instruction *DetachPt, + Instruction *TaskFrameCreate, bool IsSpawner, + BasicBlock *TFEntry) override final; + void postProcessOutlinedTask(Function &F, Instruction *DetachPt, + Instruction *TaskFrameCreate, bool IsSpaner, + BasicBlock *TFEntry) override final; + void preProcessRootSpawner(Function &F, BasicBlock *TFEntry) override final; + void postProcessRootSpawner(Function &F, BasicBlock *TFEntry) override final; + void processSubTaskCall(TaskOutlineInfo &TOI, + DominatorTree &DT) override final; + + LoopOutlineProcessor * + getLoopOutlineProcessor(const TapirLoopInfo *TL) const override final; +}; +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Transforms/Tapir/CilkRTSCilkFor.h 
b/llvm/include/llvm/Transforms/Tapir/CilkRTSCilkFor.h new file mode 100644 index 00000000000000..f0dfd4559cbbb9 --- /dev/null +++ b/llvm/include/llvm/Transforms/Tapir/CilkRTSCilkFor.h @@ -0,0 +1,60 @@
//===- CilkRTSCilkFor.h - Interface to __cilkrts_cilk_for ------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a loop-outline processor to lower Tapir loops to a call
// to a Cilk runtime method, __cilkrts_cilk_for.
//
//===----------------------------------------------------------------------===//
#ifndef CILKRTS_CILK_FOR_H_
#define CILKRTS_CILK_FOR_H_

#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Tapir/LoweringUtils.h"

namespace llvm {
class Value;
class TapirLoopInfo;

// NOTE(review): template arguments (e.g. cl::opt<...>, SmallVectorImpl<...>)
// appear to have been stripped from this header by text extraction; confirm
// exact types against the upstream OpenCilk sources.
extern cl::opt UseRuntimeCilkFor;

/// The RuntimeCilkFor loop-outline processor transforms an outlined Tapir loop
/// to be processed using a call to a runtime method __cilkrts_cilk_for_32 or
/// __cilkrts_cilk_for_64.
class RuntimeCilkFor : public LoopOutlineProcessor {
  // Type used for the grainsize argument passed to the runtime (i32).
  Type *GrainsizeType = nullptr;
  FunctionCallee CilkRTSCilkFor32 = nullptr;
  FunctionCallee CilkRTSCilkFor64 = nullptr;

  // Lazily get-or-create declarations of the runtime entry points.
  FunctionCallee Get__cilkrts_cilk_for_32();
  FunctionCallee Get__cilkrts_cilk_for_64();

public:
  RuntimeCilkFor(Module &M) : LoopOutlineProcessor(M) {
    GrainsizeType = Type::getInt32Ty(M.getContext());
  }

  ArgStructMode getArgStructMode() const override final {
    // return ArgStructMode::Dynamic;
    return ArgStructMode::Static;
  }
  void setupLoopOutlineArgs(
      Function &F, ValueSet &HelperArgs, SmallVectorImpl &HelperInputs,
      ValueSet &InputSet, const SmallVectorImpl &LCArgs,
      const SmallVectorImpl &LCInputs,
      const ValueSet &TLInputsFixed)
      override final;
  unsigned getIVArgIndex(const Function &F, const ValueSet &Args) const
      override final;
  void postProcessOutline(TapirLoopInfo &TL, TaskOutlineInfo &Out,
                          ValueToValueMapTy &VMap) override final;
  void processOutlinedLoopCall(TapirLoopInfo &TL, TaskOutlineInfo &TOI,
                               DominatorTree &DT) override final;
};
} // end namespace llvm

#endif
diff --git a/llvm/include/llvm/Transforms/Tapir/CudaABI.h b/llvm/include/llvm/Transforms/Tapir/CudaABI.h new file mode 100644 index 00000000000000..88ee58cc6f37db --- /dev/null +++ b/llvm/include/llvm/Transforms/Tapir/CudaABI.h @@ -0,0 +1,91 @@
//===- CudaABI.h - Interface to the Kitsune CUDA back end ------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the Kitsune CUDA ABI to convert Tapir instructions to
// calls into the Kitsune runtime system for NVIDIA GPU code.
//
//===----------------------------------------------------------------------===//
#ifndef CUDA_ABI_H_
#define CUDA_ABI_H_

#include "llvm/Transforms/Tapir/LoweringUtils.h"
#include "llvm/Transforms/Tapir/TapirLoopInfo.h"

namespace llvm {

class DataLayout;
class TargetMachine;

/// Tapir-lowering target for the Kitsune CUDA back end.  Implements the
/// TapirTarget callbacks invoked during Tapir lowering (see LoweringUtils.h).
class CudaABI : public TapirTarget {
public:
  CudaABI(Module &M) : TapirTarget(M) {}
  ~CudaABI() {}
  Value *lowerGrainsizeCall(CallInst *GrainsizeCall) override final;
  void lowerSync(SyncInst &SI) override final;

  // No extra attributes are needed on outlined helpers for this target.
  void addHelperAttributes(Function &F) override final {}
  bool preProcessFunction(Function &F, TaskInfo &TI,
                          bool ProcessingTapirLoops) override final;
  void postProcessFunction(Function &F,
                           bool ProcessingTapirLoops) override final;
  void postProcessHelper(Function &F) override final;

  void preProcessOutlinedTask(Function &F, Instruction *DetachPt,
                              Instruction *TaskFrameCreate, bool IsSpawner,
                              BasicBlock *TFEntry) override final;
  void postProcessOutlinedTask(Function &F, Instruction *DetachPt,
                               Instruction *TaskFrameCreate, bool IsSpawner,
                               BasicBlock *TFEntry) override final;
  void preProcessRootSpawner(Function &F, BasicBlock *TFEntry) override final;
  void postProcessRootSpawner(Function &F, BasicBlock *TFEntry) override final;
  void processSubTaskCall(TaskOutlineInfo &TOI,
                          DominatorTree &DT) override final;

  LoopOutlineProcessor *
  getLoopOutlineProcessor(const TapirLoopInfo *TL) const override final;
};

/// Loop-outline processor that compiles an outlined Tapir loop into a PTX
/// kernel and emits Kitsune runtime calls to launch it.
class PTXLoop : public LoopOutlineProcessor {
private:
  // Monotonically increasing ID used to give each generated kernel a unique
  // name; MyKernelID is this instance's assigned ID.
  static unsigned NextKernelID;
  unsigned MyKernelID;
  // Separate module holding the PTX-targeted code.
  Module PTXM;
  TargetMachine *PTXTargetMachine;
  GlobalVariable *PTXGlobal;

  // NOTE(review): FunctionCallee declarations below are opaque handles to
  // CUDA/Kitsune runtime entry points, created lazily during processing.
  FunctionCallee GetThreadIdx = nullptr;
  FunctionCallee GetBlockIdx = nullptr;
  FunctionCallee GetBlockDim = nullptr;
  FunctionCallee KitsuneCUDAInit = nullptr;
  FunctionCallee KitsuneGPUInitKernel = nullptr;
  FunctionCallee KitsuneGPUInitField = nullptr;
  FunctionCallee KitsuneGPUSetRunSize = nullptr;
  FunctionCallee KitsuneGPURunKernel = nullptr;
  FunctionCallee KitsuneGPUFinish = nullptr;
public:
  PTXLoop(Module &M);

  // NOTE(review): template arguments on SmallVectorImpl appear stripped by
  // text extraction; confirm against upstream.
  void setupLoopOutlineArgs(
      Function &F, ValueSet &HelperArgs, SmallVectorImpl &HelperInputs,
      ValueSet &InputSet, const SmallVectorImpl &LCArgs,
      const SmallVectorImpl &LCInputs,
      const ValueSet &TLInputsFixed)
      override final;
  unsigned getIVArgIndex(const Function &F, const ValueSet &Args) const
      override final;
  unsigned getLimitArgIndex(const Function &F, const ValueSet &Args) const
      override final;
  void postProcessOutline(TapirLoopInfo &TL, TaskOutlineInfo &Out,
                          ValueToValueMapTy &VMap) override final;
  void processOutlinedLoopCall(TapirLoopInfo &TL, TaskOutlineInfo &TOI,
                               DominatorTree &DT) override final;
};
} // namespace llvm

#endif
diff --git a/llvm/include/llvm/Transforms/Tapir/DRFScopedNoAliasAA.h b/llvm/include/llvm/Transforms/Tapir/DRFScopedNoAliasAA.h new file mode 100644 index 00000000000000..d0735df1ee9cad --- /dev/null +++ b/llvm/include/llvm/Transforms/Tapir/DRFScopedNoAliasAA.h @@ -0,0 +1,29 @@
//===- DRFScopedNoAlias.h - DRF-based scoped-noalias metadata ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Adds scoped-noalias metadata to memory accesses based on Tapir's parallel
// control flow constructs and the assumption that the function is data-race
// free.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_TRANSFORMS_TAPIR_DRFSCOPEDNOALIASPASS_H
#define LLVM_TRANSFORMS_TAPIR_DRFSCOPEDNOALIASPASS_H

#include "llvm/IR/PassManager.h"

namespace llvm {

/// The DRF-Based Scoped-Noalias Pass.
// New-pass-manager function pass; see the file header above for semantics.
// NOTE(review): the PassInfoMixin template argument appears stripped by text
// extraction (expected: PassInfoMixin<DRFScopedNoAliasPass>); confirm upstream.
struct DRFScopedNoAliasPass : public PassInfoMixin {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};

} // namespace llvm

#endif
diff --git a/llvm/include/llvm/Transforms/Tapir/LambdaABI.h b/llvm/include/llvm/Transforms/Tapir/LambdaABI.h new file mode 100644 index 00000000000000..2fca38d533bfdd --- /dev/null +++ b/llvm/include/llvm/Transforms/Tapir/LambdaABI.h @@ -0,0 +1,99 @@
//===- LambdaABI.h - Generic interface to runtime systems -------*- C++ -*--=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Lambda ABI to convert Tapir instructions to calls
// into a generic runtime system to operate on spawned computations as lambdas.
//
//===----------------------------------------------------------------------===//
#ifndef LAMBDA_ABI_H_
#define LAMBDA_ABI_H_

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Tapir/LoweringUtils.h"

namespace llvm {
class Value;
class TapirLoopInfo;

/// Tapir-lowering target that packages spawned tasks as lambda-style helper
/// functions and hands them to a generic runtime via opaque entry points.
class LambdaABI final : public TapirTarget {
  // Maps each detach context to the runtime stack frame created for it.
  ValueToValueMapTy DetachCtxToStackFrame;

  // Path to a bitcode file providing the runtime implementation, if any.
  StringRef RuntimeBCPath = "";

  // Runtime stack structure
  StructType *StackFrameTy = nullptr;
  FunctionType *SpawnBodyFnTy = nullptr;
  Type *SpawnBodyFnArgTy = nullptr;
  Type *SpawnBodyFnArgSizeTy = nullptr;

  // Runtime functions
  FunctionCallee RTSEnterFrame = nullptr;
  FunctionCallee RTSEnterHelperFrame = nullptr;
  FunctionCallee RTSSpawn = nullptr;
  FunctionCallee RTSLeaveFrame = nullptr;
  FunctionCallee RTSLeaveHelperFrame = nullptr;
  FunctionCallee RTSSync = nullptr;
  FunctionCallee RTSSyncNoThrow = nullptr;

  // Grainsize helpers, one per induction-variable width.
  FunctionCallee RTSLoopGrainsize8 = nullptr;
  FunctionCallee RTSLoopGrainsize16 = nullptr;
  FunctionCallee RTSLoopGrainsize32 = nullptr;
  FunctionCallee RTSLoopGrainsize64 = nullptr;

  FunctionCallee RTSGetNumWorkers = nullptr;
  FunctionCallee RTSGetWorkerID = nullptr;

  // Alignment applied to allocated runtime stack frames.
  Align StackFrameAlign{8};

  Value *CreateStackFrame(Function &F);
  Value *GetOrCreateStackFrame(Function &F);

  // Insert a push of the runtime stack frame at function entry.
  CallInst *InsertStackFramePush(Function &F,
                                 Instruction *TaskFrameCreate = nullptr,
                                 bool Helper = false);
  // Insert the matching pop(s) at function exits.
  void InsertStackFramePop(Function &F, bool PromoteCallsToInvokes,
                           bool InsertPauseFrame, bool Helper);

public:
  LambdaABI(Module &M) : TapirTarget(M) {}
  ~LambdaABI() { DetachCtxToStackFrame.clear(); }

  // void setOptions(const TapirTargetOptions &Options) override final;

  void prepareModule() override final;
  Value *lowerGrainsizeCall(CallInst *GrainsizeCall) override final;
  void lowerSync(SyncInst &SI) override final;
  // void lowerReducerOperation(CallBase *CI) override;

  ArgStructMode getArgStructMode() const override final {
    return ArgStructMode::Static;
  }
  void addHelperAttributes(Function &F) override final;

  bool preProcessFunction(Function &F, TaskInfo &TI,
                          bool ProcessingTapirLoops) override final;
  void postProcessFunction(Function &F,
                           bool ProcessingTapirLoops) override final;
  void postProcessHelper(Function &F) override final;

  void preProcessOutlinedTask(Function &F, Instruction *DetachPt,
                              Instruction *TaskFrameCreate, bool IsSpawner,
                              BasicBlock *TFEntry) override final;
  void postProcessOutlinedTask(Function &F, Instruction *DetachPt,
                               Instruction *TaskFrameCreate, bool IsSpawner,
                               BasicBlock *TFEntry) override final;
  void preProcessRootSpawner(Function &F, BasicBlock *TFEntry) override final;
  void postProcessRootSpawner(Function &F, BasicBlock *TFEntry) override final;
  void processSubTaskCall(TaskOutlineInfo &TOI,
                          DominatorTree &DT) override final;
};
} // namespace llvm

#endif // LAMBDA_ABI_H_
diff --git a/llvm/include/llvm/Transforms/Tapir/LoopSpawningTI.h
b/llvm/include/llvm/Transforms/Tapir/LoopSpawningTI.h new file mode 100644 index 00000000000000..44a3bc808e559c --- /dev/null +++ b/llvm/include/llvm/Transforms/Tapir/LoopSpawningTI.h @@ -0,0 +1,27 @@ +//===- LoopSpawningTI.h - Spawn loop iterations efficiently -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass modifies Tapir loops to spawn their iterations efficiently. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_TAPIR_LOOPSPAWNING_H +#define LLVM_TRANSFORMS_TAPIR_LOOPSPAWNING_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/// The LoopSpawning Pass. +struct LoopSpawningPass : public PassInfoMixin { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_TAPIR_LOOPSPAWNING_H diff --git a/llvm/include/llvm/Transforms/Tapir/LoopStripMine.h b/llvm/include/llvm/Transforms/Tapir/LoopStripMine.h new file mode 100644 index 00000000000000..270b77794620c7 --- /dev/null +++ b/llvm/include/llvm/Transforms/Tapir/LoopStripMine.h @@ -0,0 +1,54 @@ +//===- LoopStripMine.h - Tapir loop stripmining -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_TAPIR_LOOPSTRIPMINE_H +#define LLVM_TRANSFORMS_TAPIR_LOOPSTRIPMINE_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Support/InstructionCost.h" + +namespace llvm { + +class AssumptionCache; +class DominatorTree; +class Loop; +class LoopInfo; +class MDNode; +class OptimizationRemarkEmitter; +class ScalarEvolution; +class TargetLibraryInfo; +class TaskInfo; + +using NewLoopsMap = SmallDenseMap; + +void simplifyLoopAfterStripMine(Loop *L, bool SimplifyIVs, LoopInfo *LI, + ScalarEvolution *SE, DominatorTree *DT, + const TargetTransformInfo &TTI, + AssumptionCache *AC); + +TargetTransformInfo::StripMiningPreferences gatherStripMiningPreferences( + Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, + std::optional UserCount); + +bool computeStripMineCount(Loop *L, const TargetTransformInfo &TTI, + InstructionCost LoopCost, + TargetTransformInfo::StripMiningPreferences &UP); + +Loop *StripMineLoop(Loop *L, unsigned Count, bool AllowExpensiveTripCount, + bool UnrollRemainder, LoopInfo *LI, ScalarEvolution *SE, + DominatorTree *DT, const TargetTransformInfo &TTI, + AssumptionCache *AC, TaskInfo *TI, + OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, + bool ParallelEpilog, bool NeedNestedSync, + Loop **Remainderloop = nullptr); + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_TAPIR_LOOPSTRIPMINE_H diff --git a/llvm/include/llvm/Transforms/Tapir/LoopStripMinePass.h b/llvm/include/llvm/Transforms/Tapir/LoopStripMinePass.h new file mode 100644 index 00000000000000..5b130c3e89d624 --- /dev/null +++ b/llvm/include/llvm/Transforms/Tapir/LoopStripMinePass.h @@ -0,0 +1,32 @@ +//===- LoopStripMinePass.h - Tapir loop stripmining -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache 
License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_TAPIR_LOOPSTRIPMINEPASS_H +#define LLVM_TRANSFORMS_TAPIR_LOOPSTRIPMINEPASS_H + +#include "llvm/IR/PassManager.h" +#include "llvm/Support/CommandLine.h" + +namespace llvm { + +class Function; + +extern cl::opt EnableTapirLoopStripmine; + +/// Loop stripmining pass. It is a function pass to have access to function and +/// module analyses. +class LoopStripMinePass : public PassInfoMixin { +public: + explicit LoopStripMinePass() {} + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_TAPIR_LOOPSTRIPMINEPASS_H diff --git a/llvm/include/llvm/Transforms/Tapir/LoweringUtils.h b/llvm/include/llvm/Transforms/Tapir/LoweringUtils.h new file mode 100644 index 00000000000000..27fa8f2e465ce5 --- /dev/null +++ b/llvm/include/llvm/Transforms/Tapir/LoweringUtils.h @@ -0,0 +1,565 @@ +//===- LoweringUtils.h - Utility functions for lowering Tapir --*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements several utility functions for lowering Tapir. 
//
//===----------------------------------------------------------------------===//

#ifndef LOWERING_UTILS_H_
#define LOWERING_UTILS_H_

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Tapir/TapirTargetIDs.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

namespace llvm {

class AAResults;
class AssumptionCache;
class BasicBlock;
class DominatorTree;
class Function;
class Loop;
class LoopOutlineProcessor;
class Spindle;
class TapirLoopInfo;
class Task;
class TaskInfo;
class Value;

// NOTE(review): template arguments on the aliases below (SetVector<...>,
// DenseMap<...>) appear stripped by text extraction; confirm upstream.
using ValueSet = SetVector;
using SpindleSet = SetVector;
using TaskValueSetMap = DenseMap;
using TFValueSetMap = DenseMap;

// Bundle of the analyses needed while outlining tasks.
struct OutlineAnalysis {
  OutlineAnalysis(AAResults &AA, AssumptionCache &AC, DominatorTree &DT)
      : AA(AA), AC(AC), DT(DT) { }
  AAResults &AA;
  AssumptionCache &AC;
  DominatorTree &DT;
};

/// Structure that captures relevant information about an outlined task,
/// including the following:
/// -) A pointer to the outlined function.
/// -) The inputs passed to the call or invoke of that outlined function.
/// -) Pointers to the instructions that replaced the detach in the parent
/// function, ending with the call or invoke instruction to the outlined
/// function.
/// -) The normal and unwind destinations of the call or invoke of the outlined
/// function.
struct TaskOutlineInfo {
  // The outlined helper function.
  Function *Outline = nullptr;

  // Instruction in Outline corresponding to the detach point.
  Instruction *DetachPt = nullptr;

  // Instruction in Outline corresponding to the taskframe.create.
  Instruction *TaskFrameCreate = nullptr;

  // The set of values in the caller passed to the helper function.  These
  // values might be passed directly to a call to the helper function, or they
  // might be marshalled into a structure.
  ValueSet InputSet;

  // Instruction denoting the start of the code in the caller that replaced the
  // task or Tapir loop.
  Instruction *ReplStart = nullptr;

  // Instruction denoting the call or invoke instruction in the caller that
  // calls the outlined helper function.
  Instruction *ReplCall = nullptr;

  // Basic block to which the call to the outlined helper function returns.
  // For an outlined task, this block corresponds to the continuation block
  // of the original detach instruction.  For an outlined Tapir loop, this
  // block corresponds to the normal exit block after the loop latch.
  BasicBlock *ReplRet = nullptr;

  // Basic block denoting the unwind destination of an invocation of the
  // outlined helper function.  This block corresponds to the unwind block of
  // the original detach instruction, or nullptr if the original detach had no
  // unwind block.
  BasicBlock *ReplUnwind = nullptr;

  // Pointer to the basic block corresponding with the entry of this outlined
  // taskframe in the function from which this taskframe was outlined.  This
  // pointer is maintained to help Tapir targets use taskframe-entry blocks as
  // keys for target-specific maps.
  BasicBlock *OriginalTFEntry = nullptr;

  TaskOutlineInfo() = default;
  TaskOutlineInfo(Function *Outline, BasicBlock *OriginalTFEntry,
                  Instruction *DetachPt, Instruction *TaskFrameCreate,
                  ValueSet &InputSet, Instruction *ReplStart,
                  Instruction *ReplCall, BasicBlock *ReplRet,
                  BasicBlock *ReplUnwind = nullptr)
      : Outline(Outline), DetachPt(DetachPt), TaskFrameCreate(TaskFrameCreate),
        InputSet(InputSet), ReplStart(ReplStart), ReplCall(ReplCall),
        ReplRet(ReplRet), ReplUnwind(ReplUnwind),
        OriginalTFEntry(OriginalTFEntry) {}

  // Replaces the stored call or invoke instruction to the outlined function
  // with \p NewReplCall, and updates other information in this TaskOutlineInfo
  // struct appropriately.
+ void replaceReplCall(Instruction *NewReplCall) { + if (ReplStart == ReplCall) + ReplStart = NewReplCall; + ReplCall = NewReplCall; + } + + // Helper routine to remap relevant TaskOutlineInfo values in the event, for + // instance, that these values are themselves outlined. + void remapOutlineInfo(ValueToValueMapTy &VMap, ValueToValueMapTy &InputMap) { + ReplStart = cast(VMap[ReplStart]); + ReplCall = cast(VMap[ReplCall]); + ReplRet = cast(VMap[ReplRet]); + if (ReplUnwind) + ReplUnwind = cast(VMap[ReplUnwind]); + + // Remap the contents of InputSet. + ValueSet NewInputSet; + for (Value *V : InputSet) { + if (VMap[V]) + NewInputSet.insert(VMap[V]); + else if (InputMap[V] && VMap[InputMap[V]]) + NewInputSet.insert(VMap[InputMap[V]]); + else + NewInputSet.insert(V); + } + InputSet = NewInputSet; + } +}; + +// Map from tasks to TaskOutlineInfo structures. +using TaskOutlineMapTy = DenseMap; +using TFOutlineMapTy = DenseMap; + +/// Abstract class for a parallel-runtime-system target for Tapir lowering. +/// +/// The majority of the Tapir-lowering infrastructure focuses on outlining Tapir +/// tasks into separate functions, which is a common lowering step for many +/// different back-ends. Most of the heavy-lifting for this outlining process +/// is handled by the lowering infrastructure itself, implemented in +/// TapirToTarget and LoweringUtils. The TapirTarget class defines several +/// callbacks to tailor this lowering process for a particular back-end. +/// +/// The high-level Tapir-lowering algorithm, including the TapirTarget +/// callbacks, operates as follows: +/// +/// 1) For each Function F in the Module, call +/// TapirTarget::shouldProcessFunction(F) to decide whether to enqueue F for +/// processing. +/// +/// 2) Process each enqueued Function F as follows: +/// +/// a) Run TapirTarget::preProcessFunction(F). 
///
/// b) If TapirTarget::shouldDoOutlining(F) returns false, skip the subsequent
/// outlining steps, and only process grainsize calls, task-frameaddress
/// calls, and sync instructions in F.
///
/// c) For each Tapir task T in F in post-order:
///
/// i) Prepare the set of inputs to a helper function for T, using the
/// return value of OutlineProcessor::getArgStructMode() to guide this
/// preparation.  For example, if getArgStructMode() != None, insert code to
/// allocate a structure and marshal the inputs in that structure.
///
/// ii) Outline T into a new Function Helper, using the set of inputs
/// prepared in step 2ci and a constant NULL return value of type
/// TapirTarget::getReturnType().
///
/// iii) Run TapirTarget::addHelperAttributes(Helper).
///
/// d) Let Helper[T] denote the outlined Function for a task T.
///
/// e) For each Tapir task T in F in post-order:
///
/// i) Run TapirTarget::preProcessOutlinedTask(Helper[T]).
///
/// ii) For each subtask SubT spawned by Helper[T], run
/// TapirTarget::processSubTaskCall(Helper[SubT])
///
/// iii) Run TapirTarget::postProcessOutlinedTask(Helper[T]).
///
/// iv) Process the grainsize calls, task-frameaddress calls, and sync
/// instructions in Helper[T].
///
/// e) If F spawns tasks, run TapirTarget::preProcessRootSpawner(F); then, for
/// each child task T of F, run TapirTarget::processSubTaskCall(Helper[T]);
/// and finally run TapirTarget::postProcessRootSpawner(F).
///
/// f) Process the grainsize calls, task-frameaddress calls, and sync
/// instructions in F.
///
/// g) Run TapirTarget::postProcessFunction(F).
///
/// h) For each generated helper Function Helper, run
/// TapirTarget::postProcessHelper(Helper).
class TapirTarget {
protected:
  /// The Module of the original Tapir code.
  Module &M;
  /// The Module into which the outlined Helper functions will be placed.
  Module &DestM;

  TapirTarget(Module &M, Module &DestM) : M(M), DestM(DestM) {}

public:
  // Enumeration of ways arguments can be passed to outlined functions.
  enum class ArgStructMode {
    None,    // Pass arguments directly.
    Static,  // Statically allocate a structure to store arguments.
    Dynamic  // Dynamically allocate a structure to store arguments.
  };

  TapirTarget(Module &M) : M(M), DestM(M) {}
  virtual ~TapirTarget() {}

  virtual void setOptions(const TapirTargetOptions &Options) {}

  // Prepare the module for final Tapir lowering.
  virtual void prepareModule() {}

  /// Lower a call to the tapir.loop.grainsize intrinsic into a grainsize
  /// (coarsening) value.
  virtual Value *lowerGrainsizeCall(CallInst *GrainsizeCall) = 0;

  /// Lower a call to the task.frameaddress intrinsic to get the frame pointer
  /// for the containing function, i.e., after the task has been outlined.
  virtual void lowerTaskFrameAddrCall(CallInst *TaskFrameAddrCall);

  /// Lower a Tapir sync instruction SI.
  virtual void lowerSync(SyncInst &SI) = 0;

  // Default is a no-op; targets with reducer support override this.
  virtual void lowerReducerOperation(CallBase *Call) {
  }

  /// Lower calls to the tapir.runtime.{start,end} intrinsics.  Only
  /// tapir.runtime.start intrinsics are stored; uses of those intrinsics
  /// identify the tapir.runtime.end intrinsics to lower.
  /// NOTE(review): SmallVectorImpl template argument appears stripped by text
  /// extraction; confirm upstream.
  virtual void lowerTapirRTCalls(SmallVectorImpl &TapirRTCalls,
                                 Function &F, BasicBlock *TFEntry);

  // TODO: Add more options to control outlining.

  /// Returns true if Function F should be processed.
  virtual bool shouldProcessFunction(const Function &F) const;

  /// Returns true if tasks in Function F should be outlined into their own
  /// functions.  Such outlining is a common step for many Tapir backends.
  virtual bool shouldDoOutlining(const Function &F) const { return true; }

  /// Process Function F before any function outlining is performed.  This
  /// routine should not modify the CFG structure, unless it processes all Tapir
  /// instructions in F itself.  Returns true if it modifies the CFG, false
  /// otherwise.
  virtual bool preProcessFunction(Function &F, TaskInfo &TI,
                                  bool ProcessingTapirLoops = false) = 0;

  /// Returns an ArgStructMode enum value describing how inputs to a task should
  /// be passed to the task, e.g., directly as arguments to the outlined
  /// function, or marshalled in a structure.
  virtual ArgStructMode getArgStructMode() const { return ArgStructMode::None; }

  /// Get the return type of an outlined function for a task.
  virtual Type *getReturnType() const {
    return Type::getVoidTy(DestM.getContext());
  }

  /// Get the Module where outlined Helper will be placed.
  Module &getDestinationModule() const { return DestM; }

  // Add attributes to the Function Helper produced from outlining a task.
  virtual void addHelperAttributes(Function &Helper) {}

  // Remap any Target-local structures after taskframe starting at TFEntry is
  // outlined.
  virtual void remapAfterOutlining(BasicBlock *TFEntry,
                                   ValueToValueMapTy &VMap) {}

  // Pre-process the Function F that has just been outlined from a task.  This
  // routine is executed on each outlined function by traversing in post-order
  // the tasks in the original function.
  virtual void preProcessOutlinedTask(Function &F, Instruction *DetachPt,
                                      Instruction *TaskFrameCreate,
                                      bool IsSpawner, BasicBlock *TFEntry) = 0;

  // Post-process the Function F that has just been outlined from a task.  This
  // routine is executed on each outlined function by traversing in post-order
  // the tasks in the original function.
  virtual void postProcessOutlinedTask(Function &F, Instruction *DetachPt,
                                       Instruction *TaskFrameCreate,
                                       bool IsSpawner, BasicBlock *TFEntry) = 0;

  // Pre-process the root Function F as a function that can spawn subtasks.
  virtual void preProcessRootSpawner(Function &F, BasicBlock *TFEntry) = 0;

  // Post-process the root Function F as a function that can spawn subtasks.
  virtual void postProcessRootSpawner(Function &F, BasicBlock *TFEntry) = 0;

  // Process the invocation of a task for an outlined function.  This routine is
  // invoked after processSpawner once for each child subtask.
  virtual void processSubTaskCall(TaskOutlineInfo &TOI, DominatorTree &DT) = 0;

  // Process Function F at the end of the lowering process.
  virtual void postProcessFunction(Function &F,
                                   bool ProcessingTapirLoops = false) = 0;

  // Process a generated helper Function F produced via outlining, at the end of
  // the lowering process.
  virtual void postProcessHelper(Function &F) = 0;

  virtual bool processOrdinaryFunction(Function &F, BasicBlock *TFEntry);

  // Get the LoopOutlineProcessor associated with this Tapir target.
  virtual LoopOutlineProcessor *
  getLoopOutlineProcessor(const TapirLoopInfo *TL) const {
    return nullptr;
  }
};

/// A loop-outline processor customizes the transformation of Tapir loops,
/// outlined via LoopSpawningTI, for a particular back-end.  A loop-outline
/// processor serves a similar role for the LoopSpawningTI pass as a TapirTarget
/// serves for Tapir lowering.
///
/// The LoopSpawningTI pass outlines Tapir loops by examining each Function F in
/// a Module and performing the following algorithm:
///
/// 1) Analyze all loops in Function F to discover Tapir loops that are amenable
/// to LoopSpawningTI.
///
/// 2) Run TapirTarget::preProcessFunction(F, OutliningTapirLoops = true).
///
/// 3) Process each Tapir loop L as follows:
///
/// a) Prepare the set of inputs to the helper function derived from the Tapir
/// task in L, using the return value of OutlineProcessor::getArgStructMode()
/// to guide this preparation.
/// For example, if getArgStructMode() != None,
/// insert code to allocate a structure and marshal the inputs in that
/// structure.
///
/// b) Run OutlineProcessor::setupLoopOutlineArgs() to get the complete set
/// of inputs for the outlined helper function for L.
///
/// c) Outline L into a Function Helper, whose inputs are the prepared set of
/// inputs produced in step 2b and whose return type is void.  This outlining
/// step uses OutlineProcessor::getIVArgIndex() and
/// OutlineProcessor::getLimitArgIndex() to identify the helper input
/// parameters that specify the starting and ending iterations, respectively.
///
/// d) Call OutlineProcessor::postProcessOutline(Helper).
///
/// 4) For each Tapir loop L in F in post-order, run
/// OutlineProcessor::processOutlinedLoopCall().
///
/// 5) Run TapirTarget::postProcessFunction(F, OutliningTapirLoops = true).
///
/// Two generic loop-outline processors are provided with LoopSpawningTI.  The
/// default loop-outline processor performs no special modifications to outlined
/// Tapir loops.  The DACSpawning loop-outline processor transforms an outlined
/// Tapir loop to evaluate the iterations using parallel recursive
/// divide-and-conquer.
class LoopOutlineProcessor {
protected:
  /// The Module of the original Tapir code.
  Module &M;
  /// The Module into which the outlined Helper functions will be placed.
  Module &DestM;

  LoopOutlineProcessor(Module &M, Module &DestM) : M(M), DestM(DestM) {}
public:
  using ArgStructMode = TapirTarget::ArgStructMode;

  LoopOutlineProcessor(Module &M) : M(M), DestM(M) {}
  virtual ~LoopOutlineProcessor() = default;

  /// Returns an ArgStructMode enum value describing how inputs to the
  /// underlying task of a Tapir loop should be passed to the task, e.g.,
  /// directly as arguments to the outlined function, or marshalled in a
  /// structure.
  virtual ArgStructMode getArgStructMode() const {
    return ArgStructMode::None;
  }

  /// Prepares the set HelperArgs of function arguments for the outlined helper
  /// function Helper for a Tapir loop.  Also prepares the list HelperInputs of
  /// input values passed to a call to Helper.  HelperArgs and HelperInputs are
  /// derived from the loop-control arguments LCArgs and loop-control inputs
  /// LCInputs for the Tapir loop, as well as the set TLInputsFixed of arguments
  /// to the task underlying the Tapir loop.
  /// NOTE(review): SmallVectorImpl template arguments appear stripped by text
  /// extraction; confirm upstream.
  virtual void setupLoopOutlineArgs(
      Function &F, ValueSet &HelperArgs, SmallVectorImpl &HelperInputs,
      ValueSet &InputSet, const SmallVectorImpl &LCArgs,
      const SmallVectorImpl &LCInputs, const ValueSet &TLInputsFixed);

  /// Get the Module where outlined Helper will be placed.
  Module &getDestinationModule() const { return DestM; }

  /// Returns an integer identifying the index of the helper-function argument
  /// in Args that specifies the starting iteration number.  This return value
  /// must complement the behavior of setupLoopOutlineArgs().
  virtual unsigned getIVArgIndex(const Function &F, const ValueSet &Args) const;

  /// Returns an integer identifying the index of the helper-function argument
  /// in Args that specifies the ending iteration number.  This return value
  /// must complement the behavior of setupLoopOutlineArgs().
  virtual unsigned getLimitArgIndex(const Function &F,
                                    const ValueSet &Args) const {
    return getIVArgIndex(F, Args) + 1;
  }

  /// Processes an outlined Function Helper for a Tapir loop, just after the
  /// function has been outlined.
  virtual void postProcessOutline(TapirLoopInfo &TL, TaskOutlineInfo &Out,
                                  ValueToValueMapTy &VMap);

  /// Add syncs to the escape points of each helper function.  This operation is
  /// a common post-processing step for outlined helper functions.
  void addSyncToOutlineReturns(TapirLoopInfo &TL, TaskOutlineInfo &Out,
                               ValueToValueMapTy &VMap);

  /// Move Cilksan instrumentation out of cloned loop.
  void moveCilksanInstrumentation(TapirLoopInfo &TL, TaskOutlineInfo &Out,
                                  ValueToValueMapTy &VMap);

  /// Remap any data members of the LoopOutlineProcessor.  This method is called
  /// whenever a loop L is outlined, in order to update data for subloops of L.
  virtual void remapData(ValueToValueMapTy &VMap) {};

  /// Processes a call to an outlined Function Helper for a Tapir loop.
  virtual void processOutlinedLoopCall(TapirLoopInfo &TL, TaskOutlineInfo &TOI,
                                       DominatorTree &DT) {}
};

/// Generate a TapirTarget object for the specified TapirTargetID.
TapirTarget *getTapirTargetFromID(Module &M, TapirTargetID TargetID);

/// Find all inputs to tasks within a function \p F, including nested tasks.
TaskValueSetMap findAllTaskInputs(Function &F, const DominatorTree &DT,
                                  const TaskInfo &TI);

/// Compute the inputs and outputs of taskframe \p TF.
void getTaskFrameInputsOutputs(TFValueSetMap &TFInputs,
                               TFValueSetMap &TFOutputs,
                               const Spindle &TF, const ValueSet *TaskInputs,
                               const TaskInfo &TI, const DominatorTree &DT);

/// Compute inputs and outputs for all taskframes in \p AllTaskFrames.
/// NOTE(review): SmallVectorImpl template argument appears stripped by text
/// extraction; confirm upstream.
void findAllTaskFrameInputs(TFValueSetMap &TFInputs,
                            TFValueSetMap &TFOutputs,
                            const SmallVectorImpl &AllTaskFrames,
                            Function &F, const DominatorTree &DT, TaskInfo &TI);

/// Create a struct to store the inputs to pass to an outlined function for the
/// task \p T.  Stores into the struct will be inserted at \p StorePt, which
/// should precede the detach.  Loads from the struct will be inserted at
/// \p LoadPt, which should be inside \p T.  If a Tapir loop \p TapirL is
/// specified, then its header block is also considered a valid load point.
+std::pair +createTaskArgsStruct(const ValueSet &Inputs, Task *T, Instruction *StorePt, + Instruction *LoadPt, bool staticStruct, + ValueToValueMapTy &InputsMap, + Loop *TapirL = nullptr); + +/// Organize the set \p Inputs of values in \p F into a set \p Fixed of values +/// that can be used as inputs to a helper function. +void fixupInputSet(Function &F, const ValueSet &Inputs, ValueSet &Fixed); + +/// Organize the inputs to task \p T, given in \p TaskInputs, to create an +/// appropriate set of inputs, \p HelperInputs, to pass to the outlined function +/// for \p T. If a Tapir loop \p TapirL is specified, then its header block is +/// also used in fixing up inputs. +Instruction *fixupHelperInputs(Function &F, Task *T, ValueSet &TaskInputs, + ValueSet &HelperInputs, Instruction *StorePt, + Instruction *LoadPt, + TapirTarget::ArgStructMode useArgStruct, + ValueToValueMapTy &InputsMap, + Loop *TapirL = nullptr); + +/// Returns true if BasicBlock \p B is the immediate successor of only +/// detached-rethrow instructions. +bool isSuccessorOfDetachedRethrow(const BasicBlock *B); + +/// Collect the set of blocks in task \p T. All blocks enclosed by \p T will be +/// pushed onto \p TaskBlocks. The set of blocks terminated by reattaches from +/// \p T are added to \p ReattachBlocks. The set of blocks terminated by +/// detached-rethrow instructions are added to \p DetachedRethrowBlocks. The +/// set of entry points to exception-handling blocks shared by \p T and other +/// tasks in the same function are added to \p SharedEHEntries. +void getTaskBlocks(Task *T, std::vector &TaskBlocks, + SmallPtrSetImpl &ReattachBlocks, + SmallPtrSetImpl &TaskResumeBlocks, + SmallPtrSetImpl &SharedEHEntries, + const DominatorTree *DT); + +/// Outlines the content of task \p T in function \p F into a new helper +/// function. The parameter \p Inputs specified the inputs to the helper +/// function. 
The map \p VMap is updated with the mapping of instructions in +/// \p T to instructions in the new helper function. +Function *createHelperForTask( + Function &F, Task *T, ValueSet &Inputs, Module *DestM, + ValueToValueMapTy &VMap, Type *ReturnType, OutlineAnalysis &OA); + +/// Outlines the content of taskframe \p TF in function \p F into a new helper +/// function. The parameter \p Inputs specified the inputs to the helper +/// function. The map \p VMap is updated with the mapping of instructions in \p +/// TF to instructions in the new helper function. +Function *createHelperForTaskFrame( + Function &F, Spindle *TF, ValueSet &Args, Module *DestM, + ValueToValueMapTy &VMap, Type *ReturnType, OutlineAnalysis &OA); + +/// Replaces the taskframe \p TF, with associated TaskOutlineInfo \p Out, with a +/// call or invoke to the outlined helper function created for \p TF. +Instruction *replaceTaskFrameWithCallToOutline( + Spindle *TF, TaskOutlineInfo &Out, SmallVectorImpl &OutlineInputs); + +/// Outlines a task \p T into a helper function that accepts the inputs \p +/// Inputs. The map \p VMap is updated with the mapping of instructions in \p T +/// to instructions in the new helper function. Information about the helper +/// function is returned as a TaskOutlineInfo structure. +TaskOutlineInfo outlineTask( + Task *T, ValueSet &Inputs, SmallVectorImpl &HelperInputs, + Module *DestM, ValueToValueMapTy &VMap, + TapirTarget::ArgStructMode useArgStruct, Type *ReturnType, + ValueToValueMapTy &InputMap, OutlineAnalysis &OA); + +/// Outlines a taskframe \p TF into a helper function that accepts the inputs \p +/// Inputs. The map \p VMap is updated with the mapping of instructions in \p +/// TF to instructions in the new helper function. Information about the helper +/// function is returned as a TaskOutlineInfo structure. 
+TaskOutlineInfo outlineTaskFrame( + Spindle *TF, ValueSet &Inputs, SmallVectorImpl &HelperInputs, + Module *DestM, ValueToValueMapTy &VMap, + TapirTarget::ArgStructMode useArgStruct, Type *ReturnType, + ValueToValueMapTy &InputMap, OutlineAnalysis &OA); + +//----------------------------------------------------------------------------// +// Methods for lowering Tapir loops + +/// Given a Tapir loop \p TL and the set of inputs to the task inside that loop, +/// returns the set of inputs for the Tapir loop itself. +ValueSet getTapirLoopInputs(TapirLoopInfo *TL, ValueSet &TaskInputs); + + +/// Replaces the Tapir loop \p TL, with associated TaskOutlineInfo \p Out, with +/// a call or invoke to the outlined helper function created for \p TL. +Instruction *replaceLoopWithCallToOutline( + TapirLoopInfo *TL, TaskOutlineInfo &Out, + SmallVectorImpl &OutlineInputs); + +} // end namespace llvm + +#endif diff --git a/llvm/include/llvm/Transforms/Tapir/OMPTaskABI.h b/llvm/include/llvm/Transforms/Tapir/OMPTaskABI.h new file mode 100644 index 00000000000000..87b97aff36bbcc --- /dev/null +++ b/llvm/include/llvm/Transforms/Tapir/OMPTaskABI.h @@ -0,0 +1,99 @@ +//===- OMPTaskABI.h - Generic interface to runtime systems -------*- C++ -*--=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the OMP Task ABI to convert Tapir instructions to calls +// into kmpc task runtime calls. 
+// +//===----------------------------------------------------------------------===// +#ifndef OMPTASK_ABI_H_ +#define OMPTASK_ABI_H_ + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Transforms/Tapir/LoweringUtils.h" + +namespace llvm { +class Value; +class TapirLoopInfo; + +class OMPTaskABI final : public TapirTarget { + ValueToValueMapTy DetachCtxToStackFrame; + + StringRef RuntimeBCPath = ""; + + // Runtime stack structure + StructType *StackFrameTy = nullptr; + StructType *TaskTy = nullptr; + FunctionType *SpawnBodyFnTy = nullptr; + Type *SpawnBodyFnArgTy = nullptr; + Type *SpawnBodyFnArgSizeTy = nullptr; + + // Runtime functions + FunctionCallee RTSEnterFrame = nullptr; + FunctionCallee RTSGetArgsFromTask = nullptr; + FunctionCallee RTSSpawn = nullptr; + FunctionCallee RTSSync = nullptr; + FunctionCallee RTSSyncNoThrow = nullptr; + + FunctionCallee RTSLoopGrainsize8 = nullptr; + FunctionCallee RTSLoopGrainsize16 = nullptr; + FunctionCallee RTSLoopGrainsize32 = nullptr; + FunctionCallee RTSLoopGrainsize64 = nullptr; + + FunctionCallee RTSGetNumWorkers = nullptr; + FunctionCallee RTSGetWorkerID = nullptr; + + Align StackFrameAlign{8}; + + Value *CreateStackFrame(Function &F); + Value *GetOrCreateStackFrame(Function &F); + + CallInst *InsertStackFramePush(Function &F, + Instruction *TaskFrameCreate = nullptr, + bool Helper = false); + void InsertStackFramePop(Function &F, bool PromoteCallsToInvokes, + bool InsertPauseFrame, bool Helper); + +public: + OMPTaskABI(Module &M) : TapirTarget(M) {} + ~OMPTaskABI() { DetachCtxToStackFrame.clear(); } + + // void setOptions(const TapirTargetOptions &Options) override final; + + void prepareModule() override final; + Value *lowerGrainsizeCall(CallInst *GrainsizeCall) override final; + void lowerSync(SyncInst &SI) override final; + // void lowerReducerOperation(CallBase *CI) override; + + ArgStructMode getArgStructMode() const override final { + return 
ArgStructMode::Static; + } + void addHelperAttributes(Function &F) override final; + + bool preProcessFunction(Function &F, TaskInfo &TI, + bool ProcessingTapirLoops) override final; + void postProcessFunction(Function &F, + bool ProcessingTapirLoops) override final; + void postProcessHelper(Function &F) override final; + + void preProcessOutlinedTask(Function &F, Instruction *DetachPt, + Instruction *TaskFrameCreate, bool IsSpawner, + BasicBlock *TFEntry) override final; + void postProcessOutlinedTask(Function &F, Instruction *DetachPt, + Instruction *TaskFrameCreate, bool IsSpawner, + BasicBlock *TFEntry) override final; + void preProcessRootSpawner(Function &F, BasicBlock *TFEntry) override final; + void postProcessRootSpawner(Function &F, BasicBlock *TFEntry) override final; + void processSubTaskCall(TaskOutlineInfo &TOI, + DominatorTree &DT) override final; + +}; +} // namespace llvm + +#endif // OMPTASK_ABI_H_ diff --git a/llvm/include/llvm/Transforms/Tapir/OpenCilkABI.h b/llvm/include/llvm/Transforms/Tapir/OpenCilkABI.h new file mode 100644 index 00000000000000..a4bae8a902ca5f --- /dev/null +++ b/llvm/include/llvm/Transforms/Tapir/OpenCilkABI.h @@ -0,0 +1,197 @@ +//===- OpenCilkABI.h - Interface to the OpenCilk runtime system -*- C++ -*--=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the OpenCilk ABI to convert Tapir instructions to calls +// into the OpenCilk runtime system. 
+// +//===----------------------------------------------------------------------===// +#ifndef OPEN_CILK_ABI_H_ +#define OPEN_CILK_ABI_H_ + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Transforms/Tapir/LoweringUtils.h" + +namespace llvm { +class Value; +class TapirLoopInfo; + +class OpenCilkABI final : public TapirTarget { + ValueToValueMapTy DetachCtxToStackFrame; + SmallPtrSet CallsToInline; + DenseMap> TapirRTCalls; + ValueToValueMapTy DefaultSyncLandingpad; + + StringRef RuntimeBCPath = ""; + + // Cilk RTS data types + StructType *StackFrameTy = nullptr; + StructType *WorkerTy = nullptr; + + // Opaque Cilk RTS functions + FunctionCallee CilkRTSEnterFrame = nullptr; + FunctionCallee CilkRTSEnterFrameHelper = nullptr; + FunctionCallee CilkRTSDetach = nullptr; + FunctionCallee CilkRTSLeaveFrame = nullptr; + FunctionCallee CilkRTSLeaveFrameHelper = nullptr; + FunctionCallee CilkPrepareSpawn = nullptr; + FunctionCallee CilkSync = nullptr; + FunctionCallee CilkSyncNoThrow = nullptr; + FunctionCallee CilkParentEpilogue = nullptr; + FunctionCallee CilkHelperEpilogue = nullptr; + FunctionCallee CilkRTSEnterLandingpad = nullptr; + FunctionCallee CilkRTSPauseFrame = nullptr; + + FunctionCallee CilkRTSReducerRegister32 = nullptr; + FunctionCallee CilkRTSReducerRegister64 = nullptr; + FunctionCallee CilkRTSReducerUnregister = nullptr; + FunctionCallee CilkRTSReducerLookup = nullptr; + + // Accessors for opaque Cilk RTS functions + FunctionCallee CilkHelperEpilogueExn = nullptr; + FunctionCallee CilkRTSCilkForGrainsize8 = nullptr; + FunctionCallee CilkRTSCilkForGrainsize16 = nullptr; + FunctionCallee CilkRTSCilkForGrainsize32 = nullptr; + FunctionCallee CilkRTSCilkForGrainsize64 = nullptr; + + MaybeAlign StackFrameAlign{8}; + + // Accessors for CilkRTS ABI functions. When a bitcode file is loaded, these + // functions should return the function defined in the bitcode file. 
+ // Otherwise, these functions will return FunctionCallees for placeholder + // declarations of these functions. The latter case is intended for debugging + // ABI-call insertion. + FunctionCallee Get__cilkrts_enter_frame() { + return CilkRTSEnterFrame; + } + FunctionCallee Get__cilkrts_enter_frame_helper() { + return CilkRTSEnterFrameHelper; + } + FunctionCallee Get__cilkrts_detach() { + return CilkRTSDetach; + } + FunctionCallee Get__cilkrts_leave_frame() { + return CilkRTSLeaveFrame; + } + FunctionCallee Get__cilkrts_leave_frame_helper() { + return CilkRTSLeaveFrameHelper; + } + FunctionCallee Get__cilkrts_pause_frame() { + return CilkRTSPauseFrame; + } + FunctionCallee Get__cilkrts_enter_landingpad() { + return CilkRTSEnterLandingpad; + } + FunctionCallee Get__cilkrts_cilk_for_grainsize_8() { + return CilkRTSCilkForGrainsize8; + } + FunctionCallee Get__cilkrts_cilk_for_grainsize_16() { + return CilkRTSCilkForGrainsize16; + } + FunctionCallee Get__cilkrts_cilk_for_grainsize_32() { + return CilkRTSCilkForGrainsize32; + } + FunctionCallee Get__cilkrts_cilk_for_grainsize_64() { + return CilkRTSCilkForGrainsize64; + } + FunctionCallee Get__cilkrts_reducer_register(unsigned Bits) { + if (Bits == 32) + return CilkRTSReducerRegister32; + if (Bits == 64) + return CilkRTSReducerRegister64; + return 0; + } + FunctionCallee Get__cilkrts_reducer_unregister() { + return CilkRTSReducerUnregister; + } + FunctionCallee Get__cilkrts_reducer_lookup() { + return CilkRTSReducerLookup; + } + + // Helper functions for implementing the Cilk ABI protocol + FunctionCallee GetCilkPrepareSpawnFn() { + return CilkPrepareSpawn; + } + FunctionCallee GetCilkSyncFn() { + return CilkSync; + } + FunctionCallee GetCilkSyncNoThrowFn() { + return CilkSyncNoThrow; + } + FunctionCallee GetCilkParentEpilogueFn() { + return CilkParentEpilogue; + } + FunctionCallee GetCilkHelperEpilogueFn() { + return CilkHelperEpilogue; + } + FunctionCallee GetCilkHelperEpilogueExnFn() { + return 
CilkHelperEpilogueExn; + } + + void GetTapirRTCalls(Spindle *TaskFrame, bool IsRootTask, TaskInfo &TI); + void LowerTapirRTCalls(Function &F, BasicBlock *TFEntry); + + Value *CreateStackFrame(Function &F); + Value *GetOrCreateCilkStackFrame(Function &F); + + CallInst *InsertStackFramePush(Function &F, + Instruction *TaskFrameCreate = nullptr, + bool Helper = false); + void InsertStackFramePop(Function &F, bool PromoteCallsToInvokes, + bool InsertPauseFrame, bool Helper); + + void InsertDetach(Function &F, Instruction *DetachPt); + + void MarkSpawner(Function &F); + + BasicBlock *GetDefaultSyncLandingpad(Function &F, Value *SF, DebugLoc Loc); + +public: + OpenCilkABI(Module &M); + ~OpenCilkABI() { DetachCtxToStackFrame.clear(); } + + void setOptions(const TapirTargetOptions &Options) override final; + + void prepareModule() override final; + Value *lowerGrainsizeCall(CallInst *GrainsizeCall) override final; + void lowerSync(SyncInst &SI) override final; + void lowerReducerOperation(CallBase *CI) override; + + ArgStructMode getArgStructMode() const override final { + return ArgStructMode::None; + } + void addHelperAttributes(Function &F) override final; + + void remapAfterOutlining(BasicBlock *TFEntry, + ValueToValueMapTy &VMap) override final; + + bool preProcessFunction(Function &F, TaskInfo &TI, + bool ProcessingTapirLoops) override final; + void postProcessFunction(Function &F, + bool ProcessingTapirLoops) override final; + void postProcessHelper(Function &F) override final; + + void preProcessOutlinedTask(Function &F, Instruction *DetachPt, + Instruction *TaskFrameCreate, bool IsSpawner, + BasicBlock *TFEntry) override final; + void postProcessOutlinedTask(Function &F, Instruction *DetachPt, + Instruction *TaskFrameCreate, bool IsSpawner, + BasicBlock *TFEntry) override final; + void preProcessRootSpawner(Function &F, BasicBlock *TFEntry) override final; + void postProcessRootSpawner(Function &F, BasicBlock *TFEntry) override final; + void 
processSubTaskCall(TaskOutlineInfo &TOI, + DominatorTree &DT) override final; + bool processOrdinaryFunction(Function &F, BasicBlock *TFEntry) override final; + + LoopOutlineProcessor * + getLoopOutlineProcessor(const TapirLoopInfo *TL) const override final; +}; +} // namespace llvm + +#endif diff --git a/llvm/include/llvm/Transforms/Tapir/OpenMPABI.h b/llvm/include/llvm/Transforms/Tapir/OpenMPABI.h new file mode 100644 index 00000000000000..f7fcb4ad417e30 --- /dev/null +++ b/llvm/include/llvm/Transforms/Tapir/OpenMPABI.h @@ -0,0 +1,66 @@ +//===- OpenMPABI.h - Interface to the OpenMP runtime -----------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the OpenMP ABI to convert Tapir instructions to calls +// into the OpenMP runtime system. +// +//===----------------------------------------------------------------------===// +#ifndef OMP_ABI_H_ +#define OMP_ABI_H_ + +#include "llvm/Transforms/Tapir/LoweringUtils.h" + +namespace llvm { +class Value; + +enum OpenMPRuntimeFunction { + OMPRTL__kmpc_fork_call, + OMPRTL__kmpc_for_static_init_4, + OMPRTL__kmpc_for_static_fini, + OMPRTL__kmpc_master, + OMPRTL__kmpc_end_master, + OMPRTL__kmpc_omp_task_alloc, + OMPRTL__kmpc_omp_task, + OMPRTL__kmpc_omp_taskwait, + OMPRTL__kmpc_global_thread_num, + OMPRTL__kmpc_barrier, + OMPRTL__kmpc_global_num_threads, +}; + +enum OpenMPSchedType { + OMP_sch_static = 34, +}; + +class OpenMPABI : public TapirTarget { +public: + OpenMPABI(Module &M); + Value *lowerGrainsizeCall(CallInst *GrainsizeCall) override final; + void lowerSync(SyncInst &SI) override final; + + bool preProcessFunction(Function &F, TaskInfo &TI, + bool ProcessingTapirLoops) override final; + void postProcessFunction(Function &F, + bool ProcessingTapirLoops) override final; + void 
postProcessHelper(Function &F) override final; + + void preProcessOutlinedTask(Function &F, Instruction *DetachPt, + Instruction *TaskFrameCreate, bool IsSpawner, + BasicBlock *TFEntry) override final; + void postProcessOutlinedTask(Function &F, Instruction *DetachPt, + Instruction *TaskFrameCreate, bool IsSpawner, + BasicBlock *TFEntry) override final; + void preProcessRootSpawner(Function &F, BasicBlock *TFEntry) override final; + void postProcessRootSpawner(Function &F, BasicBlock *TFEntry) override final; + void processSubTaskCall(TaskOutlineInfo &TOI, + DominatorTree &DT) override final; +}; + +} // end of llvm namespace + +#endif diff --git a/llvm/include/llvm/Transforms/Tapir/Outline.h b/llvm/include/llvm/Transforms/Tapir/Outline.h new file mode 100644 index 00000000000000..e94b20840eef5d --- /dev/null +++ b/llvm/include/llvm/Transforms/Tapir/Outline.h @@ -0,0 +1,90 @@ +//===- Outline.h - Outlining for Tapir -------------------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines helper functions for outlining portions of code containing +// Tapir instructions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_TAPIR_OUTLINE_H +#define LLVM_TRANSFORMS_TAPIR_OUTLINE_H + +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ValueMapper.h" + +namespace llvm { + +using ValueSet = SetVector; + +// Value materializer for Tapir outlining. 
+class OutlineMaterializer : public ValueMaterializer { + const Value *SrcSyncRegion = nullptr; +public: + OutlineMaterializer(const Value *SrcSyncRegion = nullptr) + : SrcSyncRegion(SrcSyncRegion) {} + virtual ~OutlineMaterializer() { + BlocksToRemap.clear(); + } + + Value *materialize(Value *V) override; + + SetVector BlocksToRemap; +}; + +/// Clone Blocks into NewFunc, transforming the old arguments into references to +/// VMap values. +/// +/// TODO: Fix the std::vector part of the type of this function. +void CloneIntoFunction( + Function *NewFunc, const Function *OldFunc, + std::vector Blocks, ValueToValueMapTy &VMap, + bool ModuleLevelChanges, SmallVectorImpl &Returns, + const StringRef NameSuffix, + SmallPtrSetImpl *ReattachBlocks = nullptr, + SmallPtrSetImpl *DetachedRethrowBlocks = nullptr, + SmallPtrSetImpl *SharedEHEntries = nullptr, + DISubprogram *SP = nullptr, ClonedCodeInfo *CodeInfo = nullptr, + ValueMapTypeRemapper *TypeMapper = nullptr, + OutlineMaterializer *Materializer = nullptr); + +/// Create a helper function whose signature is based on Inputs and +/// Outputs as follows: f(in0, ..., inN, out0, ..., outN) +/// +/// TODO: Fix the std::vector part of the type of this function. 
+Function * +CreateHelper(const ValueSet &Inputs, const ValueSet &Outputs, + std::vector Blocks, BasicBlock *Header, + const BasicBlock *OldEntry, const BasicBlock *OldExit, + ValueToValueMapTy &VMap, Module *DestM, bool ModuleLevelChanges, + SmallVectorImpl &Returns, const StringRef NameSuffix, + SmallPtrSetImpl *ReattachBlocks = nullptr, + SmallPtrSetImpl *TaskResumeBlocks = nullptr, + SmallPtrSetImpl *SharedEHEntries = nullptr, + const BasicBlock *OldUnwind = nullptr, + SmallPtrSetImpl *UnreachableExits = nullptr, + Type *ReturnType = nullptr, ClonedCodeInfo *CodeInfo = nullptr, + ValueMapTypeRemapper *TypeMapper = nullptr, + OutlineMaterializer *Materializer = nullptr); + +// Add alignment assumptions to parameters of outlined function, based on known +// alignment data in the caller. +void AddAlignmentAssumptions(const Function *Caller, const ValueSet &Args, + ValueToValueMapTy &VMap, + const Instruction *CallSite, + AssumptionCache *AC, DominatorTree *DT); + +} // End llvm namespace + +#endif diff --git a/llvm/include/llvm/Transforms/Tapir/QthreadsABI.h b/llvm/include/llvm/Transforms/Tapir/QthreadsABI.h new file mode 100644 index 00000000000000..6b3b4bbf5b55cf --- /dev/null +++ b/llvm/include/llvm/Transforms/Tapir/QthreadsABI.h @@ -0,0 +1,82 @@ +//===- QthreadsABI.h - Interface to the Qthreads runtime ----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Qthreads back end to convert Tapir instructions to +// calls into the Qthreads runtime system. 
+// +//===----------------------------------------------------------------------===// +#ifndef QTHREADS_ABI_H_ +#define QTHREADS_ABI_H_ + +#include "llvm/Transforms/Tapir/LoweringUtils.h" + +namespace llvm { + +class QthreadsABI : public TapirTarget { + ValueToValueMapTy SyncRegionToSinc; + + Type *QthreadFTy = nullptr; + + // Opaque Qthreads RTS functions + FunctionCallee QthreadNumWorkers = nullptr; + FunctionCallee QthreadForkCopyargs = nullptr; + FunctionCallee QthreadInitialize = nullptr; + FunctionCallee QtSincCreate = nullptr; + FunctionCallee QtSincExpect = nullptr; + FunctionCallee QtSincSubmit = nullptr; + FunctionCallee QtSincWait = nullptr; + FunctionCallee QtSincDestroy = nullptr; + + // Accessors for opaque Qthreads RTS functions + FunctionCallee get_qthread_num_workers(); + FunctionCallee get_qthread_fork_copyargs(); + FunctionCallee get_qthread_initialize(); + FunctionCallee get_qt_sinc_create(); + FunctionCallee get_qt_sinc_expect(); + FunctionCallee get_qt_sinc_submit(); + FunctionCallee get_qt_sinc_wait(); + FunctionCallee get_qt_sinc_destroy(); + + Value *getOrCreateSinc(Value *SyncRegion, Function *F); +public: + QthreadsABI(Module &M); + ~QthreadsABI() { SyncRegionToSinc.clear(); } + + ArgStructMode getArgStructMode() const override final { + return ArgStructMode::Static; + } + Type *getReturnType() const override final { + return Type::getInt32Ty(M.getContext()); + } + + Value *lowerGrainsizeCall(CallInst *GrainsizeCall) override final; + void lowerSync(SyncInst &SI) override final; + + bool preProcessFunction(Function &F, TaskInfo &TI, + bool ProcessingTapirLoops) override final; + void postProcessFunction(Function &F, + bool ProcessingTapirLoops) override final; + void postProcessHelper(Function &F) override final; + + void preProcessOutlinedTask(Function &F, Instruction *DetachPt, + Instruction *TaskFrameCreate, bool IsSpawner, + BasicBlock *TFEntry) override final {} + void postProcessOutlinedTask(Function &F, Instruction *DetachPt, + 
Instruction *TaskFrameCreate, bool IsSpawner, + BasicBlock *TFEntry) override final {} + void preProcessRootSpawner(Function &F, BasicBlock *TFEntry) override final {} + void postProcessRootSpawner(Function &F, BasicBlock *TFEntry) override final { + } + void processSubTaskCall(TaskOutlineInfo &TOI, + DominatorTree &DT) override final; +}; + +} // end of llvm namespace + +#endif diff --git a/llvm/include/llvm/Transforms/Tapir/SerialABI.h b/llvm/include/llvm/Transforms/Tapir/SerialABI.h new file mode 100644 index 00000000000000..c7cd7f5b85306b --- /dev/null +++ b/llvm/include/llvm/Transforms/Tapir/SerialABI.h @@ -0,0 +1,53 @@ +//===- SerialABI.h - Replace Tapir with serial projection ------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Serial back end, which is used to convert Tapir +// instructions into their serial projection. 
+// +//===----------------------------------------------------------------------===// +#ifndef SERIAL_ABI_H_ +#define SERIAL_ABI_H_ + +#include "llvm/Transforms/Tapir/LoweringUtils.h" + +namespace llvm { + +class SerialABI : public TapirTarget { +public: + SerialABI(Module &M) : TapirTarget(M) {} + ~SerialABI() {} + + Value *lowerGrainsizeCall(CallInst *GrainsizeCall) override final; + void lowerSync(SyncInst &inst) override final; + + bool shouldDoOutlining(const Function &F) const override final { + return false; + } + bool preProcessFunction(Function &F, TaskInfo &TI, + bool ProcessingTapirLoops) override final; + void postProcessFunction(Function &F, + bool ProcessingTapirLoops) override final {} + void postProcessHelper(Function &F) override final {} + + void preProcessOutlinedTask(Function &F, Instruction *DetachPt, + Instruction *TaskFrameCreate, bool IsSpawner, + BasicBlock *TFEntry) override final {} + void postProcessOutlinedTask(Function &F, Instruction *DetachPt, + Instruction *TaskFrameCreate, bool IsSpawner, + BasicBlock *TFEntry) override final {} + void preProcessRootSpawner(Function &F, BasicBlock *TFEntry) override final {} + void postProcessRootSpawner(Function &F, BasicBlock *TFEntry) override final { + } + void processSubTaskCall(TaskOutlineInfo &TOI, + DominatorTree &DT) override final {} +}; + +} // end of llvm namespace + +#endif diff --git a/llvm/include/llvm/Transforms/Tapir/SerializeSmallTasks.h b/llvm/include/llvm/Transforms/Tapir/SerializeSmallTasks.h new file mode 100644 index 00000000000000..8155b936037fd5 --- /dev/null +++ b/llvm/include/llvm/Transforms/Tapir/SerializeSmallTasks.h @@ -0,0 +1,29 @@ +//===- SerializeSmallTasks.h - Serialize small Tapir tasks ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_TAPIR_SERIALIZESMALLTASKS_H_ +#define LLVM_TRANSFORMS_TAPIR_SERIALIZESMALLTASKS_H_ + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Function; + +/// Pass to serialize small Tapir tasks, whose work is too little to overcome +/// the overhead of a spawn. +class SerializeSmallTasksPass : public PassInfoMixin { +public: + explicit SerializeSmallTasksPass() {} + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_TAPIR_SERIALIZESMALLTASKS_H_ diff --git a/llvm/include/llvm/Transforms/Tapir/TapirLoopInfo.h b/llvm/include/llvm/Transforms/Tapir/TapirLoopInfo.h new file mode 100644 index 00000000000000..2d35a0995a003d --- /dev/null +++ b/llvm/include/llvm/Transforms/Tapir/TapirLoopInfo.h @@ -0,0 +1,252 @@ +//===- TapirLoopInfo.h - Utility functions for Tapir loops -----*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines utility functions for handling Tapir loops. 
+// +//===----------------------------------------------------------------------===// + +#ifndef TAPIR_LOOP_INFO_H_ +#define TAPIR_LOOP_INFO_H_ + +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TapirTaskInfo.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/Transforms/Tapir/LoweringUtils.h" +#include "llvm/Transforms/Utils/LoopUtils.h" + +namespace llvm { + +class AssumptionCache; +class BasicBlock; +class DominatorTree; +class ICmpInst; +class Instruction; +class OptimizationRemarkAnalysis; +class OptimizationRemarkEmitter; +class PHINode; +class PredicatedScalarEvolution; +class ScalarEvolution; +class TargetTransformInfo; + +/// Class for managing information about a Tapir loop, primarily for the purpose +/// of outlining Tapir loops. +/// +/// A Tapir loop is defined as an ordinary Loop whose body -- all code in the +/// loop except for the indiction variables and loop control --- is contained in +/// a spawned task. +class TapirLoopInfo { +public: + /// InductionList saves induction variables and maps them to the induction + /// descriptor. + using InductionList = MapVector; + + TapirLoopInfo(Loop *L, Task *T) : TheLoop(L), TheTask(T) { + // Get the exit block for this loop. + Instruction *TI = TheLoop->getLoopLatch()->getTerminator(); + ExitBlock = TI->getSuccessor(0); + if (ExitBlock == TheLoop->getHeader()) + ExitBlock = TI->getSuccessor(1); + + // Get the unwind destination for this loop. + DetachInst *DI = T->getDetach(); + if (DI->hasUnwindDest()) + UnwindDest = DI->getUnwindDest(); + } + + /// Constructor that automatically reads the metadata for the loop. 
+ TapirLoopInfo(Loop *L, Task *T, OptimizationRemarkEmitter &ORE) + : TapirLoopInfo(L, T) { + readTapirLoopMetadata(ORE); + } + + ~TapirLoopInfo() { + if (StartIterArg) + delete StartIterArg; + if (EndIterArg) + delete EndIterArg; + if (GrainsizeArg) + delete GrainsizeArg; + + DescendantTasks.clear(); + Inductions.clear(); + } + + Loop *getLoop() const { return TheLoop; } + Task *getTask() const { return TheTask; } + + /// Top-level call to prepare a Tapir loop for outlining. + bool prepareForOutlining( + DominatorTree &DT, LoopInfo &LI, TaskInfo &TI, + PredicatedScalarEvolution &PSE, AssumptionCache &AC, const char *PassName, + OptimizationRemarkEmitter &ORE, const TargetTransformInfo &TTI); + + /// Gather all induction variables in this loop that need special handling + /// during outlining. + bool collectIVs(PredicatedScalarEvolution &PSE, const char *PassName, + OptimizationRemarkEmitter *ORE); + + /// Replace all induction variables in this loop that are not primary with + /// stronger forms. + void replaceNonPrimaryIVs(PredicatedScalarEvolution &PSE); + + /// Identify the loop condition instruction, and determine if the loop uses an + /// inclusive or exclusive range. + bool getLoopCondition(const char *PassName, OptimizationRemarkEmitter *ORE); + + /// Fix up external users of the induction variable. + void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, + PredicatedScalarEvolution &PSE); + + /// Returns (and creates if needed) the original loop trip count. + const SCEV *getBackedgeTakenCount(PredicatedScalarEvolution &PSE) const; + const SCEV *getExitCount(const SCEV *BackedgeTakenCount, + PredicatedScalarEvolution &PSE) const; + // Return a non-overflowing value representing the trip count. For the + // typical case of a loop over a non-inclusive range (e.g., i \in [0,n), + // excluding n), this value is the backedge count plus 1. 
But to avoid + // overflow conditions, for a loop over an inclusive range (e.g., i \in [0,n], + // including n), this value is simply the backedge count. Passes are expected + // to use isInclusiveRange() to determine when they need to handle loops over + // inclusive ranges as a special case. + Value *getOrCreateTripCount(PredicatedScalarEvolution &PSE, + const char *PassName, + OptimizationRemarkEmitter *ORE); + + /// Record task T as a descendant task under this loop and not under a + /// descendant Tapir loop. + void addDescendantTask(Task *T) { DescendantTasks.push_back(T); } + + /// Adds \p Phi, with induction descriptor ID, to the inductions list. This + /// can set \p Phi as the main induction of the loop if \p Phi is a better + /// choice for the main induction than the existing one. + void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID); + + /// Returns the original loop trip count, if it has been computed. + Value *getTripCount() const { + assert(TripCount.pointsToAliveValue() && + "TripCount does not point to alive value."); + return TripCount; + } + + /// Returns the original loop condition, if it has been computed. + ICmpInst *getCondition() const { return Condition; } + + /// Returns true if this loop condition includes the end iteration. + bool isInclusiveRange() const { return InclusiveRange; } + + /// Returns the widest induction type. + Type *getWidestInductionType() const { return WidestIndTy; } + + /// Returns true if there is a primary induction variable for this Tapir loop. + bool hasPrimaryInduction() const { + return (nullptr != PrimaryInduction); + } + + /// Get the primary induction variable for this Tapir loop. + const std::pair &getPrimaryInduction() const { + assert(PrimaryInduction && "No primary induction."); + return *Inductions.find(PrimaryInduction); + } + + /// Returns the induction variables found in the loop. 
+ InductionList *getInductionVars() { return &Inductions; } + + /// Get the grainsize associated with this Tapir Loop. A return value of 0 + /// indicates the absence of a specified grainsize. + unsigned getGrainsize() const { return Grainsize; } + + /// Get the exit block assoicated with this Tapir loop. + BasicBlock *getExitBlock() const { return ExitBlock; } + + /// Get the unwind destination for this Tapir loop. + BasicBlock *getUnwindDest() const { return UnwindDest; } + + /// Get the set of tasks enclosed in this Tapir loop and not a descendant + /// Tapir loop. + void getEnclosedTasks(SmallVectorImpl &TaskVec) const { + TaskVec.push_back(TheTask); + for (Task *T : reverse(DescendantTasks)) + TaskVec.push_back(T); + } + + /// Update information on this Tapir loop based on its metadata. + void readTapirLoopMetadata(OptimizationRemarkEmitter &ORE); + + /// Get the debug location for this loop. + DebugLoc getDebugLoc() const { return TheTask->getDetach()->getDebugLoc(); } + + /// Create an analysis remark that explains why the transformation failed + /// + /// \p RemarkName is the identifier for the remark. If \p I is passed it is + /// an instruction that prevents the transformation. Otherwise \p TheLoop is + /// used for the location of the remark. \return the remark object that can + /// be streamed to. + /// + /// Based on createMissedAnalysis in the LoopVectorize pass. + static OptimizationRemarkAnalysis + createMissedAnalysis(const char *PassName, StringRef RemarkName, + const Loop *TheLoop, Instruction *I = nullptr); + +private: + /// The loop that we evaluate. + Loop *TheLoop; + + /// The task contained in this loop. + Task *TheTask; + + /// Descendants of TheTask that are enclosed by this loop and not a descendant + /// Tapir loop. + SmallVector DescendantTasks; + + /// The single exit block for this Tapir loop. + BasicBlock *ExitBlock = nullptr; + + /// The unwind destination of this Tapir loop, if it has one. 
+ BasicBlock *UnwindDest = nullptr; + + /// Holds the primary induction variable. This is the counter of the loop. + PHINode *PrimaryInduction = nullptr; + + /// Holds all of the induction variables that we found in the loop. Notice + /// that inductions don't need to start at zero and that induction variables + /// can be pointers. + InductionList Inductions; + + /// Holds the widest induction type encountered. + Type *WidestIndTy = nullptr; + + /// Trip count of the original loop. + WeakTrackingVH TripCount; + + /// Latch condition of the original loop. + ICmpInst *Condition = nullptr; + bool InclusiveRange = false; + + /// Grainsize value to use for loop. A value of 0 indicates that a call to + /// Tapir's grainsize intrinsic should be used. + unsigned Grainsize = 0; + +public: + /// Placeholder argument values. + Argument *StartIterArg = nullptr; + Argument *EndIterArg = nullptr; + Argument *GrainsizeArg = nullptr; +}; + +/// Transforms an induction descriptor into a direct computation of its value at +/// Index. +Value *emitTransformedIndex( + IRBuilder<> &B, Value *Index, ScalarEvolution *SE, const DataLayout &DL, + const InductionDescriptor &ID); + +} // end namepsace llvm + +#endif diff --git a/llvm/include/llvm/Transforms/Tapir/TapirTargetIDs.h b/llvm/include/llvm/Transforms/Tapir/TapirTargetIDs.h new file mode 100644 index 00000000000000..0e6f717a268fdd --- /dev/null +++ b/llvm/include/llvm/Transforms/Tapir/TapirTargetIDs.h @@ -0,0 +1,84 @@ +//===- TapirTargetIDs.h - Tapir target ID's --------------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file enumerates the available Tapir lowering targets. 
+// +//===----------------------------------------------------------------------===// + +#ifndef TAPIR_TARGET_IDS_H_ +#define TAPIR_TARGET_IDS_H_ + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" + +namespace llvm { + +enum class TapirTargetID { + None, // Perform no lowering + Serial, // Lower to serial projection + Cheetah, // Lower to the Cheetah ABI + Cilk, // Lower to the Cilk Plus ABI + Lambda, // Lower to generic Lambda ABI + OMPTask, // Lower to OpenMP task ABI + OpenCilk, // Lower to OpenCilk ABI + Qthreads, // Lower to Qthreads + Last_TapirTargetID +}; + +// Tapir target options + +// Virtual base class for Target-specific options. +class TapirTargetOptions { +public: + enum TapirTargetOptionKind { TTO_OpenCilk, Last_TTO }; + +private: + const TapirTargetOptionKind Kind; + +public: + TapirTargetOptionKind getKind() const { return Kind; } + + TapirTargetOptions(TapirTargetOptionKind K) : Kind(K) {} + TapirTargetOptions(const TapirTargetOptions &) = delete; + TapirTargetOptions &operator=(const TapirTargetOptions &) = delete; + virtual ~TapirTargetOptions() {} + + // Top-level method for cloning TapirTargetOptions. Defined in + // TargetLibraryInfo. + TapirTargetOptions *clone() const; +}; + +// Options for OpenCilkABI Tapir target. 
+class OpenCilkABIOptions : public TapirTargetOptions { + std::string RuntimeBCPath; + + OpenCilkABIOptions() = delete; + +public: + OpenCilkABIOptions(StringRef Path) + : TapirTargetOptions(TTO_OpenCilk), RuntimeBCPath(Path) {} + + StringRef getRuntimeBCPath() const { + return RuntimeBCPath; + } + + static bool classof(const TapirTargetOptions *TTO) { + return TTO->getKind() == TTO_OpenCilk; + } + +protected: + friend TapirTargetOptions; + + OpenCilkABIOptions *cloneImpl() const { + return new OpenCilkABIOptions(RuntimeBCPath); + } +}; + +} // end namespace llvm + +#endif diff --git a/llvm/include/llvm/Transforms/Tapir/TapirToTarget.h b/llvm/include/llvm/Transforms/Tapir/TapirToTarget.h new file mode 100644 index 00000000000000..8aa744f6e4ced9 --- /dev/null +++ b/llvm/include/llvm/Transforms/Tapir/TapirToTarget.h @@ -0,0 +1,34 @@ +//===- TapirToTarget.h - Lower Tapir to target ABI --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass lowers Tapir construct to a specified runtime ABI. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_TAPIR_TAPIRTOTARGET_H +#define LLVM_TRANSFORMS_TAPIR_TAPIRTOTARGET_H + +#include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Tapir/TapirTargetIDs.h" + +namespace llvm { + +/// The TapirToTarget Pass. +struct TapirToTargetPass : public PassInfoMixin { + TapirToTargetPass(TapirTargetID TargetID = TapirTargetID::Last_TapirTargetID) + : TargetID(TargetID) {} + + /// \brief Run the pass over the module. 
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + + TapirTargetID TargetID; +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_TAPIR_TAPIRTOTARGET_H diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h index 1c528a0100da92..04a383d7353306 100644 --- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -155,6 +155,7 @@ struct CriticalEdgeSplittingOptions { /// provided. If it cannot be preserved, no splitting will take place. If it /// is not set, preserve loop-simplify form if possible. bool PreserveLoopSimplify = true; + bool SplitDetachContinue = false; CriticalEdgeSplittingOptions(DominatorTree *DT = nullptr, LoopInfo *LI = nullptr, @@ -186,6 +187,11 @@ struct CriticalEdgeSplittingOptions { PreserveLoopSimplify = false; return *this; } + + CriticalEdgeSplittingOptions &setSplitDetachContinue() { + SplitDetachContinue = true; + return *this; + } }; /// When a loop exit edge is split, LCSSA form may require new PHIs in the new diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h index 429970cbe3042c..ad46e273011021 100644 --- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h +++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h @@ -69,6 +69,13 @@ namespace llvm { bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, StringRef Name); + /// Analyze the name the given function and set any applicable attributes. If + /// the library function is unavailable, this doesn't modify it. + /// + /// Returns true if any attributes were set and false otherwise. + bool inferTapirTargetLibFuncAttributes(Function &F, + const TargetLibraryInfo &TLI); + /// Check whether the overloaded floating point function /// corresponding to \a Ty is available. 
bool hasFloatFn(const Module *M, const TargetLibraryInfo *TLI, Type *Ty, diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index 1c342b871a4a45..20075957d5a3db 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -41,6 +41,7 @@ class Loop; class LoopInfo; class Module; class ProfileSummaryInfo; +class ResumeInst; class ReturnInst; class DomTreeUpdater; @@ -71,6 +72,9 @@ struct ClonedCodeInfo { /// are in the entry block but are not a constant size. bool ContainsDynamicAllocas = false; + /// This is set to true if the cloned code contains a detach instruction. + bool ContainsDetach = false; + /// All cloned call sites that have operand bundles attached are appended to /// this vector. This vector may contain nulls or undefs if some of the /// originally inserted callsites were DCE'ed after they were cloned. @@ -177,6 +181,7 @@ void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, const Instruction *StartingInst, ValueToValueMapTy &VMap, bool ModuleLevelChanges, SmallVectorImpl &Returns, + SmallVectorImpl &Resumes, const char *NameSuffix = "", ClonedCodeInfo *CodeInfo = nullptr); @@ -194,6 +199,7 @@ void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, bool ModuleLevelChanges, SmallVectorImpl &Returns, + SmallVectorImpl &Resumes, const char *NameSuffix = "", ClonedCodeInfo *CodeInfo = nullptr); diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h index d23db1574e9daa..faf9a0e37b8d39 100644 --- a/llvm/include/llvm/Transforms/Utils/Local.h +++ b/llvm/include/llvm/Transforms/Utils/Local.h @@ -379,6 +379,13 @@ Instruction *removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU = nullptr); bool removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU = nullptr, 
MemorySSAUpdater *MSSAU = nullptr); +/// Remove all detach-unwind blocks that do not catch exceptions from detached +/// tasks. +/// +/// Returns true if any basic block was removed. +bool removeDeadDetachUnwinds(Function &F, DomTreeUpdater *DTU = nullptr, + MemorySSAUpdater *MSSAU = nullptr); + /// Combine the metadata of two instructions so that K can replace J. Some /// metadata kinds can only be kept if K does not move, meaning it dominated /// J in the original IR. diff --git a/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h b/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h index 61bf93b74a15a0..159269e6b33f46 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopRotationUtils.h @@ -23,6 +23,7 @@ class MemorySSAUpdater; class ScalarEvolution; struct SimplifyQuery; class TargetTransformInfo; +class TaskInfo; /// Convert a loop into a loop with bottom test. It may /// perform loop latch simplication as well if the flag RotationOnly @@ -32,8 +33,9 @@ class TargetTransformInfo; /// LoopRotation. If it is true, the profitability heuristic will be ignored. 
bool LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC, DominatorTree *DT, ScalarEvolution *SE, - MemorySSAUpdater *MSSAU, const SimplifyQuery &SQ, - bool RotationOnly, unsigned Threshold, bool IsUtilMode, + MemorySSAUpdater *MSSAU, TaskInfo *TI, + const SimplifyQuery &SQ, bool RotationOnly, + unsigned Threshold, bool IsUtilMode, bool PrepareForLTO = false); } // namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index c6864124e0bcbe..81d947bc01de71 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -40,6 +40,7 @@ class ScalarEvolution; class SCEV; class SCEVExpander; class TargetLibraryInfo; +class TaskInfo; class LPPassManager; class Instruction; struct RuntimeCheckingPtrGroup; @@ -151,8 +152,8 @@ class SinkAndHoistLICMFlags { bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, TargetTransformInfo *, Loop *CurLoop, MemorySSAUpdater &, ICFLoopSafetyInfo *, - SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, - Loop *OutermostLoop = nullptr); + SinkAndHoistLICMFlags &, TaskInfo *, + OptimizationRemarkEmitter *, Loop *OutermostLoop = nullptr); /// Call sinkRegion on loops contained within the specified loop /// in order from innermost to outermost. 
@@ -160,7 +161,7 @@ bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, TargetLibraryInfo *, TargetTransformInfo *, Loop *, MemorySSAUpdater &, ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &, - OptimizationRemarkEmitter *); + TaskInfo *, OptimizationRemarkEmitter *); /// Walk the specified region of the CFG (defined by all blocks /// dominated by the specified block, and that are in the current loop) in depth @@ -175,8 +176,8 @@ bool sinkRegionForLoopNest(DomTreeNode *, AAResults *, LoopInfo *, bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, AssumptionCache *, TargetLibraryInfo *, Loop *, MemorySSAUpdater &, ScalarEvolution *, ICFLoopSafetyInfo *, - SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool, - bool AllowSpeculation); + SinkAndHoistLICMFlags &, TaskInfo *, + OptimizationRemarkEmitter *, bool, bool AllowSpeculation); /// Return true if the induction variable \p IV in a Loop whose latch is /// \p LatchBlock would become dead if the exit test \p Cond were removed. @@ -190,12 +191,13 @@ bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond); /// - The loop needs to have a Preheader /// - A unique dedicated exit block must exist /// -/// This also updates the relevant analysis information in \p DT, \p SE, \p LI -/// and \p MSSA if pointers to those are provided. +/// This also updates the relevant analysis information in \p DT, \p SE, \p LI, +/// \p TI and \p MSSA if pointers to those are provided. /// It also updates the loop PM if an updater struct is provided. void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, - LoopInfo *LI, MemorySSA *MSSA = nullptr); + LoopInfo *LI, TaskInfo *TI = nullptr, + MemorySSA *MSSA = nullptr); /// Remove the backedge of the specified loop. 
Handles loop nests and general /// loop structures subject to the precondition that the loop has no parent @@ -218,8 +220,9 @@ bool promoteLoopAccessesToScalars( SmallVectorImpl &, SmallVectorImpl &, PredIteratorCache &, LoopInfo *, DominatorTree *, AssumptionCache *AC, const TargetLibraryInfo *, TargetTransformInfo *, Loop *, - MemorySSAUpdater &, ICFLoopSafetyInfo *, OptimizationRemarkEmitter *, - bool AllowSpeculation, bool HasReadsOutsideSet); + MemorySSAUpdater &, ICFLoopSafetyInfo *, TaskInfo *, + OptimizationRemarkEmitter *, bool AllowSpeculation, + bool HasReadsOutsideSet); /// Does a BFS from a given node to all of its children inside a given loop. /// The returned vector of nodes includes the starting point. @@ -305,6 +308,8 @@ TransformationMode hasUnrollAndJamTransformation(const Loop *L); TransformationMode hasVectorizeTransformation(const Loop *L); TransformationMode hasDistributeTransformation(const Loop *L); TransformationMode hasLICMVersioningTransformation(const Loop *L); +TransformationMode hasLoopStripmineTransformation(const Loop *L); +TransformationMode hasLoopSpawningTransformation(const Loop *L); /// @} /// Set input string into loop metadata by keeping other values intact. @@ -353,7 +358,7 @@ void getLoopAnalysisUsage(AnalysisUsage &AU); /// If \p ORE is set use it to emit optimization remarks. 
bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, MemorySSAUpdater &MSSAU, - bool TargetExecutesOncePerLoop, + bool TargetExecutesOncePerLoop, TaskInfo *TI, SinkAndHoistLICMFlags &LICMFlags, OptimizationRemarkEmitter *ORE = nullptr); diff --git a/llvm/include/llvm/Transforms/Utils/PromoteMemToReg.h b/llvm/include/llvm/Transforms/Utils/PromoteMemToReg.h index f827ffd3e676ac..68c97c8a1a5292 100644 --- a/llvm/include/llvm/Transforms/Utils/PromoteMemToReg.h +++ b/llvm/include/llvm/Transforms/Utils/PromoteMemToReg.h @@ -20,6 +20,7 @@ template class ArrayRef; class AllocaInst; class DominatorTree; class AssumptionCache; +class TaskInfo; /// Return true if this alloca is legal for promotion. /// @@ -37,7 +38,7 @@ bool isAllocaPromotable(const AllocaInst *AI); /// the same function. /// void PromoteMemToReg(ArrayRef Allocas, DominatorTree &DT, - AssumptionCache *AC = nullptr); + AssumptionCache *AC = nullptr, TaskInfo *TI = nullptr); } // End llvm namespace diff --git a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h index 36fbf536f6d015..3553914b4d765a 100644 --- a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h +++ b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h @@ -54,6 +54,9 @@ class SSAUpdater { /// the vector. SmallVectorImpl *InsertedPHIs; + /// This keeps track of which values are defined in detached blocks. + void *VID = nullptr; + public: /// If InsertedPHIs is specified, it will be filled /// in with all PHI Nodes created by rewriting. @@ -106,6 +109,8 @@ class SSAUpdater { /// merge the appropriate values, and this value isn't live out of the block. Value *GetValueInMiddleOfBlock(BasicBlock *BB); + bool GetValueIsDetachedInBlock(BasicBlock *BB); + /// Rewrite a use of the symbolic value. 
/// /// This handles PHI nodes, which use their value in the corresponding diff --git a/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h b/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h index a3e5ac3ac19d45..09f54ca17c4e31 100644 --- a/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h +++ b/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h @@ -65,6 +65,9 @@ class SSAUpdaterImpl { // Marker for existing PHIs that match. PhiT *PHITag = nullptr; + // Flag to indicate that the AvailableVal would be used after a Reattach. + bool DetachedUse = false; + BBInfo(BlkT *ThisBB, ValT V) : BB(ThisBB), AvailableVal(V), DefBB(V ? this : nullptr) {} }; @@ -75,6 +78,10 @@ class SSAUpdaterImpl { SmallVectorImpl *InsertedPHIs; + using ValIsDetachedTy = DenseMap; + + ValIsDetachedTy *ValIsDetached; + using BlockListTy = SmallVectorImpl; using BBMapTy = DenseMap; @@ -83,8 +90,9 @@ class SSAUpdaterImpl { public: explicit SSAUpdaterImpl(UpdaterT *U, AvailableValsTy *A, - SmallVectorImpl *Ins) : - Updater(U), AvailableVals(A), InsertedPHIs(Ins) {} + SmallVectorImpl *Ins, + ValIsDetachedTy *D = nullptr) : + Updater(U), AvailableVals(A), InsertedPHIs(Ins), ValIsDetached(D) {} /// GetValue - Check to see if AvailableVals has an entry for the specified /// BB and if so, return it. If not, construct SSA form by first @@ -375,6 +383,10 @@ class SSAUpdaterImpl { (*AvailableVals)[Info->BB] = PHI; } + // Set of blocks with detached values that would be used except + // for Reattach. + SmallVector DetachedValBlocks; + // Now go back through the worklist in reverse order to fill in the // arguments for any new PHIs added in the forward traversal. for (typename BlockListTy::reverse_iterator I = BlockList->rbegin(), @@ -393,14 +405,47 @@ class SSAUpdaterImpl { if (!PHI) continue; + // Resolve detach and reattach predecessor information. A reattach + // predecessor should use the same available value as its corresponding + // detach. 
If a reattach predecessor does not have the same available + // value as its corresponding detach predecessor, note the use of a + // detached value. + SmallVector DetachPredInfo, ReattachPredInfo; // Iterate through the block's predecessors. for (unsigned p = 0; p != Info->NumPreds; ++p) { BBInfo *PredInfo = Info->Preds[p]; BlkT *Pred = PredInfo->BB; + if (Traits::BlockReattaches(Pred, Updater)) { + ReattachPredInfo.push_back(PredInfo); + continue; + } // Skip to the nearest preceding definition. if (PredInfo->DefBB != PredInfo) PredInfo = PredInfo->DefBB; Traits::AddPHIOperand(PHI, PredInfo->AvailableVal, Pred); + if (Traits::BlockDetaches(Pred, Updater)) + DetachPredInfo.push_back(PredInfo); + } + if (!ReattachPredInfo.empty()) { + assert(!DetachPredInfo.empty() && "Reattach predecessor found with no " + "corresponding Detach predecessor."); + for (BBInfo *RPInfo : ReattachPredInfo) { + bool FoundMatchingDetach = false; + for (BBInfo *DPInfo : DetachPredInfo) { + if (RPInfo->DefBB->BB == DPInfo->DefBB->BB) { + // Available value from predecessor through a reattach is the + // same as that for the corresponding detach. + Traits::AddPHIOperand(PHI, DPInfo->AvailableVal, RPInfo->BB); + FoundMatchingDetach = true; + break; + } + } + if (!FoundMatchingDetach) { + DetachedValBlocks.push_back(Info); + Traits::AddPHIOperand(PHI, Traits::GetUndefVal(RPInfo->BB, Updater), + RPInfo->BB); + } + } } LLVM_DEBUG(dbgs() << " Inserted PHI: " << *PHI << "\n"); @@ -408,6 +453,9 @@ class SSAUpdaterImpl { // If the client wants to know about all new instructions, tell it. if (InsertedPHIs) InsertedPHIs->push_back(PHI); } + + // Mark any definitions that are detached from their use. 
+ MarkDetachedDefs(&DetachedValBlocks); } /// FindExistingPHI - Look through the PHI nodes in a block to see if any of @@ -441,7 +489,21 @@ class SSAUpdaterImpl { for (typename Traits::PHI_iterator I = Traits::PHI_begin(PHI), E = Traits::PHI_end(PHI); I != E; ++I) { ValT IncomingVal = I.getIncomingValue(); - BBInfo *PredInfo = BBMap[I.getIncomingBlock()]; + BlkT *BB = I.getIncomingBlock(); + + // Replace a reattach predecessor with the corresponding + // detach predecessor. + // + // TODO: Remove the implicit assumption here that each basic + // block has at most one reattach predecessor. + if (Traits::BlockReattaches(BB, Updater)) + for (typename Traits::PHI_iterator PI = Traits::PHI_begin(PHI), + PE = Traits::PHI_end(PHI); PI != PE; ++PI) + if (Traits::BlockDetaches(PI.getIncomingBlock(), Updater)) { + BB = PI.getIncomingBlock(); + break; + } + BBInfo *PredInfo = BBMap[BB]; // Skip to the nearest preceding definition. if (PredInfo->DefBB != PredInfo) PredInfo = PredInfo->DefBB; @@ -484,6 +546,30 @@ class SSAUpdaterImpl { BBMap[BB]->AvailableVal = PHIVal; } } + + /// MarkDetachedDefs - Mark all definitions that reach the basic + /// blocks in WorkList as having detached uses. 
+ void MarkDetachedDefs(SmallVector *WorkList) { + BBInfo *Info; + while (!WorkList->empty()) { + Info = WorkList->pop_back_val(); + Info->DetachedUse = true; + + ValT AvailableVal = Info->AvailableVal; + if (!AvailableVal) + continue; + + if (ValIsDetached) + (*ValIsDetached)[Info->BB] = true; + + if (Traits::ValueIsPHI(AvailableVal, Updater) || + Info->DefBB != Info) + for (unsigned p = 0; p != Info->NumPreds; ++p) + if (!Info->Preds[p]->DetachedUse) + WorkList->push_back(Info->Preds[p]); + } + } + }; } // end namespace llvm diff --git a/llvm/include/llvm/Transforms/Utils/TapirUtils.h b/llvm/include/llvm/Transforms/Utils/TapirUtils.h new file mode 100644 index 00000000000000..d34e20acc0bc03 --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/TapirUtils.h @@ -0,0 +1,380 @@ +//===- TapirUtils.h - Utility methods for Tapir ----------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file utility methods for handling code containing Tapir instructions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_TAPIRUTILS_H +#define LLVM_TRANSFORMS_UTILS_TAPIRUTILS_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Transforms/Utils/ValueMapper.h" + +namespace llvm { + +class BasicBlock; +class DominatorTree; +class DomTreeUpdater; +class Loop; +class LoopInfo; +class MemorySSAUpdater; +class Spindle; +class Task; +class TaskInfo; + +// Check if the given instruction is an intrinsic with the specified ID. If a +// value \p V is specified, then additionally checks that the first argument of +// the intrinsic matches \p V. 
+bool isTapirIntrinsic(Intrinsic::ID ID, const Instruction *I, + const Value *V = nullptr); + +/// Returns true if the given instruction performs a detached.rethrow, false +/// otherwise. If \p SyncRegion is specified, then additionally checks that the +/// detached.rethrow uses \p SyncRegion. +bool isDetachedRethrow(const Instruction *I, const Value *SyncRegion = nullptr); + +/// Returns true if the given instruction performs a taskframe.resume, false +/// otherwise. If \p TaskFrame is specified, then additionally checks that the +/// taskframe.resume uses \p TaskFrame. +bool isTaskFrameResume(const Instruction *I, const Value *TaskFrame = nullptr); + +/// Returns true if the given basic block \p B is a placeholder successor of a +/// taskframe.resume or detached.rethrow. +bool isTapirPlaceholderSuccessor(const BasicBlock *B); + +/// Returns a taskframe.resume that uses the given taskframe, or nullptr if no +/// taskframe.resume uses this taskframe. +InvokeInst *getTaskFrameResume(Value *TaskFrame); + +/// Returns the unwind destination of a taskframe.resume that uses the given +/// taskframe, or nullptr if no such unwind destination exists. +BasicBlock *getTaskFrameResumeDest(Value *TaskFrame); + +/// Returns true if the given instruction is a sync.uwnind, false otherwise. If +/// \p SyncRegion is specified, then additionally checks that the sync.unwind +/// uses \p SyncRegion. +bool isSyncUnwind(const Instruction *I, const Value *SyncRegion = nullptr, + bool CheckForInvoke = false); + +/// Returns true if BasicBlock \p B is a placeholder successor, that is, it's +/// the immediate successor of only detached-rethrow and taskframe-resume +/// instructions. +bool isPlaceholderSuccessor(const BasicBlock *B); + +/// Returns true if the given basic block ends a taskframe, false otherwise. In +/// particular, this method checks if the penultimate instruction in the basic +/// block is a taskframe.end intrinsic call. 
If \p TaskFrame is specified, then +/// additionally checks that the taskframe.end uses \p TaskFrame. +bool endsTaskFrame(const BasicBlock *B, const Value *TaskFrame = nullptr); + +/// Returns the spindle containing the taskframe.create used by task \p T, or +/// the entry spindle of \p T if \p T has no such taskframe.create spindle. +Spindle *getTaskFrameForTask(Task *T); + +// Removes the given sync.unwind instruction, if it is dead. Returns true if +// the sync.unwind was removed, false otherwise. +bool removeDeadSyncUnwind(CallBase *SyncUnwind, DomTreeUpdater *DTU = nullptr); + +/// Returns true if the reattach instruction appears to match the given detach +/// instruction, false otherwise. +bool ReattachMatchesDetach(const ReattachInst *RI, const DetachInst *DI, + DominatorTree *DT = nullptr); + +/// Returns true of the given task itself contains a sync instruction. +bool taskContainsSync(const Task *T); + +/// Move static allocas in Block into Entry, which is assumed to dominate Block. +/// Leave lifetime markers behind in Block and before each instruction in +/// ExitPoints for those static allocas. Returns true if Block still contains +/// dynamic allocas, which cannot be moved. +bool MoveStaticAllocasInBlock(BasicBlock *Entry, BasicBlock *Block, + SmallVectorImpl &ExitPoints); + +/// Inline any taskframe.resume markers associated with the given taskframe. If +/// \p DT is provided, then it will be updated to reflect the CFG changes. +void InlineTaskFrameResumes(Value *TaskFrame, DominatorTree *DT = nullptr); + +/// Clone exception-handling blocks EHBlocksToClone, with predecessors +/// EHBlockPreds in a given task. Updates EHBlockPreds to point at the cloned +/// blocks. If the given pointers are non-null, updates blocks in *InlinedLPads +/// and *DetachedRethrows to refer to cloned blocks, and updates DT and LI to +/// reflect CFG updates. 
+void cloneEHBlocks(Function *F, SmallVectorImpl &EHBlocksToClone, + SmallPtrSetImpl &EHBlockPreds, + const char *Suffix, + SmallPtrSetImpl *InlinedLPads, + SmallVectorImpl *DetachedRethrows, + DominatorTree *DT = nullptr, LoopInfo *LI = nullptr); + +/// Serialize the detach DI. \p ParentEntry should be the entry block of the +/// task that contains DI. \p Reattaches, \p InlinedLPads, and \p +/// DetachedRethrows identify the reattaches, landing pads, and detached +/// rethrows in the task DI spawns that need special handling during +/// serialization. If \p DT is provided, then it will be updated to reflect the +/// CFG changes. +void SerializeDetach(DetachInst *DI, BasicBlock *ParentEntry, + BasicBlock *EHContinue, Value *LPadValInEHContinue, + SmallVectorImpl &Reattaches, + SmallVectorImpl *EHBlocksToClone, + SmallPtrSetImpl *EHBlockPreds, + SmallPtrSetImpl *InlinedLPads, + SmallVectorImpl *DetachedRethrows, + bool ReplaceWithTaskFrame = false, + DominatorTree *DT = nullptr, LoopInfo *LI = nullptr); + +/// Analyze a task T for serialization. Gets the reattaches, landing pads, and +/// detached rethrows that need special handling during serialization. +void AnalyzeTaskForSerialization( + Task *T, SmallVectorImpl &Reattaches, + SmallVectorImpl &EHBlocksToClone, + SmallPtrSetImpl &EHBlockPreds, + SmallPtrSetImpl &InlinedLPads, + SmallVectorImpl &DetachedRethrows); + +/// Serialize the detach DI that spawns task T. If \p DT is provided, then it +/// will be updated to reflect the CFG changes. +void SerializeDetach(DetachInst *DI, Task *T, bool ReplaceWithTaskFrame = false, + DominatorTree *DT = nullptr); + +/// Get the entry basic block to the detached context that contains +/// the specified block. +const BasicBlock *GetDetachedCtx(const BasicBlock *BB); +BasicBlock *GetDetachedCtx(BasicBlock *BB); + +// Returns true if the function may not be synced at the point of the given +// basic block, false otherwise. 
This function does a simple depth-first +// traversal of the CFG, and as such, produces a conservative result. +bool mayBeUnsynced(const BasicBlock *BB); + +/// isDetachContinueEdge - Return true if the edge from terminator instruction +/// TI to successor basic block Succ is a detach-continue edge. +bool isDetachContinueEdge(const Instruction *TI, const BasicBlock *Succ); + +/// isCriticalContinueEdge - Return true if the specified edge is a critical +/// detach-continue edge. Critical detach-continue edges are critical edges - +/// from a block with multiple successors to a block with multiple predecessors +/// - even after ignoring all reattach edges. +bool isCriticalContinueEdge(const Instruction *TI, unsigned SuccNum); + +/// GetDetachedCFG - Get the set of basic blocks in the CFG of the parallel task +/// spawned by detach instruction DI. The CFG will include the +/// exception-handling blocks that are separately identified in EHBlocks, which +/// might not be unique to the task. TaskReturns will store the set of basic +/// blocks that terminate the CFG of the parallel task. +void GetDetachedCFG(const DetachInst &DI, const DominatorTree &DT, + SmallPtrSetImpl &TaskBlocks, + SmallPtrSetImpl &EHBlocks, + SmallPtrSetImpl &TaskReturns); + +/// canDetach - Return true if the given function can perform a detach, false +/// otherwise. +bool canDetach(const Function *F); + +/// getDetachUnwindPHIUses - Collect all PHI nodes that directly or indirectly +/// use the landing pad for the unwind destination of detach DI. +void getDetachUnwindPHIUses(DetachInst *DI, + SmallPtrSetImpl &UnwindPHIs); + +/// getTaskFrameUsed - Return the taskframe used in the given detached block. +Value *getTaskFrameUsed(BasicBlock *Detached); + +/// splitTaskFrameCreateBlocks - Split basic blocks in function F at +/// taskframe.create intrinsics. Returns true if anything changed, false +/// otherwise. 
+bool splitTaskFrameCreateBlocks(Function &F, DominatorTree *DT = nullptr, + TaskInfo *TI = nullptr, LoopInfo *LI = nullptr, + MemorySSAUpdater *MSSAU = nullptr); + +/// taskFrameContains - Returns true if the given basic block \p B is contained +/// within the taskframe \p TF. +bool taskFrameContains(const Spindle *TF, const BasicBlock *B, + const TaskInfo &TI); + +/// taskFrameEncloses - Returns true if the given basic block \p B is enclosed +/// within the taskframe \p TF. +bool taskFrameEncloses(const Spindle *TF, const BasicBlock *B, + const TaskInfo &TI); + +/// fixupTaskFrameExternalUses - Fix any uses of variables defined in +/// taskframes, but outside of tasks themselves. For each such variable, insert +/// a memory allocation in the parent frame, add a store to that memory in the +/// taskframe, and modify external uses to use the value in that memory loaded +/// at the tasks continuation. +void fixupTaskFrameExternalUses(Spindle *TF, const TaskInfo &TI, + const DominatorTree &DT); + +/// FindTaskFrameCreateInBlock - Return the taskframe.create intrinsic in \p BB, +/// or nullptr if no taskframe.create intrinsic exists in \p BB. If specified, +/// ignores TFToIgnore when scanning for a taskframe.create. +Instruction *FindTaskFrameCreateInBlock(BasicBlock *BB, + const Value *TFToIgnore = nullptr); + +/// CreateSubTaskUnwindEdge - Create a landingpad for the exit of a taskframe or +/// task. +BasicBlock *CreateSubTaskUnwindEdge(Intrinsic::ID TermFunc, Value *Token, + BasicBlock *UnwindEdge, + BasicBlock *Unreachable, + Instruction *ParentI); + +/// promoteCallsInTasksToInvokes - Traverse the control-flow graph of F to +/// convert calls to invokes, recursively traversing tasks and taskframes to +/// insert appropriate detached.rethrow and taskframe.resume terminators. +void promoteCallsInTasksToInvokes(Function &F, const Twine Name = "cleanup"); + +/// eraseTaskFrame - Remove the specified taskframe and all uses of it. 
The +/// given \p TaskFrame should correspond to a taskframe.create call. The +/// DominatorTree \p DT is updated to reflect changes to the CFG, if \p DT is +/// not null. +void eraseTaskFrame(Value *TaskFrame, DominatorTree *DT = nullptr); + +/// Utility class for getting and setting Tapir-related loop hints in the form +/// of loop metadata. +/// +/// This class keeps a number of loop annotations locally (as member variables) +/// and can, upon request, write them back as metadata on the loop. It will +/// initially scan the loop for existing metadata, and will update the local +/// values based on information in the loop. +class TapirLoopHints { +public: + enum SpawningStrategy { + ST_SEQ, + ST_DAC, + ST_END, + }; + +private: + enum HintKind { HK_STRATEGY, HK_GRAINSIZE }; + + /// Hint - associates name and validation with the hint value. + struct Hint { + const char *Name; + unsigned Value; // This may have to change for non-numeric values. + HintKind Kind; + + Hint(const char *Name, unsigned Value, HintKind Kind) + : Name(Name), Value(Value), Kind(Kind) {} + + bool validate(unsigned Val) const { + switch (Kind) { + case HK_STRATEGY: + return (Val < ST_END); + case HK_GRAINSIZE: + return true; + } + return false; + } + }; + + /// Spawning strategy + Hint Strategy; + /// Grainsize + Hint Grainsize; + + /// Return the loop metadata prefix. + static StringRef Prefix() { return "tapir.loop."; } + +public: + static std::string printStrategy(enum SpawningStrategy Strat) { + switch(Strat) { + case TapirLoopHints::ST_SEQ: + return "Spawn iterations sequentially"; + case TapirLoopHints::ST_DAC: + return "Use divide-and-conquer"; + case TapirLoopHints::ST_END: + return "Unknown"; + } + } + + TapirLoopHints(const Loop *L) + : Strategy("spawn.strategy", ST_SEQ, HK_STRATEGY), + Grainsize("grainsize", 0, HK_GRAINSIZE), + TheLoop(L) { + // Populate values with existing loop metadata. + getHintsFromMetadata(); + } + + // /// Dumps all the hint information. 
+ // std::string emitRemark() const { + // TapirLoopReport R; + // R << "Strategy = " << printStrategy(getStrategy()); + + // return R.str(); + // } + + enum SpawningStrategy getStrategy() const { + return (SpawningStrategy)Strategy.Value; + } + + unsigned getGrainsize() const { + return Grainsize.Value; + } + + /// Clear Tapir Hints metadata. + void clearHintsMetadata(); + + /// Mark the loop L as having no spawning strategy. + void clearStrategy() { + Strategy.Value = ST_SEQ; + Hint Hints[] = {Strategy}; + writeHintsToMetadata(Hints); + } + + void clearClonedLoopMetadata(ValueToValueMapTy &VMap) { + Hint ClearStrategy = Strategy; + ClearStrategy.Value = ST_SEQ; + Hint Hints[] = {ClearStrategy}; + writeHintsToClonedMetadata(Hints, VMap); + } + + void setAlreadyStripMined() { + Grainsize.Value = 1; + Hint Hints[] = {Grainsize}; + writeHintsToMetadata(Hints); + } + +private: + /// Find hints specified in the loop metadata and update local values. + void getHintsFromMetadata(); + + /// Checks string hint with one operand and set value if valid. + void setHint(StringRef Name, Metadata *Arg); + + /// Create a new hint from name / value pair. + MDNode *createHintMetadata(StringRef Name, unsigned V) const; + + /// Matches metadata with hint name. + bool matchesHintMetadataName(MDNode *Node, ArrayRef HintTypes) const; + + /// Sets current hints into loop metadata, keeping other values intact. + void writeHintsToMetadata(ArrayRef HintTypes); + + /// Sets hints into cloned loop metadata, keeping other values intact. + void writeHintsToClonedMetadata(ArrayRef HintTypes, + ValueToValueMapTy &VMap); + + /// The loop these hints belong to. + const Loop *TheLoop; +}; + +/// Returns true if Tapir-loop hints require loop outlining during lowering. +bool hintsDemandOutlining(const TapirLoopHints &Hints); + +/// Create a new Loop MDNode by copying non-Tapir metadata from OrigLoopID. 
+MDNode *CopyNonTapirLoopMetadata(MDNode *LoopID, MDNode *OrigLoopID); + +/// Examine a given loop to determine if it is a Tapir loop that can and should +/// be processed. Returns the Task that encodes the loop body if so, or nullptr +/// if not. +Task *getTaskIfTapirLoop(const Loop *L, TaskInfo *TI); + +} // End llvm namespace + +#endif diff --git a/llvm/include/llvm/Transforms/Utils/TaskCanonicalize.h b/llvm/include/llvm/Transforms/Utils/TaskCanonicalize.h new file mode 100644 index 00000000000000..a2c219b471a0c1 --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/TaskCanonicalize.h @@ -0,0 +1,28 @@ +//===- TaskCanonicalize.h - Tapir task canonicalization pass -*- C++ -*----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass canonicalizes Tapir tasks. In particular, this pass splits blocks +// at taskframe.create intrinsics. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_TRANSFORMS_UTILS_TASKCANONICALIZE_H +#define LLVM_TRANSFORMS_UTILS_TASKCANONICALIZE_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +/// This pass is responsible for Tapir task simplification. 
+class TaskCanonicalizePass : public PassInfoMixin { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_TASKCANONICALIZE_H diff --git a/llvm/include/llvm/Transforms/Utils/TaskSimplify.h b/llvm/include/llvm/Transforms/Utils/TaskSimplify.h new file mode 100644 index 00000000000000..681af4e07ea4d8 --- /dev/null +++ b/llvm/include/llvm/Transforms/Utils/TaskSimplify.h @@ -0,0 +1,41 @@ +//===- TaskSimplify.h - Tapir task simplification pass -*- C++ -*----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass performs several transformations to simplify Tapir tasks. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_TRANSFORMS_UTILS_TASKSIMPLIFY_H +#define LLVM_TRANSFORMS_UTILS_TASKSIMPLIFY_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class DominatorTree; +struct MaybeParallelTasks; +class Task; +class TaskInfo; + +/// This pass is responsible for Tapir task simplification. +class TaskSimplifyPass : public PassInfoMixin { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Simplify syncs in the specified task T. +bool simplifySyncs(Task *T, MaybeParallelTasks &MPTasks); + +/// Simplify the specified task T. +bool simplifyTask(Task *T); + +/// Simplify the taskframes analyzed by TapirTaskInfo TI. 
+bool simplifyTaskFrames(TaskInfo &TI, DominatorTree &DT); + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_TASKSIMPLIFY_H diff --git a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h index 4f3010965b591d..4eb0653c04ca5b 100644 --- a/llvm/include/llvm/Transforms/Utils/UnrollLoop.h +++ b/llvm/include/llvm/Transforms/Utils/UnrollLoop.h @@ -33,6 +33,7 @@ class ProfileSummaryInfo; class OptimizationRemarkEmitter; class ScalarEvolution; class StringRef; +class TaskInfo; class Value; using NewLoopsMap = SmallDenseMap; @@ -76,7 +77,7 @@ struct UnrollLoopOptions { LoopUnrollResult UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, - AssumptionCache *AC, + AssumptionCache *AC, TaskInfo *TI, const llvm::TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop = nullptr); diff --git a/llvm/include/module.modulemap b/llvm/include/module.modulemap index 4c2ba437edb9f9..eb44b99c9c9367 100644 --- a/llvm/include/module.modulemap +++ b/llvm/include/module.modulemap @@ -12,6 +12,7 @@ module LLVM_Analysis { textual header "llvm/Analysis/ScalarFuncs.def" textual header "llvm/Analysis/TargetLibraryInfo.def" textual header "llvm/Analysis/VecFuncs.def" + textual header "llvm/Analysis/TapirTargetFuncs.def" } module LLVM_AsmParser { diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp index 7b2f91f5392a5b..a800f1670dacaa 100644 --- a/llvm/lib/Analysis/AliasAnalysis.cpp +++ b/llvm/lib/Analysis/AliasAnalysis.cpp @@ -23,10 +23,13 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/DataRaceFreeAliasAnalysis.h" #include 
"llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ObjCARCAliasAnalysis.h" @@ -38,6 +41,7 @@ #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Type.h" @@ -108,6 +112,14 @@ AliasResult AAResults::alias(const MemoryLocation &LocA, return alias(LocA, LocB, AAQIP, nullptr); } +AliasResult AAResults::alias(const MemoryLocation &LocA, + const MemoryLocation &LocB, + bool AssumeSameSpindle) { + SimpleAAQueryInfo AAQIP(*this); + AAQIP.AssumeSameSpindle = AssumeSameSpindle; + return alias(LocA, LocB, AAQIP, nullptr); +} + AliasResult AAResults::alias(const MemoryLocation &LocA, const MemoryLocation &LocB, AAQueryInfo &AAQI, const Instruction *CtxI) { @@ -187,6 +199,37 @@ ModRefInfo AAResults::getModRefInfo(const Instruction *I, return getModRefInfo(I, Call2, AAQIP); } +ModRefInfo AAResults::getModRefInfo(const Instruction *I, + const CallBase *Call2, + bool AssumeSameSpindle) { + SimpleAAQueryInfo AAQIP(*this); + AAQIP.AssumeSameSpindle = AssumeSameSpindle; + return getModRefInfo(I, Call2, AAQIP); +} + +/// Returns true if the given instruction performs a detached rethrow, false +/// otherwise. 
+static bool isDetachedRethrow(const Instruction *I, + const Value *SyncRegion = nullptr) { + if (const InvokeInst *II = dyn_cast(I)) + if (const Function *Called = II->getCalledFunction()) + if (Intrinsic::detached_rethrow == Called->getIntrinsicID()) + if (!SyncRegion || (SyncRegion == II->getArgOperand(0))) + return true; + return false; +} + +static bool taskTerminator(const Instruction *T, const Value *SyncRegion) { + if (const ReattachInst *RI = dyn_cast(T)) + if (SyncRegion == RI->getSyncRegion()) + return true; + + if (isDetachedRethrow(T, SyncRegion)) + return true; + + return false; +} + ModRefInfo AAResults::getModRefInfo(const Instruction *I, const CallBase *Call2, AAQueryInfo &AAQI) { // We may have two calls. @@ -197,6 +240,47 @@ ModRefInfo AAResults::getModRefInfo(const Instruction *I, const CallBase *Call2, // If this is a fence, just return ModRef. if (I->isFenceLike()) return ModRefInfo::ModRef; + // If this is a detach, collect the ModRef info of the detached operations. + if (auto D = dyn_cast(I)) { + ModRefInfo Result = ModRefInfo::NoModRef; + SmallPtrSet Visited; + SmallVector WorkList; + WorkList.push_back(D->getDetached()); + while (!WorkList.empty()) { + BasicBlock *BB = WorkList.pop_back_val(); + if (!Visited.insert(BB).second) + continue; + + for (Instruction &DI : BB->instructionsWithoutDebug()) { + // Fail fast if we encounter an invalid CFG. + assert(!(D == &DI) && + "Detached CFG reaches its own Detach instruction."); + + if (&DI == Call2) + return ModRefInfo::NoModRef; + + // No need to recursively check nested syncs or detaches, as nested + // tasks are wholly contained in the detached sub-CFG we're iterating + // through. 
+ if (isa(DI) || isa(DI)) + continue; + + if (isa(DI) || isa(DI) || isa(DI) || + isa(DI) || isa(DI) || + isa(DI) || isa(DI) || + DI.isFenceLike() || isa(DI)) + Result |= getModRefInfo(&DI, Call2, AAQI); + } + + // Add successors + const Instruction *T = BB->getTerminator(); + if (taskTerminator(T, D->getSyncRegion())) + continue; + for (unsigned idx = 0, max = T->getNumSuccessors(); idx < max; ++idx) + WorkList.push_back(T->getSuccessor(idx)); + } + return Result; + } // Otherwise, check if the call modifies or references the // location this memory access defines. The best we can say // is that if the call references what this instruction @@ -208,6 +292,10 @@ ModRefInfo AAResults::getModRefInfo(const Instruction *I, const CallBase *Call2, return ModRefInfo::NoModRef; } +static bool effectivelyArgMemOnly(const CallBase *Call, AAQueryInfo &AAQI) { + return Call->isStrandPure() && AAQI.AssumeSameSpindle; +} + ModRefInfo AAResults::getModRefInfo(const CallBase *Call, const MemoryLocation &Loc, AAQueryInfo &AAQI) { @@ -381,6 +469,10 @@ MemoryEffects AAResults::getMemoryEffects(const CallBase *Call, return Result; } + if (effectivelyArgMemOnly(Call, AAQI)) + return MemoryEffects(MemoryEffects::Location::ArgMem, + Result.getModRef(MemoryEffects::Location::ArgMem)); + return Result; } @@ -403,6 +495,79 @@ MemoryEffects AAResults::getMemoryEffects(const Function *F) { return Result; } +MemoryEffects AAResults::getMemoryEffects(const DetachInst *D, + AAQueryInfo &AAQI) { + MemoryEffects Result = MemoryEffects::none(); + SmallPtrSet Visited; + SmallVector WorkList; + WorkList.push_back(D->getDetached()); + while (!WorkList.empty()) { + const BasicBlock *BB = WorkList.pop_back_val(); + if (!Visited.insert(BB).second) + continue; + + for (const Instruction &I : *BB) { + // Fail fast if we encounter an invalid CFG. 
+ assert(!(D == &I) && + "Invalid CFG found: Detached CFG reaches its own Detach."); + + if (const auto *CS = dyn_cast(&I)) + Result |= getMemoryEffects(CS, AAQI); + + // Early-exit the moment we reach the top of the lattice. + if (Result == MemoryEffects::unknown()) + return Result; + } + + // Add successors + const Instruction *T = BB->getTerminator(); + if (taskTerminator(T, D->getSyncRegion())) + continue; + for (unsigned idx = 0, max = T->getNumSuccessors(); idx < max; ++idx) + WorkList.push_back(T->getSuccessor(idx)); + } + + return Result; +} + +MemoryEffects AAResults::getMemoryEffects(const SyncInst *S, + AAQueryInfo &AAQI) { + MemoryEffects Result = MemoryEffects::none(); + SmallPtrSet Visited; + SmallVector WorkList; + WorkList.push_back(S->getParent()); + while (!WorkList.empty()) { + const BasicBlock *BB = WorkList.pop_back_val(); + if (!Visited.insert(BB).second) + continue; + + if (const DetachInst *D = dyn_cast(BB->getTerminator())) + Result |= getMemoryEffects(D, AAQI); + + // Early-exit the moment we reach the top of the lattice. + if (Result == MemoryEffects::unknown()) + return Result; + + // Add predecessors + for (const BasicBlock *Pred : predecessors(BB)) { + const Instruction *PT = Pred->getTerminator(); + // Ignore reattached predecessors and predecessors that end in syncs, + // because this sync does not wait on those predecessors. + if (isa(PT) || isa(PT) || isDetachedRethrow(PT)) + continue; + + // If this block is detached, ignore the predecessor that detaches it. 
+ if (const DetachInst *Det = dyn_cast(PT)) + if (Det->getDetached() == BB) + continue; + + WorkList.push_back(Pred); + } + } + + return Result; +} + raw_ostream &llvm::operator<<(raw_ostream &OS, AliasResult AR) { switch (AR) { case AliasResult::NoAlias: @@ -605,6 +770,8 @@ ModRefInfo AAResults::getModRefInfo(const Instruction *I, if (OptLoc == std::nullopt) { if (const auto *Call = dyn_cast(I)) return getMemoryEffects(Call, AAQIP).getModRef(); + if (const auto *D = dyn_cast(I)) + return getMemoryEffects(D, AAQIP).getModRef(); } const MemoryLocation &Loc = OptLoc.value_or(MemoryLocation()); @@ -630,6 +797,10 @@ ModRefInfo AAResults::getModRefInfo(const Instruction *I, return getModRefInfo((const CatchPadInst *)I, Loc, AAQIP); case Instruction::CatchRet: return getModRefInfo((const CatchReturnInst *)I, Loc, AAQIP); + case Instruction::Detach: + return getModRefInfo((const DetachInst *)I, Loc, AAQIP); + case Instruction::Sync: + return getModRefInfo((const SyncInst *)I, Loc, AAQIP); default: assert(!I->mayReadOrWriteMemory() && "Unhandled memory access instruction!"); @@ -637,6 +808,89 @@ ModRefInfo AAResults::getModRefInfo(const Instruction *I, } } +ModRefInfo AAResults::getModRefInfo(const DetachInst *D, + const MemoryLocation &Loc, + AAQueryInfo &AAQI) { + ModRefInfo Result = ModRefInfo::NoModRef; + SmallPtrSet Visited; + SmallVector WorkList; + WorkList.push_back(D->getDetached()); + while (!WorkList.empty()) { + const BasicBlock *BB = WorkList.pop_back_val(); + if (!Visited.insert(BB).second) + continue; + + for (const Instruction &I : BB->instructionsWithoutDebug()) { + // Fail fast if we encounter an invalid CFG. + assert(!(D == &I) && + "Invalid CFG found: Detached CFG reaches its own Detach."); + + // No need to recursively check nested syncs or detaches, as nested tasks + // are wholly contained in the detached sub-CFG we're iterating through. 
+ if (isa(I) || isa(I)) + continue; + + Result |= getModRefInfo(&I, Loc, AAQI); + + // Early-exit the moment we reach the top of the lattice. + if (isModAndRefSet(Result)) + return Result; + } + + // Add successors + const Instruction *T = BB->getTerminator(); + if (taskTerminator(T, D->getSyncRegion())) + continue; + for (const BasicBlock *Successor : successors(BB)) + WorkList.push_back(Successor); + } + + return Result; +} + +ModRefInfo AAResults::getModRefInfo(const SyncInst *S, + const MemoryLocation &Loc, + AAQueryInfo &AAQI) { + // If no memory location pointer is given, treat the sync like a fence. + if (!Loc.Ptr) + return ModRefInfo::ModRef; + + ModRefInfo Result = ModRefInfo::NoModRef; + SmallPtrSet Visited; + SmallVector WorkList; + WorkList.push_back(S->getParent()); + while(!WorkList.empty()) { + const BasicBlock *BB = WorkList.pop_back_val(); + if (!Visited.insert(BB).second) + continue; + + if (const DetachInst *D = dyn_cast(BB->getTerminator())) { + Result |= getModRefInfo(D, Loc, AAQI); + + // Early-exit the moment we reach the top of the lattice. + if (isModAndRefSet(Result)) + return Result; + } + + // Add predecessors + for (const BasicBlock *Pred : predecessors(BB)) { + const Instruction *PT = Pred->getTerminator(); + // Ignore reattached predecessors and predecessors that end in syncs, + // because this sync does not wait on those predecessors. + if (isa(PT) || isa(PT) || isDetachedRethrow(PT)) + continue; + // If this block is detached, ignore the predecessor that detaches it. + if (const DetachInst *Det = dyn_cast(PT)) + if (Det->getDetached() == BB) + continue; + + WorkList.push_back(Pred); + } + } + + return Result; +} + /// Return information about whether a particular call site modifies /// or reads the specified memory location \p MemLoc before instruction \p I /// in a BasicBlock. 
@@ -760,6 +1014,7 @@ char AAResultsWrapperPass::ID = 0; INITIALIZE_PASS_BEGIN(AAResultsWrapperPass, "aa", "Function Alias Analysis Results", false, true) INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DRFAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(ExternalAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) @@ -802,6 +1057,8 @@ bool AAResultsWrapperPass::runOnFunction(Function &F) { AAR->addAAResult(WrapperPass->getResult()); if (auto *WrapperPass = getAnalysisIfAvailable()) AAR->addAAResult(WrapperPass->getResult()); + if (auto *WrapperPass = getAnalysisIfAvailable()) + AAR->addAAResult(WrapperPass->getResult()); // If available, run an external AA providing callback over the results as // well. @@ -826,6 +1083,7 @@ void AAResultsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.addUsedIfAvailable(); AU.addUsedIfAvailable(); AU.addUsedIfAvailable(); + AU.addUsedIfAvailable(); AU.addUsedIfAvailable(); } @@ -842,6 +1100,12 @@ bool llvm::isNoAliasCall(const Value *V) { return false; } +bool llvm::isNoAliasCallIfInSameSpindle(const Value *V) { + if (const auto *Call = dyn_cast(V)) + return Call->hasRetAttr(Attribute::StrandNoAlias); + return isNoAliasCall(V); +} + static bool isNoAliasOrByValArgument(const Value *V) { if (const Argument *A = dyn_cast(V)) return A->hasNoAliasAttr() || A->hasByValAttr(); @@ -860,6 +1124,14 @@ bool llvm::isIdentifiedObject(const Value *V) { return false; } +bool llvm::isIdentifiedObjectIfInSameSpindle(const Value *V) { + if (isIdentifiedObject(V)) + return true; + if (isNoAliasCallIfInSameSpindle(V)) + return true; + return false; +} + bool llvm::isIdentifiedFunctionLocal(const Value *V) { return isa(V) || isNoAliasCall(V) || isNoAliasOrByValArgument(V); } diff --git a/llvm/lib/Analysis/AliasSetTracker.cpp b/llvm/lib/Analysis/AliasSetTracker.cpp index 91b889116dfa2d..ba04f8bf56129f 100644 --- a/llvm/lib/Analysis/AliasSetTracker.cpp +++ 
b/llvm/lib/Analysis/AliasSetTracker.cpp @@ -401,6 +401,14 @@ void AliasSetTracker::addUnknown(Instruction *Inst) { if (isa(Inst)) return; // Ignore DbgInfo Intrinsics. + // Check for invokes of detached.rethrow, taskframe.resume, or sync.unwind. + if (const InvokeInst *I = dyn_cast(Inst)) + if (const Function *Called = I->getCalledFunction()) + if (Intrinsic::detached_rethrow == Called->getIntrinsicID() || + Intrinsic::taskframe_resume == Called->getIntrinsicID() || + Intrinsic::sync_unwind == Called->getIntrinsicID()) + return; + if (auto *II = dyn_cast(Inst)) { // These intrinsics will show up as affecting memory, but they are just // markers. @@ -412,6 +420,12 @@ void AliasSetTracker::addUnknown(Instruction *Inst) { case Intrinsic::experimental_noalias_scope_decl: case Intrinsic::sideeffect: case Intrinsic::pseudoprobe: + case Intrinsic::syncregion_start: + case Intrinsic::taskframe_create: + case Intrinsic::taskframe_use: + case Intrinsic::taskframe_end: + case Intrinsic::taskframe_load_guard: + case Intrinsic::sync_unwind: return; } } @@ -441,7 +455,7 @@ void AliasSetTracker::add(Instruction *I) { // Handle all calls with known mod/ref sets genericall if (auto *Call = dyn_cast(I)) - if (Call->onlyAccessesArgMemory()) { + if (Call->onlyAccessesArgMemory() || Call->isStrandPure()) { auto getAccessFromModRef = [](ModRefInfo MRI) { if (isRefSet(MRI) && isModSet(MRI)) return AliasSet::ModRefAccess; diff --git a/llvm/lib/Analysis/Analysis.cpp b/llvm/lib/Analysis/Analysis.cpp index 5461ce07af0b9f..3ee9552771be9c 100644 --- a/llvm/lib/Analysis/Analysis.cpp +++ b/llvm/lib/Analysis/Analysis.cpp @@ -45,6 +45,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializePostDomOnlyPrinterWrapperPassPass(Registry); initializeAAResultsWrapperPassPass(Registry); initializeGlobalsAAWrapperPassPass(Registry); + initializeDRFAAWrapperPassPass(Registry); initializeIVUsersWrapperPassPass(Registry); initializeInstCountLegacyPassPass(Registry); 
initializeIntervalPartitionPass(Registry); @@ -68,7 +69,9 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeScalarEvolutionWrapperPassPass(Registry); initializeStackSafetyGlobalInfoWrapperPassPass(Registry); initializeStackSafetyInfoWrapperPassPass(Registry); + initializeTapirRaceDetectWrapperPassPass(Registry); initializeTargetTransformInfoWrapperPassPass(Registry); + initializeTaskInfoWrapperPassPass(Registry); initializeTypeBasedAAWrapperPassPass(Registry); initializeScopedNoAliasAAWrapperPassPass(Registry); initializeLCSSAVerificationPassPass(Registry); diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 16e0e1f66524fa..41270c2ad1160d 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -1430,6 +1430,269 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize, return Alias; } +// Given that O1 != O2, return NoAlias if they can not alias. +static AliasResult UnderlyingNoAlias(const Value *O1, const Value *O2, + AAQueryInfo &AAQI) { + assert(O1 != O2 && "identical arguments to UnderlyingNoAlias"); + + // If V1/V2 point to two different objects, we know that we have no alias. + if (AAQI.AssumeSameSpindle) { + if (isIdentifiedObjectIfInSameSpindle(O1) && + isIdentifiedObjectIfInSameSpindle(O2)) + return AliasResult::NoAlias; + } else { + if (isIdentifiedObject(O1) && isIdentifiedObject(O2)) + return AliasResult::NoAlias; + } + + // Constant pointers can't alias with non-const isIdentifiedObject objects. + if ((isa(O1) && isIdentifiedObject(O2) && !isa(O2)) || + (isa(O2) && isIdentifiedObject(O1) && !isa(O1))) + return AliasResult::NoAlias; + + // Function arguments can't alias with things that are known to be + // unambigously identified at the function level. 
+ if ((isa(O1) && isIdentifiedFunctionLocal(O2)) || + (isa(O2) && isIdentifiedFunctionLocal(O1))) + return AliasResult::NoAlias; + + // If one pointer is the result of a call/invoke or load and the other is a + // non-escaping local object within the same function, then we know the + // object couldn't escape to a point where the call could return it. + // + // Note that if the pointers are in different functions, there are a + // variety of complications. A call with a nocapture argument may still + // temporary store the nocapture argument's value in a temporary memory + // location if that memory location doesn't escape. Or it may pass a + // nocapture value to other functions as long as they don't capture it. + if (isEscapeSource(O1) && + AAQI.CI->isNotCapturedBeforeOrAt(O2, cast(O1))) + return AliasResult::NoAlias; + if (isEscapeSource(O2) && + AAQI.CI->isNotCapturedBeforeOrAt(O1, cast(O2))) + return AliasResult::NoAlias; + + return AliasResult::MayAlias; +} + +namespace { +// TODO: Consider moving this code to AliasAnalysis.h, to make it accessible to +// other alias analyses. +// TODO: TapirFnBehavior::View and TapirFnBehavior::Strand may be redundant. 
+enum class TapirFnBehavior : uint8_t { + None = 0, + Injective = 1, + Pure = 2, // including strand pure function in same strand + View = 4, + InjectiveOrPureOrView = Injective | Pure | View, + Strand = 8, // excluding strand pure function in same strand + Any = InjectiveOrPureOrView | Strand, +}; + +static const std::pair + TapirFnAttrTable[] = { + {Attribute::Injective, TapirFnBehavior::Injective}, + {Attribute::HyperView, TapirFnBehavior::View}, + {Attribute::StrandPure, TapirFnBehavior::Strand}, +}; + +static inline bool noTapirFnBehavior(const TapirFnBehavior TFB) { + return (static_cast(TFB) & + static_cast(TapirFnBehavior::Any)) == + static_cast(TapirFnBehavior::None); +} +static inline bool isInjectiveSet(const TapirFnBehavior TFB) { + return (static_cast(TFB) & + static_cast(TapirFnBehavior::Injective)) == + static_cast(TapirFnBehavior::Injective); +} +static inline bool isPureSet(const TapirFnBehavior TFB) { + return (static_cast(TFB) & + static_cast(TapirFnBehavior::Pure)) == + static_cast(TapirFnBehavior::Pure); +} +static inline bool isViewSet(const TapirFnBehavior TFB) { + return (static_cast(TFB) & + static_cast(TapirFnBehavior::View)) == + static_cast(TapirFnBehavior::View); +} +static inline bool isInjectiveOrPureOrViewSet(const TapirFnBehavior TFB) { + return static_cast(TFB) & + static_cast(TapirFnBehavior::InjectiveOrPureOrView); +} +static inline bool isStrandSet(const TapirFnBehavior TFB) { + return (static_cast(TFB) & + static_cast(TapirFnBehavior::Strand)) == + static_cast(TapirFnBehavior::Strand); +} +static inline TapirFnBehavior setPure(const TapirFnBehavior TFB) { + return TapirFnBehavior(static_cast(TFB) | + static_cast(TapirFnBehavior::Pure)); +} +static inline TapirFnBehavior clearPure(const TapirFnBehavior TFB) { + return TapirFnBehavior(static_cast(TFB) & + ~static_cast(TapirFnBehavior::Pure)); +} +static inline TapirFnBehavior clearStrand(const TapirFnBehavior TFB) { + return TapirFnBehavior(static_cast(TFB) & + 
~static_cast(TapirFnBehavior::Strand)); +} +static inline TapirFnBehavior unionTapirFnBehavior(const TapirFnBehavior TFB1, + const TapirFnBehavior TFB2) { + return TapirFnBehavior(static_cast(TFB1) | + static_cast(TFB2)); +} +static inline TapirFnBehavior +intersectTapirFnBehavior(const TapirFnBehavior TFB1, + const TapirFnBehavior TFB2) { + return TapirFnBehavior(static_cast(TFB1) & + static_cast(TFB2)); +} +} // namespace + +// Tapir/OpenCilk code has some simple optimization opportunities. +// 1. Some runtime functions are injections, i.e., they return nonaliasing +// pointers when given nonaliasing arguments. +// 2. Some runtime functions are pure, or pure within a region of execution, +// which means the return values MustAlias if the arguments are identical. +// 3. View lookups return a value that does not alias anything that the +// argument does not alias (for simplicity, this implies injective). +// 4. Token lookups return a value that does not alias any alloca or global. +static const Value *getRecognizedArgument(const Value *V, bool InSameSpindle, + const Value *&Fn, + TapirFnBehavior &Behavior) { + const CallInst *C = dyn_cast(V); + if (!C) + return nullptr; + unsigned NumOperands = C->getNumOperands(); + if (NumOperands != 2 && NumOperands != 5) + return nullptr; + for (auto E : TapirFnAttrTable) { + if (C->hasFnAttr(E.first)) + Behavior = unionTapirFnBehavior(Behavior, E.second); + } + + // Make TapirFnBehavior::Strand and TapirFnBehavior::Pure mutually exclusive. 
+  if (isStrandSet(Behavior)) {
+    if (InSameSpindle)
+      Behavior = setPure(clearStrand(Behavior));
+    else
+      Behavior = clearPure(Behavior);
+  } else if (C->doesNotAccessMemory() && C->doesNotThrow() &&
+             C->hasFnAttr(Attribute::WillReturn)) {
+    Behavior = setPure(Behavior);
+  }
+
+  if (noTapirFnBehavior(Behavior))
+    return nullptr;
+  Fn = C->getCalledOperand();
+  return C->getOperand(0);
+}
+
+AliasResult
+BasicAAResult::checkInjectiveArguments(const Value *V1, const Value *O1,
+                                       const Value *V2, const Value *O2,
+                                       AAQueryInfo &AAQI) {
+  // V1 and V2 are the original pointers stripped of casts
+  // O1 and O2 are the underlying objects stripped of GEP as well
+
+  const Value *Fn1 = nullptr, *Fn2 = nullptr;
+  TapirFnBehavior Behavior1 = TapirFnBehavior::None,
+                  Behavior2 = TapirFnBehavior::None;
+  bool InSameSpindle = AAQI.AssumeSameSpindle;
+  const Value *A1 = getRecognizedArgument(V1, InSameSpindle, Fn1, Behavior1);
+  const Value *A2 = getRecognizedArgument(V2, InSameSpindle, Fn2, Behavior2);
+
+  if (!isInjectiveOrPureOrViewSet(Behavior1) &&
+      !isInjectiveOrPureOrViewSet(Behavior2))
+    return AliasResult::MayAlias;
+
+  // At least one value is a call to an understood function
+  assert(A1 || A2);
+  assert(!!A1 == !!Fn1);
+  assert(!!A2 == !!Fn2);
+
+  // Calls to two different functions can not be analyzed.
+  if (Fn1 && Fn2 && Fn1 != Fn2)
+    return AliasResult::MayAlias;
+
+  // Pure functions return equal values given equal arguments.
+  AliasResult Equal =
+      isPureSet(intersectTapirFnBehavior(Behavior1, Behavior2)) ?
+      AliasResult::MustAlias : AliasResult::MayAlias;
+
+  // This is for testing. The intended use is with pointer arguments.
+  if (A1 && A2 && isInjectiveSet(Behavior1)) {
+    if (const ConstantInt *I1 = dyn_cast<ConstantInt>(A1)) {
+      if (const ConstantInt *I2 = dyn_cast<ConstantInt>(A2))
+        return I1->getValue() == I2->getValue() ?
+ Equal : AliasResult(AliasResult::NoAlias); + return AliasResult::MayAlias; + } + } + + bool Known1 = false, Known2 = false; + const Value *U1 = nullptr, *U2 = nullptr; + + if (A1) { + U1 = getUnderlyingObject(A1, MaxLookupSearchDepth); + Known1 = isIdentifiedObject(U1); + } + if (A2) { + U2 = getUnderlyingObject(A2, MaxLookupSearchDepth); + Known2 = isIdentifiedObject(U2); + } + + // Rules, in order: + // 1. Potentially unequal values based on the same object may alias. + // 2. View lookups do not alias allocas that do not alias the argument + if (!A1) { + if (!Known2) + return AliasResult::MayAlias; + if (O1 == U2) // 1 + return AliasResult::MayAlias; + if (isViewSet(Behavior2)) // 2 + return UnderlyingNoAlias(O1, U2, AAQI); + return AliasResult::MayAlias; + } + if (!A2) { + if (!Known1) + return AliasResult::MayAlias; + if (U1 == O2) // 1 + return AliasResult::MayAlias; + if (isViewSet(Behavior1)) // 2 + return UnderlyingNoAlias(U1, O2, AAQI); + return AliasResult::MayAlias; + } + + if (!isInjectiveSet(Behavior1)) + return AliasResult::MayAlias; + + // Two calls to the same function with the same value. + if (isValueEqualInPotentialCycles(A1, A2, AAQI)) + return Equal; + + // Two calls with different values based on the same object. + if (U1 == U2) { + // TODO: Currently the caller only cares whether the result is NoAlias. + // If the caller relied on partial overlap detection a function like + // void *f(void *p) { return p; } + // could not be declared injective. + BasicAAResult::DecomposedGEP DecompGEP1 = + DecomposeGEPExpression(A1, DL, &AC, DT); + BasicAAResult::DecomposedGEP DecompGEP2 = + DecomposeGEPExpression(A2, DL, &AC, DT); + if (DecompGEP1.VarIndices.empty() && DecompGEP2.VarIndices.empty() && + isValueEqualInPotentialCycles(DecompGEP1.Base, DecompGEP2.Base, AAQI)) + return DecompGEP1.Offset == DecompGEP2.Offset + ? 
Equal + : AliasResult(AliasResult::NoAlias); + return AliasResult::MayAlias; + } + + return UnderlyingNoAlias(U1, U2, AAQI); +} + /// Provides a bunch of ad-hoc rules to disambiguate in common cases, such as /// array references. AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size, @@ -1475,38 +1738,17 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size, if (!NullPointerIsDefined(&F, CPN->getType()->getAddressSpace())) return AliasResult::NoAlias; - if (O1 != O2) { - // If V1/V2 point to two different objects, we know that we have no alias. - if (isIdentifiedObject(O1) && isIdentifiedObject(O2)) - return AliasResult::NoAlias; - - // Constant pointers can't alias with non-const isIdentifiedObject objects. - if ((isa(O1) && isIdentifiedObject(O2) && !isa(O2)) || - (isa(O2) && isIdentifiedObject(O1) && !isa(O1))) - return AliasResult::NoAlias; - - // Function arguments can't alias with things that are known to be - // unambigously identified at the function level. - if ((isa(O1) && isIdentifiedFunctionLocal(O2)) || - (isa(O2) && isIdentifiedFunctionLocal(O1))) - return AliasResult::NoAlias; + // If the call is an injection (distinct argument implies + // distinct return) some more optimization is possible. + AliasResult InjectiveResult = + checkInjectiveArguments(V1, O1, V2, O2, AAQI); + if (InjectiveResult == AliasResult::NoAlias) + return AliasResult::NoAlias; + else if (InjectiveResult == AliasResult::MustAlias) + return AliasResult::MayAlias; - // If one pointer is the result of a call/invoke or load and the other is a - // non-escaping local object within the same function, then we know the - // object couldn't escape to a point where the call could return it. - // - // Note that if the pointers are in different functions, there are a - // variety of complications. 
A call with a nocapture argument may still - // temporary store the nocapture argument's value in a temporary memory - // location if that memory location doesn't escape. Or it may pass a - // nocapture value to other functions as long as they don't capture it. - if (isEscapeSource(O1) && - AAQI.CI->isNotCapturedBeforeOrAt(O2, cast(O1))) - return AliasResult::NoAlias; - if (isEscapeSource(O2) && - AAQI.CI->isNotCapturedBeforeOrAt(O1, cast(O2))) - return AliasResult::NoAlias; - } + if (O1 != O2 && UnderlyingNoAlias(O1, O2, AAQI) == AliasResult::NoAlias) + return AliasResult::NoAlias; // If the size of one access is larger than the entire object on the other // side, then we know such behavior is undefined and can assume no alias. diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index 4a1797c42789a0..213bbfa3d1f371 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -47,6 +47,7 @@ add_llvm_component_library(LLVMAnalysis CodeMetrics.cpp ConstantFolding.cpp CycleAnalysis.cpp + DataRaceFreeAliasAnalysis.cpp DDG.cpp DDGPrinter.cpp ConstraintSystem.cpp @@ -126,6 +127,8 @@ add_llvm_component_library(LLVMAnalysis StackSafetyAnalysis.cpp SyntheticCountsUtils.cpp TFLiteUtils.cpp + TapirRaceDetect.cpp + TapirTaskInfo.cpp TargetLibraryInfo.cpp TargetTransformInfo.cpp TensorSpec.cpp @@ -140,6 +143,7 @@ add_llvm_component_library(LLVMAnalysis ValueTracking.cpp VectorUtils.cpp VFABIDemangling.cpp + WorkSpanAnalysis.cpp ${GeneratedMLSources} ADDITIONAL_HEADER_DIRS diff --git a/llvm/lib/Analysis/CaptureTracking.cpp b/llvm/lib/Analysis/CaptureTracking.cpp index 00e096af3110b9..f22225676c916f 100644 --- a/llvm/lib/Analysis/CaptureTracking.cpp +++ b/llvm/lib/Analysis/CaptureTracking.cpp @@ -306,6 +306,49 @@ UseCaptureKind llvm::DetermineUseCaptureKind( function_ref IsDereferenceableOrNull) { Instruction *I = cast(U.getUser()); + if (ConstantExpr *CE = dyn_cast(I)) { + switch (CE->getOpcode()) { + case 
Instruction::BitCast:
+    case Instruction::GetElementPtr:
+    case Instruction::Select:
+      // The original value is not captured via this if the new value isn't.
+      return UseCaptureKind::PASSTHROUGH;
+    case Instruction::ICmp: {
+      unsigned Idx = U.getOperandNo();
+      unsigned OtherIdx = 1 - Idx;
+      if (auto *CPN = dyn_cast<ConstantPointerNull>(CE->getOperand(OtherIdx))) {
+        // Don't count comparisons of a no-alias return value against null as
+        // captures. This allows us to ignore comparisons of malloc results
+        // with null, for example.
+        if (CPN->getType()->getAddressSpace() == 0)
+          if (isNoAliasCall(U.get()->stripPointerCasts()))
+            return UseCaptureKind::NO_CAPTURE;
+        if (!I->getFunction()->nullPointerIsDefined()) {
+          auto *O = I->getOperand(Idx)->stripPointerCastsSameRepresentation();
+          // Comparing a dereferenceable_or_null pointer against null cannot
+          // lead to pointer escapes, because if it is not null it must be a
+          // valid (in-bounds) pointer.
+          const DataLayout &DL = I->getModule()->getDataLayout();
+          if (IsDereferenceableOrNull && IsDereferenceableOrNull(O, DL))
+            return UseCaptureKind::NO_CAPTURE;
+        }
+      }
+      // Comparison against value stored in global variable. Given the pointer
+      // does not escape, its value cannot be guessed and stored separately in a
+      // global variable.
+      auto *LI = dyn_cast<LoadInst>(CE->getOperand(OtherIdx));
+      if (LI && isa<GlobalVariable>(LI->getPointerOperand()))
+        return UseCaptureKind::NO_CAPTURE;
+      // Otherwise, be conservative. There are crazy ways to capture pointers
+      // using comparisons.
+      return UseCaptureKind::MAY_CAPTURE;
+    }
+    default:
+      // Something else - be conservative and say it is captured.
+ return UseCaptureKind::MAY_CAPTURE; + } + } + switch (I->getOpcode()) { case Instruction::Call: case Instruction::Invoke: { diff --git a/llvm/lib/Analysis/CodeMetrics.cpp b/llvm/lib/Analysis/CodeMetrics.cpp index 2637e2f97dbb21..39a84a15801b9c 100644 --- a/llvm/lib/Analysis/CodeMetrics.cpp +++ b/llvm/lib/Analysis/CodeMetrics.cpp @@ -14,8 +14,10 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Debug.h" #include "llvm/Support/InstructionCost.h" @@ -115,7 +117,8 @@ void CodeMetrics::collectEphemeralValues( /// block. void CodeMetrics::analyzeBasicBlock( const BasicBlock *BB, const TargetTransformInfo &TTI, - const SmallPtrSetImpl &EphValues, bool PrepareForLTO) { + const SmallPtrSetImpl &EphValues, bool PrepareForLTO, + TargetLibraryInfo *TLI) { ++NumBlocks; InstructionCost NumInstsBeforeThisBB = NumInsts; for (const Instruction &I : *BB) { @@ -147,6 +150,13 @@ void CodeMetrics::analyzeBasicBlock( if (IsLoweredToCall) ++NumCalls; + + // Check for a call to a builtin function or a Tapir-target library + // function. + LibFunc LF; + if (TLI && (TLI->getLibFunc(*F, LF) || TLI->isTapirTargetLibFunc(*F))) + ++NumBuiltinCalls; + } else { // We don't want inline asm to count as a call - that would prevent loop // unrolling. The argument setup cost is still real, though. 
@@ -163,8 +173,13 @@ void CodeMetrics::analyzeBasicBlock( if (isa(I) || I.getType()->isVectorTy()) ++NumVectorInsts; - if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB)) - notDuplicatable = true; + if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB)) { + if (const IntrinsicInst *II = dyn_cast(&I)) { + if (Intrinsic::syncregion_start != II->getIntrinsicID()) + notDuplicatable = true; + } else + notDuplicatable = true; + } if (const CallInst *CI = dyn_cast(&I)) { if (CI->cannotDuplicate()) diff --git a/llvm/lib/Analysis/DataRaceFreeAliasAnalysis.cpp b/llvm/lib/Analysis/DataRaceFreeAliasAnalysis.cpp new file mode 100644 index 00000000000000..87df8da99f364a --- /dev/null +++ b/llvm/lib/Analysis/DataRaceFreeAliasAnalysis.cpp @@ -0,0 +1,145 @@ +//===- DataRaceFreeAliasAnalysis.cpp - DRF-based Alias Analysis -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the DataRaceFreeAliasAnalysis pass, which implements alias +// analysis based on the assumption that a Tapir program is data-race free. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DataRaceFreeAliasAnalysis.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/TapirTaskInfo.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "drf-aa-result"
+
+cl::opt<bool> llvm::EnableDRFAA(
+    "enable-drf-aa", cl::init(false), cl::Hidden,
+    cl::desc("Enable AA based on the data-race-free assumption "
+             "(default = off)"));
+
+bool DRFAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA,
+                             FunctionAnalysisManager::Invalidator &Inv) {
+  // We don't care if this analysis itself is preserved, it has no state. But we
+  // need to check that the analyses it depends on have been.
+  if (Inv.invalidate<TaskAnalysis>(Fn, PA))
+    return true;
+
+  // Otherwise this analysis result remains valid.
+ return false; +} + +#ifndef NDEBUG +static const Function *getParent(const Value *V) { + if (const Instruction *inst = dyn_cast(V)) { + if (!inst->getParent()) + return nullptr; + return inst->getParent()->getParent(); + } + + if (const Argument *arg = dyn_cast(V)) + return arg->getParent(); + + return nullptr; +} + +static bool notDifferentParent(const Value *O1, const Value *O2) { + + const Function *F1 = getParent(O1); + const Function *F2 = getParent(O2); + + return !F1 || !F2 || F1 == F2; +} +#endif + +AliasResult DRFAAResult::alias(const MemoryLocation &LocA, + const MemoryLocation &LocB, AAQueryInfo &AAQI, + const Instruction *CtxI) { + if (!EnableDRFAA) + return AAResultBase::alias(LocA, LocB, AAQI, CtxI); + + LLVM_DEBUG(dbgs() << "DRFAA:\n\tLocA.Ptr = " << *LocA.Ptr + << "\n\tLocB.Ptr = " << *LocB.Ptr << "\n"); + assert(notDifferentParent(LocA.Ptr, LocB.Ptr) && + "DRFAliasAnalysis doesn't support interprocedural queries."); + + if (const Instruction *AddrA = dyn_cast(LocA.Ptr)) + if (const Instruction *AddrB = dyn_cast(LocB.Ptr)) + if (TI.mayHappenInParallel(AddrA->getParent(), AddrB->getParent())) + return AliasResult::NoAlias; + return AAResultBase::alias(LocA, LocB, AAQI, CtxI); +} + +ModRefInfo DRFAAResult::getModRefInfo(const CallBase *Call, + const MemoryLocation &Loc, + AAQueryInfo &AAQI) { + if (!EnableDRFAA) + return AAResultBase::getModRefInfo(Call, Loc, AAQI); + + LLVM_DEBUG(dbgs() << "DRFAA:getModRefInfo(Call, Loc)\n"); + assert(notDifferentParent(Call, Loc.Ptr) && + "DRFAliasAnalysis doesn't support interprocedural queries."); + + if (const Instruction *Addr = dyn_cast(Loc.Ptr)) + if (TI.mayHappenInParallel(Call->getParent(), Addr->getParent())) + return ModRefInfo::NoModRef; + + return AAResultBase::getModRefInfo(Call, Loc, AAQI); +} + +ModRefInfo DRFAAResult::getModRefInfo(const CallBase *Call1, + const CallBase *Call2, + AAQueryInfo &AAQI) { + if (!EnableDRFAA) + return AAResultBase::getModRefInfo(Call1, Call2, AAQI); + + 
LLVM_DEBUG(dbgs() << "DRFAA:getModRefInfo(Call1, Call2)\n"); + + if (TI.mayHappenInParallel(Call1->getParent(), Call2->getParent())) + return ModRefInfo::NoModRef; + + return AAResultBase::getModRefInfo(Call1, Call2, AAQI); +} + +AnalysisKey DRFAA::Key; + +DRFAAResult DRFAA::run(Function &F, FunctionAnalysisManager &AM) { + return DRFAAResult(AM.getResult(F)); +} + +char DRFAAWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(DRFAAWrapperPass, "drf-aa", + "DRF-based Alias Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(TaskInfoWrapperPass) +INITIALIZE_PASS_END(DRFAAWrapperPass, "drf-aa", + "DRF-based Alias Analysis", false, true) + +FunctionPass *llvm::createDRFAAWrapperPass() { + return new DRFAAWrapperPass(); +} + +DRFAAWrapperPass::DRFAAWrapperPass() : FunctionPass(ID) { + initializeDRFAAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +bool DRFAAWrapperPass::runOnFunction(Function &F) { + Result.reset( + new DRFAAResult(getAnalysis().getTaskInfo())); + return false; +} + +void DRFAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); +} diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index 1bce9aae09bb26..6bc5bb4afa1d30 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -54,6 +54,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Delinearization.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" @@ -376,6 +377,26 @@ bool FullDependence::isSplitable(unsigned Level) const { } +//===----------------------------------------------------------------------===// +// GeneralAccess methods + +raw_ostream &llvm::operator<<(raw_ostream &OS, const GeneralAccess &GA) { + if (!GA.isValid()) + OS << "(invalid GeneralAccess)"; + else { + OS 
<< "(GA.I: " << *GA.I; + OS << ", GA.Loc: "; + if (!GA.Loc) + OS << "nullptr"; + else + OS << *GA.Loc->Ptr; + OS << ", GA.OperandNum: " << static_cast(GA.OperandNum); + OS << ", GA.ModRef: " << static_cast(GA.ModRef); + OS << ")"; + } + return OS; +} + //===----------------------------------------------------------------------===// // DependenceInfo::Constraint methods @@ -831,6 +852,7 @@ void DependenceInfo::establishNestingLevels(const Instruction *Src, } CommonLevels = SrcLevel; MaxLevels -= CommonLevels; + CommonLoop = SrcLoop; } @@ -1049,7 +1071,7 @@ DependenceInfo::classifyPair(const SCEV *Src, const Loop *SrcLoopNest, // we try simple subtraction, which seems to help in some cases // involving symbolics. bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X, - const SCEV *Y) const { + const SCEV *Y, const Loop *L) const { if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) { if ((isa(X) && @@ -1068,6 +1090,9 @@ bool DependenceInfo::isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *X, } if (SE->isKnownPredicate(Pred, X, Y)) return true; + if (L && isLoopInvariant(X, L) && isLoopInvariant(Y, L) && + isTrueAtLoopEntry(L, Pred, X, Y)) + return true; // If SE->isKnownPredicate can't prove the condition, // we try the brute-force approach of subtracting // and testing the difference. 
@@ -2804,10 +2829,10 @@ bool DependenceInfo::testBounds(unsigned char DirKind, unsigned Level, BoundInfo *Bound, const SCEV *Delta) const { Bound[Level].Direction = DirKind; if (const SCEV *LowerBound = getLowerBound(Bound)) - if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta)) + if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta, CommonLoop)) return false; if (const SCEV *UpperBound = getUpperBound(Bound)) - if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound)) + if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound, CommonLoop)) return false; return true; } @@ -2842,10 +2867,12 @@ void DependenceInfo::findBoundsALL(CoefficientInfo *A, CoefficientInfo *B, } else { // If the difference is 0, we won't need to know the number of iterations. - if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart)) + if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart, + CommonLoop)) Bound[K].Lower[Dependence::DVEntry::ALL] = SE->getZero(A[K].Coeff->getType()); - if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart)) + if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart, + CommonLoop)) Bound[K].Upper[Dependence::DVEntry::ALL] = SE->getZero(A[K].Coeff->getType()); } @@ -2980,14 +3007,43 @@ void DependenceInfo::findBoundsGT(CoefficientInfo *A, CoefficientInfo *B, } +// Returns true if predicate LHS `Pred` RHS is true at entry of L. 
+bool DependenceInfo::isTrueAtLoopEntry(const Loop *L, ICmpInst::Predicate Pred, + const SCEV *LHS, const SCEV *RHS) const { + return SE->isLoopEntryGuardedByCond(L, Pred, LHS, RHS); +} + + // X^+ = max(X, 0) const SCEV *DependenceInfo::getPositivePart(const SCEV *X) const { + if (CommonLoop) { + const SCEV *Zero = SE->getZero(X->getType()); + if (!SE->isLoopInvariant(X, CommonLoop)) + return SE->getSMaxExpr(X, SE->getZero(X->getType())); + if (isTrueAtLoopEntry(CommonLoop, CmpInst::ICMP_SGT, X, Zero) || + isTrueAtLoopEntry(CommonLoop, CmpInst::ICMP_SGT, + Zero, SE->getNegativeSCEV(X))) + return X; + if (isTrueAtLoopEntry(CommonLoop, CmpInst::ICMP_SGE, Zero, X)) + return Zero; + } return SE->getSMaxExpr(X, SE->getZero(X->getType())); } // X^- = min(X, 0) const SCEV *DependenceInfo::getNegativePart(const SCEV *X) const { + if (CommonLoop) { + const SCEV *Zero = SE->getZero(X->getType()); + if (!SE->isLoopInvariant(X, CommonLoop)) + return SE->getSMinExpr(X, SE->getZero(X->getType())); + if (isTrueAtLoopEntry(CommonLoop, CmpInst::ICMP_SGT, Zero, X) || + isTrueAtLoopEntry(CommonLoop, CmpInst::ICMP_SGT, + SE->getNegativeSCEV(X), Zero)) + return X; + if (isTrueAtLoopEntry(CommonLoop, CmpInst::ICMP_SGE, X, Zero)) + return Zero; + } return SE->getSMinExpr(X, SE->getZero(X->getType())); } @@ -3013,6 +3069,14 @@ DependenceInfo::collectCoeffInfo(const SCEV *Subscript, bool SrcFlag, CI[K].PosPart = getPositivePart(CI[K].Coeff); CI[K].NegPart = getNegativePart(CI[K].Coeff); CI[K].Iterations = collectUpperBound(L, Subscript->getType()); + if (const SCEVCastExpr *Cast = + dyn_cast(CI[K].PosPart)) { + auto *ReplSCEV = SE->getZeroExtendExpr(Cast->getOperand(), + Subscript->getType()); + if (CI[K].Coeff == CI[K].PosPart) + CI[K].Coeff = ReplSCEV; + CI[K].PosPart = ReplSCEV; + } Subscript = AddRec->getStart(); } Constant = Subscript; @@ -4199,3 +4263,678 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep, llvm_unreachable("somehow reached end of routine"); return 
nullptr; } + +static Value *getGeneralAccessPointerOperand(GeneralAccess *A) { + return const_cast(A->Loc->Ptr); +} + +static +const SCEV *getElementSize(GeneralAccess *A, ScalarEvolution *SE) { + Type *Ty = getGeneralAccessPointerOperand(A)->getType(); + Type *ETy = SE->getEffectiveSCEVType(PointerType::getUnqual(Ty)); + if (A->Loc) { + if (A->Loc->Size.hasValue()) + return SE->getConstant(ETy, A->Loc->Size.getValue()); + else + return SE->getCouldNotCompute(); + } else + return SE->getCouldNotCompute(); +} + +/// Check if we can delinearize the subscripts. If the SCEVs representing the +/// source and destination array references are recurrences on a nested loop, +/// this function flattens the nested recurrences into separate recurrences +/// for each loop level. +bool DependenceInfo::tryDelinearize(GeneralAccess *SrcA, GeneralAccess *DstA, + SmallVectorImpl &Pair) { + Value *SrcPtr = getGeneralAccessPointerOperand(SrcA); + Value *DstPtr = getGeneralAccessPointerOperand(DstA); + + Loop *SrcLoop = LI->getLoopFor(SrcA->I->getParent()); + Loop *DstLoop = LI->getLoopFor(DstA->I->getParent()); + + // Below code mimics the code in Delinearization.cpp + const SCEV *SrcAccessFn = SE->getSCEVAtScope(SrcPtr, SrcLoop); + const SCEV *DstAccessFn = SE->getSCEVAtScope(DstPtr, DstLoop); + const SCEVUnknown *SrcBase = + dyn_cast(SE->getPointerBase(SrcAccessFn)); + const SCEVUnknown *DstBase = + dyn_cast(SE->getPointerBase(DstAccessFn)); + + if (!SrcBase || !DstBase || SrcBase != DstBase) + return false; + + + SmallVector SrcSubscripts, DstSubscripts; + + if (!tryDelinearizeFixedSize(SrcA, DstA, SrcAccessFn, DstAccessFn, + SrcSubscripts, DstSubscripts) && + !tryDelinearizeParametricSize(SrcA, DstA, SrcAccessFn, DstAccessFn, + SrcSubscripts, DstSubscripts)) + return false; + + int Size = SrcSubscripts.size(); + LLVM_DEBUG({ + dbgs() << "\nSrcSubscripts: "; + for (int I = 0; I < Size; I++) + dbgs() << *SrcSubscripts[I]; + dbgs() << "\nDstSubscripts: "; + for (int I = 0; I < Size; 
I++) + dbgs() << *DstSubscripts[I]; + }); + + // The delinearization transforms a single-subscript MIV dependence test into + // a multi-subscript SIV dependence test that is easier to compute. So we + // resize Pair to contain as many pairs of subscripts as the delinearization + // has found, and then initialize the pairs following the delinearization. + Pair.resize(Size); + for (int I = 0; I < Size; ++I) { + Pair[I].Src = SrcSubscripts[I]; + Pair[I].Dst = DstSubscripts[I]; + unifySubscriptType(&Pair[I]); + } + + return true; +} + +static bool tryDelinearizeGAFixedSizeImpl( + ScalarEvolution *SE, GeneralAccess *GA, const SCEV *AccessFn, + SmallVectorImpl &Subscripts, SmallVectorImpl &Sizes) { + Value *SrcPtr = getGeneralAccessPointerOperand(GA); + + // Check the simple case where the array dimensions are fixed size. + auto *SrcGEP = dyn_cast(SrcPtr); + if (!SrcGEP) + return false; + + getIndexExpressionsFromGEP(*SE, SrcGEP, Subscripts, Sizes); + + // Check that the two size arrays are non-empty and equal in length and + // value. + // TODO: it would be better to let the caller to clear Subscripts, similar + // to how we handle Sizes. + if (Sizes.empty() || Subscripts.size() <= 1) { + Subscripts.clear(); + return false; + } + + // Check that for identical base pointers we do not miss index offsets + // that have been added before this GEP is applied. 
+ Value *SrcBasePtr = SrcGEP->getOperand(0)->stripPointerCasts(); + const SCEVUnknown *SrcBase = + dyn_cast(SE->getPointerBase(AccessFn)); + if (!SrcBase || SrcBasePtr != SrcBase->getValue()) { + Subscripts.clear(); + return false; + } + + assert(Subscripts.size() == Sizes.size() + 1 && + "Expected equal number of entries in the list of size and " + "subscript."); + + return true; +} + +bool DependenceInfo::tryDelinearizeFixedSize( + GeneralAccess *SrcA, GeneralAccess *DstA, const SCEV *SrcAccessFn, + const SCEV *DstAccessFn, SmallVectorImpl &SrcSubscripts, + SmallVectorImpl &DstSubscripts) { + LLVM_DEBUG({ + const SCEVUnknown *SrcBase = + dyn_cast(SE->getPointerBase(SrcAccessFn)); + const SCEVUnknown *DstBase = + dyn_cast(SE->getPointerBase(DstAccessFn)); + assert(SrcBase && DstBase && SrcBase == DstBase && + "expected src and dst scev unknowns to be equal"); + }); + + SmallVector SrcSizes; + SmallVector DstSizes; + if (!tryDelinearizeGAFixedSizeImpl(SE, SrcA, SrcAccessFn, SrcSubscripts, + SrcSizes) || + !tryDelinearizeGAFixedSizeImpl(SE, DstA, DstAccessFn, DstSubscripts, + DstSizes)) + return false; + + // Check that the two size arrays are non-empty and equal in length and + // value. + if (SrcSizes.size() != DstSizes.size() || + !std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) { + SrcSubscripts.clear(); + DstSubscripts.clear(); + return false; + } + + assert(SrcSubscripts.size() == DstSubscripts.size() && + "Expected equal number of entries in the list of SrcSubscripts and " + "DstSubscripts."); + + Value *SrcPtr = getGeneralAccessPointerOperand(SrcA); + Value *DstPtr = getGeneralAccessPointerOperand(DstA); + + // In general we cannot safely assume that the subscripts recovered from GEPs + // are in the range of values defined for their corresponding array + // dimensions. For example some C language usage/interpretation make it + // impossible to verify this at compile-time. 
As such we give up here unless + // we can assume that the subscripts do not overlap into neighboring + // dimensions and that the number of dimensions matches the number of + // subscripts being recovered. + if (!DisableDelinearizationChecks) { + auto AllIndiciesInRange = [&](SmallVector &DimensionSizes, + SmallVectorImpl &Subscripts, + Value *Ptr) { + size_t SSize = Subscripts.size(); + for (size_t I = 1; I < SSize; ++I) { + const SCEV *S = Subscripts[I]; + if (!isKnownNonNegative(S, Ptr)) + return false; + if (auto *SType = dyn_cast(S->getType())) { + const SCEV *Range = SE->getConstant( + ConstantInt::get(SType, DimensionSizes[I - 1], false)); + if (!isKnownLessThan(S, Range)) + return false; + } + } + return true; + }; + + if (!AllIndiciesInRange(SrcSizes, SrcSubscripts, SrcPtr) || + !AllIndiciesInRange(DstSizes, DstSubscripts, DstPtr)) { + SrcSubscripts.clear(); + DstSubscripts.clear(); + return false; + } + } + LLVM_DEBUG({ + dbgs() << "Delinearized subscripts of fixed-size array\n" + << "SrcGEP:" << *SrcPtr << "\n" + << "DstGEP:" << *DstPtr << "\n"; + }); + return true; +} + +bool DependenceInfo::tryDelinearizeParametricSize( + GeneralAccess *SrcA, GeneralAccess *DstA, const SCEV *SrcAccessFn, + const SCEV *DstAccessFn, SmallVectorImpl &SrcSubscripts, + SmallVectorImpl &DstSubscripts) { + + Value *SrcPtr = getGeneralAccessPointerOperand(SrcA); + Value *DstPtr = getGeneralAccessPointerOperand(DstA); + const SCEVUnknown *SrcBase = + dyn_cast(SE->getPointerBase(SrcAccessFn)); + const SCEVUnknown *DstBase = + dyn_cast(SE->getPointerBase(DstAccessFn)); + assert(SrcBase && DstBase && SrcBase == DstBase && + "expected src and dst scev unknowns to be equal"); + + const SCEV *ElementSize = getElementSize(SrcA, SE); + if (isa(ElementSize)) + return false; + if (ElementSize != getElementSize(DstA, SE)) + return false; + + const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase); + const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase); + + const 
SCEVAddRecExpr *SrcAR = dyn_cast(SrcSCEV); + const SCEVAddRecExpr *DstAR = dyn_cast(DstSCEV); + if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) + return false; + + // First step: collect parametric terms in both array references. + SmallVector Terms; + collectParametricTerms(*SE, SrcAR, Terms); + collectParametricTerms(*SE, DstAR, Terms); + + // Second step: find subscript sizes. + SmallVector Sizes; + findArrayDimensions(*SE, Terms, Sizes, ElementSize); + + // Third step: compute the access functions for each subscript. + computeAccessFunctions(*SE, SrcAR, SrcSubscripts, Sizes); + computeAccessFunctions(*SE, DstAR, DstSubscripts, Sizes); + + // Fail when there is only a subscript: that's a linearized access function. + if (SrcSubscripts.size() < 2 || DstSubscripts.size() < 2 || + SrcSubscripts.size() != DstSubscripts.size()) + return false; + + size_t Size = SrcSubscripts.size(); + + // Statically check that the array bounds are in-range. The first subscript we + // don't have a size for and it cannot overflow into another subscript, so is + // always safe. The others need to be 0 <= subscript[i] < bound, for both src + // and dst. + // FIXME: It may be better to record these sizes and add them as constraints + // to the dependency checks. + if (!DisableDelinearizationChecks) + for (size_t I = 1; I < Size; ++I) { + if (!isKnownNonNegative(SrcSubscripts[I], SrcPtr)) + return false; + + if (!isKnownLessThan(SrcSubscripts[I], Sizes[I - 1])) + return false; + + if (!isKnownNonNegative(DstSubscripts[I], DstPtr)) + return false; + + if (!isKnownLessThan(DstSubscripts[I], Sizes[I - 1])) + return false; + } + + return true; +} + +// depends - +// Returns NULL if there is no dependence. +// Otherwise, return a Dependence with as many details as possible. 
+// Corresponds to Section 3.1 in the paper +// +// Practical Dependence Testing +// Goff, Kennedy, Tseng +// PLDI 1991 +std::unique_ptr +DependenceInfo::depends(GeneralAccess *SrcA, GeneralAccess *DstA, + bool PossiblyLoopIndependent) { + if (SrcA == DstA) + PossiblyLoopIndependent = false; + + Instruction *Src = SrcA->I; + Instruction *Dst = DstA->I; + + if (!Src || !Dst) + // If we don't have a source or destination instruction, we don't have a + // dependence. + return nullptr; + + if (!(Src->mayReadOrWriteMemory() && Dst->mayReadOrWriteMemory())) + // if both instructions don't reference memory, there's no dependence + return nullptr; + + if (!SrcA->isValid() || !DstA->isValid()) { + LLVM_DEBUG(dbgs() << "could not interpret general accesses\n"); + return std::make_unique(Src, Dst); + } + + Value *SrcPtr = getGeneralAccessPointerOperand(SrcA); + Value *DstPtr = getGeneralAccessPointerOperand(DstA); + + switch (underlyingObjectsAlias(AA, F->getParent()->getDataLayout(), + *DstA->Loc, *SrcA->Loc)) { + case AliasResult::MayAlias: + case AliasResult::PartialAlias: + // cannot analyse objects if we don't understand their aliasing. + LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n"); + return std::make_unique(Src, Dst); + case AliasResult::NoAlias: + // If the objects noalias, they are distinct, accesses are independent. + LLVM_DEBUG(dbgs() << "no alias\n"); + return nullptr; + case AliasResult::MustAlias: + break; // The underlying objects alias; test accesses for dependence. + } + + // If either Src or Dst is a call, and we are uncertain about the accessed + // location's size, give up. 
+ if (isa(Src)) + if (!SrcA->Loc->Size.hasValue()) + return std::make_unique(Src, Dst); + if (isa(Dst)) + if (!DstA->Loc->Size.hasValue()) + return std::make_unique(Src, Dst); + + // establish loop nesting levels + establishNestingLevels(Src, Dst); + LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); + LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); + + FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); + ++TotalArrayPairs; + + unsigned Pairs = 1; + SmallVector Pair(Pairs); + if (!SE->isSCEVable(SrcPtr->getType()) || + !SE->isSCEVable(DstPtr->getType())) { + LLVM_DEBUG(dbgs() << "can't analyze non-scevable pointers\n"); + return std::make_unique(Src, Dst); + } + const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); + const SCEV *DstSCEV = SE->getSCEV(DstPtr); + LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); + LLVM_DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); + if (SE->getPointerBase(SrcSCEV) != SE->getPointerBase(DstSCEV)) { + // If two pointers have different bases, trying to analyze indexes won't + // work; we can't compare them to each other. This can happen, for example, + // if one is produced by an LCSSA PHI node. + // + // We check this upfront so we don't crash in cases where getMinusSCEV() + // returns a SCEVCouldNotCompute. 
+ LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n"); + return std::make_unique(Src, Dst); + } + Pair[0].Src = SrcSCEV; + Pair[0].Dst = DstSCEV; + + if (Delinearize) { + if (tryDelinearize(SrcA, DstA, Pair)) { + LLVM_DEBUG(dbgs() << " delinearized\n"); + Pairs = Pair.size(); + } + } + + for (unsigned P = 0; P < Pairs; ++P) { + Pair[P].Loops.resize(MaxLevels + 1); + Pair[P].GroupLoops.resize(MaxLevels + 1); + Pair[P].Group.resize(Pairs); + removeMatchingExtensions(&Pair[P]); + Pair[P].Classification = + classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), + Pair[P].Dst, LI->getLoopFor(Dst->getParent()), + Pair[P].Loops); + Pair[P].GroupLoops = Pair[P].Loops; + Pair[P].Group.set(P); + LLVM_DEBUG(dbgs() << " subscript " << P << "\n"); + LLVM_DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); + LLVM_DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); + LLVM_DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); + LLVM_DEBUG(dbgs() << "\tloops = "); + LLVM_DEBUG(dumpSmallBitVector(Pair[P].Loops)); + } + + SmallBitVector Separable(Pairs); + SmallBitVector Coupled(Pairs); + + // Partition subscripts into separable and minimally-coupled groups + // Algorithm in paper is algorithmically better; + // this may be faster in practice. Check someday. + // + // Here's an example of how it works. Consider this code: + // + // for (i = ...) { + // for (j = ...) { + // for (k = ...) { + // for (l = ...) { + // for (m = ...) { + // A[i][j][k][m] = ...; + // ... = A[0][j][l][i + j]; + // } + // } + // } + // } + // } + // + // There are 4 subscripts here: + // 0 [i] and [0] + // 1 [j] and [j] + // 2 [k] and [l] + // 3 [m] and [i + j] + // + // We've already classified each subscript pair as ZIV, SIV, etc., + // and collected all the loops mentioned by pair P in Pair[P].Loops. + // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops + // and set Pair[P].Group = {P}. 
+ // + // Src Dst Classification Loops GroupLoops Group + // 0 [i] [0] SIV {1} {1} {0} + // 1 [j] [j] SIV {2} {2} {1} + // 2 [k] [l] RDIV {3,4} {3,4} {2} + // 3 [m] [i + j] MIV {1,2,5} {1,2,5} {3} + // + // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ. + // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc. + // + // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty. + // Next, 0 and 2. Again, the intersection of their GroupLoops is empty. + // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty, + // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added + // to either Separable or Coupled). + // + // Next, we consider 1 and 2. The intersection of the GroupLoops is empty. + // Next, 1 and 3. The intersection of their GroupLoops = {2}, not empty, + // so Pair[3].Group = {0, 1, 3} and Done = false. + // + // Next, we compare 2 against 3. The intersection of the GroupLoops is empty. + // Since Done remains true, we add 2 to the set of Separable pairs. + // + // Finally, we consider 3. There's nothing to compare it with, + // so Done remains true and we add it to the Coupled set. + // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}. + // + // In the end, we've got 1 separable subscript and 1 coupled group. 
+ for (unsigned SI = 0; SI < Pairs; ++SI) { + if (Pair[SI].Classification == Subscript::NonLinear) { + // ignore these, but collect loops for later + ++NonlinearSubscriptPairs; + collectCommonLoops(Pair[SI].Src, + LI->getLoopFor(Src->getParent()), + Pair[SI].Loops); + collectCommonLoops(Pair[SI].Dst, + LI->getLoopFor(Dst->getParent()), + Pair[SI].Loops); + Result.Consistent = false; + } else if (Pair[SI].Classification == Subscript::ZIV) { + // always separable + Separable.set(SI); + } + else { + // SIV, RDIV, or MIV, so check for coupled group + bool Done = true; + for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) { + SmallBitVector Intersection = Pair[SI].GroupLoops; + Intersection &= Pair[SJ].GroupLoops; + if (Intersection.any()) { + // accumulate set of all the loops in group + Pair[SJ].GroupLoops |= Pair[SI].GroupLoops; + // accumulate set of all subscripts in group + Pair[SJ].Group |= Pair[SI].Group; + Done = false; + } + } + if (Done) { + if (Pair[SI].Group.count() == 1) { + Separable.set(SI); + ++SeparableSubscriptPairs; + } + else { + Coupled.set(SI); + ++CoupledSubscriptPairs; + } + } + } + } + + LLVM_DEBUG(dbgs() << " Separable = "); + LLVM_DEBUG(dumpSmallBitVector(Separable)); + LLVM_DEBUG(dbgs() << " Coupled = "); + LLVM_DEBUG(dumpSmallBitVector(Coupled)); + + Constraint NewConstraint; + NewConstraint.setAny(SE); + + // test separable subscripts + for (unsigned SI : Separable.set_bits()) { + LLVM_DEBUG(dbgs() << "testing subscript " << SI); + switch (Pair[SI].Classification) { + case Subscript::ZIV: + LLVM_DEBUG(dbgs() << ", ZIV\n"); + if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result)) + return nullptr; + break; + case Subscript::SIV: { + LLVM_DEBUG(dbgs() << ", SIV\n"); + unsigned Level; + const SCEV *SplitIter = nullptr; + if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint, + SplitIter)) + return nullptr; + break; + } + case Subscript::RDIV: + LLVM_DEBUG(dbgs() << ", RDIV\n"); + if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result)) + return 
nullptr; + break; + case Subscript::MIV: + LLVM_DEBUG(dbgs() << ", MIV\n"); + if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result)) + return nullptr; + break; + default: + llvm_unreachable("subscript has unexpected classification"); + } + } + + if (Coupled.count()) { + // test coupled subscript groups + LLVM_DEBUG(dbgs() << "starting on coupled subscripts\n"); + LLVM_DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n"); + SmallVector Constraints(MaxLevels + 1); + for (unsigned II = 0; II <= MaxLevels; ++II) + Constraints[II].setAny(SE); + for (unsigned SI : Coupled.set_bits()) { + LLVM_DEBUG(dbgs() << "testing subscript group " << SI << " { "); + SmallBitVector Group(Pair[SI].Group); + SmallBitVector Sivs(Pairs); + SmallBitVector Mivs(Pairs); + SmallBitVector ConstrainedLevels(MaxLevels + 1); + SmallVector PairsInGroup; + for (unsigned SJ : Group.set_bits()) { + LLVM_DEBUG(dbgs() << SJ << " "); + if (Pair[SJ].Classification == Subscript::SIV) + Sivs.set(SJ); + else + Mivs.set(SJ); + PairsInGroup.push_back(&Pair[SJ]); + } + unifySubscriptType(PairsInGroup); + LLVM_DEBUG(dbgs() << "}\n"); + while (Sivs.any()) { + bool Changed = false; + for (unsigned SJ : Sivs.set_bits()) { + LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n"); + // SJ is an SIV subscript that's part of the current coupled group + unsigned Level; + const SCEV *SplitIter = nullptr; + LLVM_DEBUG(dbgs() << "SIV\n"); + if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint, + SplitIter)) + return nullptr; + ConstrainedLevels.set(Level); + if (intersectConstraints(&Constraints[Level], &NewConstraint)) { + if (Constraints[Level].isEmpty()) { + ++DeltaIndependence; + return nullptr; + } + Changed = true; + } + Sivs.reset(SJ); + } + if (Changed) { + // propagate, possibly creating new SIVs and ZIVs + LLVM_DEBUG(dbgs() << " propagating\n"); + LLVM_DEBUG(dbgs() << "\tMivs = "); + LLVM_DEBUG(dumpSmallBitVector(Mivs)); + for (unsigned SJ : Mivs.set_bits()) { + // SJ is 
an MIV subscript that's part of the current coupled group + LLVM_DEBUG(dbgs() << "\tSJ = " << SJ << "\n"); + if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, + Constraints, Result.Consistent)) { + LLVM_DEBUG(dbgs() << "\t Changed\n"); + ++DeltaPropagations; + Pair[SJ].Classification = + classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()), + Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()), + Pair[SJ].Loops); + switch (Pair[SJ].Classification) { + case Subscript::ZIV: + LLVM_DEBUG(dbgs() << "ZIV\n"); + if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) + return nullptr; + Mivs.reset(SJ); + break; + case Subscript::SIV: + Sivs.set(SJ); + Mivs.reset(SJ); + break; + case Subscript::RDIV: + case Subscript::MIV: + break; + default: + llvm_unreachable("bad subscript classification"); + } + } + } + } + } + + // test & propagate remaining RDIVs + for (unsigned SJ : Mivs.set_bits()) { + if (Pair[SJ].Classification == Subscript::RDIV) { + LLVM_DEBUG(dbgs() << "RDIV test\n"); + if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result)) + return nullptr; + // I don't yet understand how to propagate RDIV results + Mivs.reset(SJ); + } + } + + // test remaining MIVs + // This code is temporary. + // Better to somehow test all remaining subscripts simultaneously. + for (unsigned SJ : Mivs.set_bits()) { + if (Pair[SJ].Classification == Subscript::MIV) { + LLVM_DEBUG(dbgs() << "MIV test\n"); + if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result)) + return nullptr; + } + else + llvm_unreachable("expected only MIV subscripts at this point"); + } + + // update Result.DV from constraint vector + LLVM_DEBUG(dbgs() << " updating\n"); + for (unsigned SJ : ConstrainedLevels.set_bits()) { + if (SJ > CommonLevels) + break; + updateDirection(Result.DV[SJ - 1], Constraints[SJ]); + if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE) + return nullptr; + } + } + } + + // Make sure the Scalar flags are set correctly. 
+ SmallBitVector CompleteLoops(MaxLevels + 1); + for (unsigned SI = 0; SI < Pairs; ++SI) + CompleteLoops |= Pair[SI].Loops; + for (unsigned II = 1; II <= CommonLevels; ++II) + if (CompleteLoops[II]) + Result.DV[II - 1].Scalar = false; + + if (PossiblyLoopIndependent) { + // Make sure the LoopIndependent flag is set correctly. + // All directions must include equal, otherwise no + // loop-independent dependence is possible. + for (unsigned II = 1; II <= CommonLevels; ++II) { + if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { + Result.LoopIndependent = false; + break; + } + } + } + else { + // On the other hand, if all directions are equal and there's no + // loop-independent dependence possible, then no dependence exists. + bool AllEqual = true; + for (unsigned II = 1; II <= CommonLevels; ++II) { + if (Result.getDirection(II) != Dependence::DVEntry::EQ) { + AllEqual = false; + break; + } + } + if (AllEqual) + return nullptr; + } + + return std::make_unique(std::move(Result)); +} diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index a2f46edcf5ef90..4f86282edc9177 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -479,6 +479,8 @@ class CallAnalyzer : public InstVisitor { bool visitCleanupReturnInst(CleanupReturnInst &RI); bool visitCatchReturnInst(CatchReturnInst &RI); bool visitUnreachableInst(UnreachableInst &I); + bool visitReattachInst(ReattachInst &RI); + bool visitSyncInst(SyncInst &RI); public: CallAnalyzer(Function &Callee, CallBase &Call, const TargetTransformInfo &TTI, @@ -2252,6 +2254,11 @@ bool CallAnalyzer::visitCallBase(CallBase &Call) { return simplifyIntrinsicCallIsConstant(Call); case Intrinsic::objectsize: return simplifyIntrinsicCallObjectSize(Call); + case Intrinsic::detached_rethrow: + case Intrinsic::taskframe_resume: + // Similarly to returns from a spawned task, we treat detached.rethrow and + // taskframe.resume intrinsics as free. 
+ return true; } } @@ -2435,6 +2442,16 @@ bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) { return true; // No actual code is needed for unreachable. } +bool CallAnalyzer::visitReattachInst(ReattachInst &RI) { + // We model reattach instructions as free, sort of like return instructions. + return true; +} + +bool CallAnalyzer::visitSyncInst(SyncInst &SI) { + // We model sync instructions as free, sort of like unconditional branches. + return true; +} + bool CallAnalyzer::visitInstruction(Instruction &I) { // Some instructions are free. All of the free intrinsics can also be // handled by SROA, etc. diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index fd0e81c51ac806..ced1483cea6a58 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -32,6 +32,7 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" @@ -142,6 +143,13 @@ static cl::opt SpeculateUnitStride( cl::desc("Speculate that non-constant strides are unit in LAA"), cl::init(true)); +/// Enable analysis using Tapir based on the data-race-free assumption. +static cl::opt EnableDRFAA( + "enable-drf-laa", cl::Hidden, + cl::desc("Enable analysis using Tapir based on the data-race-free " + "assumption"), + cl::init(false)); + bool VectorizerParams::isInterleaveForced() { return ::VectorizationInterleave.getNumOccurrences() > 0; } @@ -1710,6 +1718,12 @@ void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) { Status = S; } +/// Returns true if this loop is logically parallel as indicated by Tapir. 
+static bool isLogicallyParallelViaTapir(const Loop *L, TaskInfo *TI) { + return L->wasDerivedFromTapirLoop() || + (TI && getTaskIfTapirLoopStructure(L, TI)); +} + /// Given a dependence-distance \p Dist between two /// memory accesses, that have the same stride whose absolute value is given /// in \p Stride, and that have the same type size \p TypeByteSize, @@ -1828,6 +1842,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, if (!AIsWrite && !BIsWrite) return Dependence::NoDep; + // Under certain assumptions, Tapir can guarantee that there are no + // loop-carried dependencies. + if (EnableDRFAA && isLogicallyParallelViaTapir(InnermostLoop, TI)) + return Dependence::NoDep; + // We cannot check pointers in different address spaces. if (APtr->getType()->getPointerAddressSpace() != BPtr->getType()->getPointerAddressSpace()) @@ -2055,6 +2074,12 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, Dependence::DepType Type = isDependent(*A.first, A.second, *B.first, B.second, Strides); + // Backward dependencies cannot happen in Tapir loops. + if ((Dependence::Backward == Type || + Dependence::BackwardVectorizable == Type || + Dependence::BackwardVectorizableButPreventsForwarding == Type) + && isLogicallyParallelViaTapir(InnermostLoop, TI)) + Type = Dependence::NoDep; mergeInStatus(Dependence::isSafeForVectorization(Type)); // Gather dependences unless we accumulated MaxDependences @@ -2145,7 +2170,7 @@ bool LoopAccessInfo::canAnalyzeLoop() { void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, const TargetLibraryInfo *TLI, - DominatorTree *DT) { + DominatorTree *DT, TaskInfo *TI) { // Holds the Load and Store instructions. 
SmallVector Loads; SmallVector Stores; @@ -2162,7 +2187,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, PtrRtChecking->Pointers.clear(); PtrRtChecking->Need = false; - const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); + const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel() || + (EnableDRFAA && isLogicallyParallelViaTapir(TheLoop, TI)); const bool EnableMemAccessVersioningOfLoop = EnableMemAccessVersioning && @@ -2209,6 +2235,10 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, !VFDatabase::getMappings(*Call).empty()) continue; + // Ignore Tapir instructions. + if (isa(&I) || isa(&I) || isa(&I)) + continue; + auto *Ld = dyn_cast(&I); if (!Ld) { recordAnalysis("CantVectorizeInstruction", Ld) @@ -2233,6 +2263,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, // Save 'store' instructions. Abort if other instructions write to memory. if (I.mayWriteToMemory()) { + // TODO: Determine if we should do something other than ignore Tapir + // instructions here. 
+ if (isa(&I) || isa(&I) || isa(&I)) + continue; + auto *St = dyn_cast(&I); if (!St) { recordAnalysis("CantVectorizeInstruction", St) @@ -2753,13 +2788,13 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) { LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AAResults *AA, - DominatorTree *DT, LoopInfo *LI) + DominatorTree *DT, LoopInfo *LI, TaskInfo *TI) : PSE(std::make_unique(*SE, *L)), PtrRtChecking(nullptr), DepChecker(std::make_unique(*PSE, L)), TheLoop(L) { PtrRtChecking = std::make_unique(*DepChecker, SE); if (canAnalyzeLoop()) { - analyzeLoop(AA, LI, TLI, DT); + analyzeLoop(AA, LI, TLI, DT, TI); } } @@ -2811,7 +2846,7 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L) { if (I.second) I.first->second = - std::make_unique(&L, &SE, TLI, &AA, &DT, &LI); + std::make_unique(&L, &SE, TLI, &AA, &DT, &LI, &TI); return *I.first->second; } @@ -2831,7 +2866,8 @@ bool LoopAccessInfoManager::invalidate( return Inv.invalidate(F, PA) || Inv.invalidate(F, PA) || Inv.invalidate(F, PA) || - Inv.invalidate(F, PA); + Inv.invalidate(F, PA) || + Inv.invalidate(F, PA); } LoopAccessInfoManager LoopAccessAnalysis::run(Function &F, @@ -2840,8 +2876,9 @@ LoopAccessInfoManager LoopAccessAnalysis::run(Function &F, auto &AA = FAM.getResult(F); auto &DT = FAM.getResult(F); auto &LI = FAM.getResult(F); + auto &TI = FAM.getResult(F); auto &TLI = FAM.getResult(F); - return LoopAccessInfoManager(SE, AA, DT, LI, &TLI); + return LoopAccessInfoManager(SE, AA, DT, LI, TI, &TLI); } AnalysisKey LoopAccessAnalysis::Key; diff --git a/llvm/lib/Analysis/LoopAnalysisManager.cpp b/llvm/lib/Analysis/LoopAnalysisManager.cpp index 74b1da86eb28d0..211052087cb315 100644 --- a/llvm/lib/Analysis/LoopAnalysisManager.cpp +++ b/llvm/lib/Analysis/LoopAnalysisManager.cpp @@ -11,6 +11,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TapirTaskInfo.h" 
#include "llvm/IR/Dominators.h" #include "llvm/IR/PassManagerImpl.h" #include @@ -54,6 +55,7 @@ bool LoopAnalysisManagerFunctionProxy::Result::invalidate( Inv.invalidate(F, PA) || Inv.invalidate(F, PA) || Inv.invalidate(F, PA) || + Inv.invalidate(F, PA) || invalidateMemorySSAAnalysis) { // Note that the LoopInfo may be stale at this point, however the loop // objects themselves remain the only viable keys that could be in the @@ -141,5 +143,6 @@ PreservedAnalyses llvm::getLoopPassPreservedAnalyses() { PA.preserve(); PA.preserve(); PA.preserve(); + PA.preserve(); return PA; } diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp index 60a72079e864c1..9d7d0b0fe263b2 100644 --- a/llvm/lib/Analysis/LoopInfo.cpp +++ b/llvm/lib/Analysis/LoopInfo.cpp @@ -57,6 +57,236 @@ static cl::opt // Loop implementation // +// Returns true if the basic block Succ that succeeds BB is the unwind +// destination of a detach. +static bool succIsDetachUnwind(const BasicBlock *BB, const BasicBlock *Succ) { + if (const DetachInst *DI = dyn_cast(BB->getTerminator())) + return Succ == DI->getUnwindDest(); + return false; +} + +/// Returns true if the given instruction performs a detached rethrow, false +/// otherwise. +static bool isDetachedRethrow(const Instruction *I, + const Value *SyncReg = nullptr) { + if (const InvokeInst *II = dyn_cast(I)) + if (const Function *Called = II->getCalledFunction()) + if (Intrinsic::detached_rethrow == Called->getIntrinsicID()) + if (!SyncReg || (SyncReg == II->getArgOperand(0))) + return true; + return false; +} + +/// Returns true if the given instruction performs a taskframe resume, false +/// otherwise.
+static bool isTaskFrameResume(const Instruction *I, + const Value *TaskFrame = nullptr) { + if (const InvokeInst *II = dyn_cast(I)) + if (const Function *Called = II->getCalledFunction()) + if (Intrinsic::taskframe_resume == Called->getIntrinsicID()) + if (!TaskFrame || (TaskFrame == II->getArgOperand(0))) + return true; + return false; +} + +/// Returns true if the given basic block is a placeholder successor of a +/// taskframe.resume or detached.rethrow. +static bool isTapirPlaceholderSuccessor(const BasicBlock *B) { + for (const BasicBlock *Pred : predecessors(B)) { + if (!isDetachedRethrow(Pred->getTerminator()) && + !isTaskFrameResume(Pred->getTerminator())) + return false; + + const InvokeInst *II = dyn_cast(Pred->getTerminator()); + if (B != II->getNormalDest()) + return false; + } + return true; +} + +/// Helper method to find loop-exit blocks that are contained within tasks +/// spawned within the loop. +static void getTaskExitsHelper(BasicBlock *TaskEntry, const Value *SyncRegion, + const Loop *L, + SmallPtrSetImpl &TaskExits) { + // Traverse the CFG to find the exit blocks from SubT. + SmallVector Worklist; + SmallPtrSet Visited; + Worklist.push_back(TaskEntry); + while (!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + if (!Visited.insert(BB).second) + continue; + + // Record any block found in the task that is not contained in the loop + if (!L->contains(BB)) + TaskExits.insert(BB); + + // Stop the CFG traversal at any reattach or detached.rethrow in the same + // sync region. + if (ReattachInst *RI = dyn_cast(BB->getTerminator())) + if (SyncRegion == RI->getSyncRegion()) + continue; + if (isDetachedRethrow(BB->getTerminator(), SyncRegion)) + continue; + + // For all other basic blocks, traverse all successors + for (BasicBlock *Succ : successors(BB)) + Worklist.push_back(Succ); + } +} + +/// getTaskExits - Get basic blocks that are outside of the loop, based on CFG +/// analysis, but inside tasks created within the loop. 
+/// +void Loop::getTaskExits(SmallPtrSetImpl &TaskExits) const { + SmallVector, 4> TaskEntriesToCheck; + for (auto *BB : blocks()) + if (DetachInst *DI = dyn_cast(BB->getTerminator())) + if (DI->hasUnwindDest()) + if (!contains(DI->getUnwindDest())) + TaskEntriesToCheck.push_back( + std::make_pair(DI->getDetached(), DI->getSyncRegion())); + + for (std::pair &TaskEntry : TaskEntriesToCheck) + getTaskExitsHelper(TaskEntry.first, TaskEntry.second, this, TaskExits); +} + +/// getExitingBlocks - Return all blocks inside the loop that have successors +/// outside of the loop. These are the blocks _inside of the current loop_ +/// which branch out. The returned list is always unique. +/// +void Loop::getExitingBlocks(SmallVectorImpl &ExitingBlocks, + bool IgnoreDetachUnwind) const { + assert(!isInvalid() && "Loop not in a valid state!"); + for (const auto BB : blocks()) + for (const auto *Succ : children(BB)) + if (!contains(Succ)) { + if (IgnoreDetachUnwind && succIsDetachUnwind(BB, Succ)) + continue; + // Not in current loop? It must be an exit block. + ExitingBlocks.push_back(BB); + break; + } +} + +/// getExitingBlock - If getExitingBlocks would return exactly one block, +/// return that block. Otherwise return null. +BasicBlock *Loop::getExitingBlock(bool IgnoreDetachUnwind) const { + assert(!isInvalid() && "Loop not in a valid state!"); + SmallVector ExitingBlocks; + getExitingBlocks(ExitingBlocks, IgnoreDetachUnwind); + if (ExitingBlocks.size() == 1) + return ExitingBlocks[0]; + return nullptr; +} + +/// getExitBlocks - Return all of the successor blocks of this loop. These +/// are the blocks _outside of the current loop_ which are branched to. 
+/// +void Loop::getExitBlocks( + SmallVectorImpl &ExitBlocks) const { + assert(!isInvalid() && "Loop not in a valid state!"); + std::vector Blocks(block_begin(), block_end()); + SmallPtrSet TaskExits; + getTaskExits(TaskExits); + Blocks.insert(Blocks.end(), TaskExits.begin(), TaskExits.end()); + + for (const auto BB : Blocks) + for (auto *Succ : children(BB)) + if (!contains(Succ) && !TaskExits.count(Succ) && + !isTapirPlaceholderSuccessor(Succ)) + // Not in current loop? It must be an exit block. + ExitBlocks.push_back(Succ); +} + +/// getExitBlock - If getExitBlocks would return exactly one block, +/// return that block. Otherwise return null. +BasicBlock *Loop::getExitBlock() const { + assert(!isInvalid() && "Loop not in a valid state!"); + SmallVector ExitBlocks; + getExitBlocks(ExitBlocks); + if (ExitBlocks.size() == 1) + return ExitBlocks[0]; + return nullptr; +} + +bool Loop::hasDedicatedExits() const { + // Each predecessor of each exit block of a normal loop is contained + // within the loop. + SmallVector UniqueExitBlocks; + getUniqueExitBlocks(UniqueExitBlocks); + SmallPtrSet TaskExits; + getTaskExits(TaskExits); + + for (BasicBlock *EB : UniqueExitBlocks) + for (BasicBlock *Predecessor : children>(EB)) + if (!contains(Predecessor) && !TaskExits.count(Predecessor)) + return false; + // All the requirements are met. + return true; +} + +// Helper function to get unique loop exits. Pred is a predicate pointing to +// BasicBlocks in a loop which should be considered to find loop exits. 
+template +void getUniqueExitBlocksOutsideTasksHelper( + const Loop *L, SmallVectorImpl &ExitBlocks, PredicateT Pred) { + assert(!L->isInvalid() && "Loop not in a valid state!"); + SmallPtrSet Visited; + std::vector Blocks(L->block_begin(), L->block_end()); + SmallPtrSet TaskExits; + L->getTaskExits(TaskExits); + Blocks.insert(Blocks.end(), TaskExits.begin(), TaskExits.end()); + + auto Filtered = make_filter_range(Blocks, Pred); + for (BasicBlock *BB : Filtered) { + for (BasicBlock *Successor : children(BB)) + if (!L->contains(Successor) && !TaskExits.count(Successor) && + !isTapirPlaceholderSuccessor(Successor)) + if (Visited.insert(Successor).second) + ExitBlocks.push_back(Successor); + } +} + +void Loop::getUniqueExitBlocks( + SmallVectorImpl &ExitBlocks) const { + getUniqueExitBlocksOutsideTasksHelper( + this, ExitBlocks, [](const BasicBlock *BB) { return true; }); +} + +void Loop::getUniqueNonLatchExitBlocks( + SmallVectorImpl &ExitBlocks) const { + const BasicBlock *Latch = getLoopLatch(); + assert(Latch && "Latch block must exists"); + getUniqueExitBlocksOutsideTasksHelper( + this, ExitBlocks, [Latch](const BasicBlock *BB) { return BB != Latch; }); +} + +BasicBlock *Loop::getUniqueExitBlock() const { + SmallVector UniqueExitBlocks; + getUniqueExitBlocks(UniqueExitBlocks); + if (UniqueExitBlocks.size() == 1) + return UniqueExitBlocks[0]; + return nullptr; +} + +/// getExitEdges - Return all pairs of (_inside_block_,_outside_block_). +void Loop::getExitEdges(SmallVectorImpl &ExitEdges) const { + assert(!isInvalid() && "Loop not in a valid state!"); + std::vector Blocks(block_begin(), block_end()); + SmallPtrSet TaskExits; + getTaskExits(TaskExits); + Blocks.insert(Blocks.end(), TaskExits.begin(), TaskExits.end()); + + for (const auto BB : Blocks) + for (auto *Succ : children(BB)) + if (!contains(Succ) && !TaskExits.count(Succ) && + !isTapirPlaceholderSuccessor(Succ)) + // Not in current loop? It must be an exit block. 
+ ExitEdges.emplace_back(BB, Succ); +} + bool Loop::isLoopInvariant(const Value *V) const { if (const Instruction *I = dyn_cast(V)) return !contains(I); @@ -429,7 +659,9 @@ bool Loop::isCanonical(ScalarEvolution &SE) const { // Check that 'BB' doesn't have any uses outside of the 'L' static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB, - const DominatorTree &DT, bool IgnoreTokens) { + const DominatorTree &DT, + SmallPtrSetImpl &TaskExits, + bool IgnoreTokens) { for (const Instruction &I : BB) { // Tokens can't be used in PHI nodes and live-out tokens prevent loop // optimizations, so for the purposes of considered LCSSA form, we @@ -451,7 +683,7 @@ static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB, // the use is anywhere in the loop. Most values are used in the same // block they are defined in. Also, blocks not reachable from the // entry are special; uses in them don't need to go through PHIs. - if (UserBB != &BB && !L.contains(UserBB) && + if (UserBB != &BB && !L.contains(UserBB) && !TaskExits.count(UserBB) && DT.isReachableFromEntry(UserBB)) return false; } @@ -461,8 +693,10 @@ static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB, bool Loop::isLCSSAForm(const DominatorTree &DT, bool IgnoreTokens) const { // For each block we check that it doesn't have any uses outside of this loop. + SmallPtrSet TaskExits; + getTaskExits(TaskExits); return all_of(this->blocks(), [&](const BasicBlock *BB) { - return isBlockInLCSSAForm(*this, *BB, DT, IgnoreTokens); + return isBlockInLCSSAForm(*this, *BB, DT, TaskExits, IgnoreTokens); }); } @@ -471,8 +705,11 @@ bool Loop::isRecursivelyLCSSAForm(const DominatorTree &DT, const LoopInfo &LI, // For each block we check that it doesn't have any uses outside of its // innermost loop. This process will transitively guarantee that the current // loop and all of the nested loops are in LCSSA form. 
+ SmallPtrSet TaskExits; + getTaskExits(TaskExits); return all_of(this->blocks(), [&](const BasicBlock *BB) { - return isBlockInLCSSAForm(*LI.getLoopFor(BB), *BB, DT, IgnoreTokens); + return isBlockInLCSSAForm(*LI.getLoopFor(BB), *BB, DT, TaskExits, + IgnoreTokens); }); } @@ -561,6 +798,31 @@ void Loop::setLoopMustProgress() { setLoopID(NewLoopID); } +void Loop::setDerivedFromTapirLoop() { + LLVMContext &Context = getHeader()->getContext(); + + MDNode *FromTapir = findOptionMDForLoop(this, "llvm.loop.fromtapirloop"); + + if (FromTapir) + return; + + MDNode *FromTapirMD = + MDNode::get(Context, MDString::get(Context, "llvm.loop.fromtapirloop")); + MDNode *LoopID = getLoopID(); + MDNode *NewLoopID = + makePostTransformationMetadata(Context, LoopID, {}, {FromTapirMD}); + setLoopID(NewLoopID); +} + +bool Loop::wasDerivedFromTapirLoop() const { + MDNode *FromTapir = findOptionMDForLoop(this, "llvm.loop.fromtapirloop"); + + if (FromTapir) + return true; + + return false; +} + bool Loop::isAnnotatedParallel() const { MDNode *DesiredLoopIdMetadata = getLoopID(); diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp index 53e089ba1feae5..26079dea0f8637 100644 --- a/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -371,6 +371,27 @@ static bool CheckedZextOrTrunc(APInt &I, unsigned IntTyBits) { return true; } +std::pair +llvm::getAllocSizeArgs(const CallBase *CB, const TargetLibraryInfo *TLI) { + // Note: This handles both explicitly listed allocation functions and + // allocsize. The code structure could stand to be cleaned up a bit. + const std::optional FnData = getAllocationSize(CB, TLI); + if (!FnData) + return std::make_pair(nullptr, nullptr); + + // Don't handle strdup-like functions. + if (FnData->AllocTy == StrDupLike) + return std::make_pair(nullptr, nullptr); + + if (FnData->SndParam < 0) + // Only have 1 size parameter. 
+ return std::make_pair(CB->getArgOperand(FnData->FstParam), nullptr); + + // Have 2 size parameters. + return std::make_pair(CB->getArgOperand(FnData->FstParam), + CB->getArgOperand(FnData->SndParam)); +} + std::optional llvm::getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI, function_ref Mapper) { diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 071ecdba8a54ac..463fd4aa066068 100644 --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" @@ -80,6 +81,11 @@ static cl::opt cl::desc("The number of blocks to scan during memory " "dependency analysis (default = 200)")); +static cl::opt + EnableDRF("enable-drf-memdep", cl::init(false), cl::Hidden, + cl::desc("Allow MemoryDependenceAnalysis to assume the program " + "is data-race free.")); + // Limit on the number of memdep results to process. static const unsigned int NumResultsLimit = 100; @@ -185,6 +191,11 @@ MemDepResult MemoryDependenceResults::getCallDependencyFrom( BasicBlock *BB) { unsigned Limit = getDefaultBlockScanLimit(); + if (EnableDRF && TI) + if ((TI->getTaskFor(BB) != TI->getTaskFor(Call->getParent())) + && TI->mayHappenInParallel(Call->getParent(), BB)) + return MemDepResult::getNonLocal(); + // Walk backwards through the block, looking for dependencies. 
while (ScanIt != BB->begin()) { Instruction *Inst = &*--ScanIt; @@ -241,6 +252,10 @@ MemDepResult MemoryDependenceResults::getPointerDependencyFrom( const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst, unsigned *Limit, BatchAAResults &BatchAA) { + if (EnableDRF && TI && QueryInst) + if ((TI->getTaskFor(BB) != TI->getTaskFor(QueryInst->getParent())) + && TI->mayHappenInParallel(QueryInst->getParent(), BB)) + return MemDepResult::getNonLocal(); MemDepResult InvariantGroupDependency = MemDepResult::getUnknown(); if (QueryInst != nullptr) { if (auto *LI = dyn_cast(QueryInst)) { @@ -1727,7 +1742,8 @@ MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) { auto &AC = AM.getResult(F); auto &TLI = AM.getResult(F); auto &DT = AM.getResult(F); - return MemoryDependenceResults(AA, AC, TLI, DT, DefaultBlockScanLimit); + auto *TI = EnableDRF ? &AM.getResult(F) : nullptr; + return MemoryDependenceResults(AA, AC, TLI, DT, DefaultBlockScanLimit, TI); } char MemoryDependenceWrapperPass::ID = 0; @@ -1738,6 +1754,7 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TaskInfoWrapperPass) INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep", "Memory Dependence Analysis", false, true) @@ -1755,6 +1772,8 @@ void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired(); AU.addRequired(); + if (EnableDRF) + AU.addRequired(); AU.addRequiredTransitive(); AU.addRequiredTransitive(); } @@ -1786,6 +1805,8 @@ bool MemoryDependenceWrapperPass::runOnFunction(Function &F) { auto &AC = getAnalysis().getAssumptionCache(F); auto &TLI = getAnalysis().getTLI(F); auto &DT = getAnalysis().getDomTree(); - MemDep.emplace(AA, AC, TLI, DT, BlockScanLimit); + auto *TI = + EnableDRF ? 
&getAnalysis().getTaskInfo() : nullptr; + MemDep.emplace(AA, AC, TLI, DT, BlockScanLimit, TI); return false; } diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp index d1665802826683..42b4aba7a03ccf 100644 --- a/llvm/lib/Analysis/MemorySSA.cpp +++ b/llvm/lib/Analysis/MemorySSA.cpp @@ -26,6 +26,7 @@ #include "llvm/Analysis/CFGPrinter.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/BasicBlock.h" @@ -68,6 +69,7 @@ INITIALIZE_PASS_BEGIN(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false, true) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TaskInfoWrapperPass) INITIALIZE_PASS_END(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false, true) @@ -87,6 +89,15 @@ static cl::opt VerifyMemorySSAX("verify-memoryssa", cl::location(VerifyMemorySSA), cl::Hidden, cl::desc("Enable verification of MemorySSA.")); +static cl::opt + EnableDRF("enable-drf-memoryssa", cl::init(false), cl::Hidden, + cl::desc("Allow MemorySSA to assume the program is " + "data-race free.")); + +static cl::opt RequireTI("require-taskinfo-memoryssa", cl::init(true), + cl::Hidden, + cl::desc("Require TaskInfo for MemorySSA.")); + const static char LiveOnEntryStr[] = "liveOnEntry"; namespace { @@ -172,7 +183,7 @@ class MemoryLocOrCall { IsCall = false; // There is no such thing as a memorylocation for a fence inst, and it is // unique in that regard. 
- if (!isa(Inst)) + if (!isa(Inst) && !isa(Inst)) Loc = MemoryLocation::get(Inst); } } @@ -279,10 +290,25 @@ static bool areLoadsReorderable(const LoadInst *Use, template static bool instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc, - const Instruction *UseInst, AliasAnalysisType &AA) { + const Instruction *UseInst, AliasAnalysisType &AA, + TaskInfo *TI) { Instruction *DefInst = MD->getMemoryInst(); assert(DefInst && "Defining instruction not actually an instruction"); + if (TI && EnableDRF) + if ((TI->getTaskFor(MD->getBlock()) != + TI->getTaskFor(UseInst->getParent())) && + TI->mayHappenInParallel(MD->getBlock(), UseInst->getParent())) + return false; + + // Check for invokes of detached.rethrow, taskframe.resume, or sync.unwind. + if (const InvokeInst *II = dyn_cast(DefInst)) + if (const Function *Called = II->getCalledFunction()) + if (Intrinsic::detached_rethrow == Called->getIntrinsicID() || + Intrinsic::taskframe_resume == Called->getIntrinsicID() || + Intrinsic::sync_unwind == Called->getIntrinsicID()) + return false; + if (const IntrinsicInst *II = dyn_cast(DefInst)) { // These intrinsics will show up as affecting memory, but they are just // markers, mostly. 
@@ -294,6 +320,12 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc, switch (II->getIntrinsicID()) { case Intrinsic::invariant_start: case Intrinsic::invariant_end: + case Intrinsic::syncregion_start: + case Intrinsic::taskframe_create: + case Intrinsic::taskframe_use: + case Intrinsic::taskframe_end: + case Intrinsic::taskframe_load_guard: + case Intrinsic::sync_unwind: case Intrinsic::assume: case Intrinsic::experimental_noalias_scope_decl: case Intrinsic::pseudoprobe: @@ -308,7 +340,12 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc, } if (auto *CB = dyn_cast_or_null(UseInst)) { - ModRefInfo I = AA.getModRefInfo(DefInst, CB); + bool SameSpindle = false; + if (TI && CB->isStrandPure() && + (TI->getSpindleFor(CB->getParent()) == + TI->getSpindleFor(DefInst->getParent()))) + SameSpindle = true; + ModRefInfo I = AA.getModRefInfo(DefInst, CB, SameSpindle); return isModOrRefSet(I); } @@ -323,20 +360,20 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc, template static bool instructionClobbersQuery(MemoryDef *MD, const MemoryUseOrDef *MU, const MemoryLocOrCall &UseMLOC, - AliasAnalysisType &AA) { + AliasAnalysisType &AA, TaskInfo *TI) { // FIXME: This is a temporary hack to allow a single instructionClobbersQuery // to exist while MemoryLocOrCall is pushed through places. if (UseMLOC.IsCall) return instructionClobbersQuery(MD, MemoryLocation(), MU->getMemoryInst(), - AA); + AA, TI); return instructionClobbersQuery(MD, UseMLOC.getLoc(), MU->getMemoryInst(), - AA); + AA, TI); } // Return true when MD may alias MU, return false otherwise. 
bool MemorySSAUtil::defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU, - AliasAnalysis &AA) { - return instructionClobbersQuery(MD, MU, MemoryLocOrCall(MU), AA); + AliasAnalysis &AA, TaskInfo *TI) { + return instructionClobbersQuery(MD, MU, MemoryLocOrCall(MU), AA, TI); } namespace { @@ -394,7 +431,7 @@ LLVM_ATTRIBUTE_UNUSED static void checkClobberSanity(const MemoryAccess *Start, MemoryAccess *ClobberAt, const MemoryLocation &StartLoc, const MemorySSA &MSSA, const UpwardsMemoryQuery &Query, BatchAAResults &AA, - bool AllowImpreciseClobber = false) { + TaskInfo *TI = nullptr, bool AllowImpreciseClobber = false) { assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?"); if (MSSA.isLiveOnEntryDef(Start)) { @@ -426,7 +463,7 @@ checkClobberSanity(const MemoryAccess *Start, MemoryAccess *ClobberAt, // since MD may only act as a clobber for 1 of N MemoryLocations. FoundClobber = FoundClobber || MSSA.isLiveOnEntryDef(MD); if (!FoundClobber) { - if (instructionClobbersQuery(MD, MAP.second, Query.Inst, AA)) + if (instructionClobbersQuery(MD, MAP.second, Query.Inst, AA, TI)) FoundClobber = true; } } @@ -441,7 +478,7 @@ checkClobberSanity(const MemoryAccess *Start, MemoryAccess *ClobberAt, if (MD == Start) continue; - assert(!instructionClobbersQuery(MD, MAP.second, Query.Inst, AA) && + assert(!instructionClobbersQuery(MD, MAP.second, Query.Inst, AA, TI) && "Found clobber before reaching ClobberAt!"); continue; } @@ -510,6 +547,7 @@ class ClobberWalker { const MemorySSA &MSSA; DominatorTree &DT; BatchAAResults *AA; + TaskInfo *TI; UpwardsMemoryQuery *Query; unsigned *UpwardWalkLimit; @@ -575,7 +613,7 @@ class ClobberWalker { if (!--*UpwardWalkLimit) return {Current, true}; - if (instructionClobbersQuery(MD, Desc.Loc, Query->Inst, *AA)) + if (instructionClobbersQuery(MD, Desc.Loc, Query->Inst, *AA, TI)) return {MD, true}; } } @@ -919,8 +957,8 @@ class ClobberWalker { } public: - ClobberWalker(const MemorySSA &MSSA, DominatorTree &DT) - : 
MSSA(MSSA), DT(DT) {} + ClobberWalker(const MemorySSA &MSSA, DominatorTree &DT, TaskInfo *TI) + : MSSA(MSSA), DT(DT), TI(TI) {} /// Finds the nearest clobber for the given query, optimizing phis if /// possible. @@ -956,7 +994,7 @@ class ClobberWalker { #ifdef EXPENSIVE_CHECKS if (!Q.SkipSelfAccess && *UpwardWalkLimit > 0) - checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, BAA); + checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, BAA, TI); #endif return Result; } @@ -987,7 +1025,8 @@ class MemorySSA::ClobberWalkerBase { MemorySSA *MSSA; public: - ClobberWalkerBase(MemorySSA *M, DominatorTree *D) : Walker(*M, *D), MSSA(M) {} + ClobberWalkerBase(MemorySSA *M, DominatorTree *D, TaskInfo *TI) + : Walker(*M, *D, TI), MSSA(M) {} MemoryAccess *getClobberingMemoryAccessBase(MemoryAccess *, const MemoryLocation &, @@ -1227,8 +1266,9 @@ void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) { } } -MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT) - : DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr), +MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT, + TaskInfo *TI) + : DT(DT), TI(TI), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr), SkipWalker(nullptr) { // Build MemorySSA using a batch alias analysis. This reuses the internal // state that AA collects during an alias()/getModRefInfo() call. 
This is @@ -1280,8 +1320,8 @@ namespace llvm { class MemorySSA::OptimizeUses { public: OptimizeUses(MemorySSA *MSSA, CachingWalker *Walker, BatchAAResults *BAA, - DominatorTree *DT) - : MSSA(MSSA), Walker(Walker), AA(BAA), DT(DT) {} + DominatorTree *DT, TaskInfo *TI) + : MSSA(MSSA), Walker(Walker), AA(BAA), DT(DT), TI(TI) {} void optimizeUses(); @@ -1311,6 +1351,7 @@ class MemorySSA::OptimizeUses { CachingWalker *Walker; BatchAAResults *AA; DominatorTree *DT; + TaskInfo *TI; }; } // end namespace llvm @@ -1442,7 +1483,7 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock( } MemoryDef *MD = cast(VersionStack[UpperBound]); - if (instructionClobbersQuery(MD, MU, UseMLOC, *AA)) { + if (instructionClobbersQuery(MD, MU, UseMLOC, *AA, TI)) { FoundClobberResult = true; break; } @@ -1551,7 +1592,7 @@ MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() { return Walker.get(); if (!WalkerBase) - WalkerBase = std::make_unique(this, DT); + WalkerBase = std::make_unique(this, DT, TI); Walker = std::make_unique(this, WalkerBase.get()); return Walker.get(); @@ -1562,7 +1603,7 @@ MemorySSAWalker *MemorySSA::getSkipSelfWalker() { return SkipWalker.get(); if (!WalkerBase) - WalkerBase = std::make_unique(this, DT); + WalkerBase = std::make_unique(this, DT, TI); SkipWalker = std::make_unique(this, WalkerBase.get()); return SkipWalker.get(); @@ -1728,6 +1769,7 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I, case Intrinsic::assume: case Intrinsic::experimental_noalias_scope_decl: case Intrinsic::pseudoprobe: + case Intrinsic::syncregion_start: return nullptr; } } @@ -1738,6 +1780,10 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I, if (!I->mayReadFromMemory() && !I->mayWriteToMemory()) return nullptr; + // Ignore detach instructions. 
+ if (isa(I)) + return nullptr; + bool Def, Use; if (Template) { Def = isa(Template); @@ -2139,9 +2185,9 @@ void MemorySSA::ensureOptimizedUses() { return; BatchAAResults BatchAA(*AA); - ClobberWalkerBase WalkerBase(this, DT); + ClobberWalkerBase WalkerBase(this, DT, TI); CachingWalker WalkerLocal(this, &WalkerBase); - OptimizeUses(this, &WalkerLocal, &BatchAA, DT).optimizeUses(); + OptimizeUses(this, &WalkerLocal, &BatchAA, DT, TI).optimizeUses(); IsOptimized = true; } @@ -2304,7 +2350,9 @@ MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F, FunctionAnalysisManager &AM) { auto &DT = AM.getResult(F); auto &AA = AM.getResult(F); - return MemorySSAAnalysis::Result(std::make_unique(F, &AA, &DT)); + TaskInfo *TI = &AM.getResult(F); + return MemorySSAAnalysis::Result( + std::make_unique(F, &AA, &DT, TI)); } bool MemorySSAAnalysis::Result::invalidate( @@ -2313,7 +2361,8 @@ bool MemorySSAAnalysis::Result::invalidate( auto PAC = PA.getChecker(); return !(PAC.preserved() || PAC.preservedSet>()) || Inv.invalidate(F, PA) || - Inv.invalidate(F, PA); + Inv.invalidate(F, PA) || + Inv.invalidate(F, PA); } PreservedAnalyses MemorySSAPrinterPass::run(Function &F, @@ -2361,12 +2410,17 @@ void MemorySSAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequiredTransitive(); AU.addRequiredTransitive(); + // TODO: Add TaskInfoWrapperPass to lib/Analysis/LoopPass.cpp to make this + // work? 
+ if (RequireTI || EnableDRF) + AU.addRequiredTransitive(); } bool MemorySSAWrapperPass::runOnFunction(Function &F) { auto &DT = getAnalysis().getDomTree(); auto &AA = getAnalysis().getAAResults(); - MSSA.reset(new MemorySSA(F, &AA, &DT)); + auto &TI = getAnalysis().getTaskInfo(); + MSSA.reset(new MemorySSA(F, &AA, &DT, &TI)); return false; } diff --git a/llvm/lib/Analysis/MustExecute.cpp b/llvm/lib/Analysis/MustExecute.cpp index d4b31f2b001878..7de271413a0e99 100644 --- a/llvm/lib/Analysis/MustExecute.cpp +++ b/llvm/lib/Analysis/MustExecute.cpp @@ -14,6 +14,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/Dominators.h" @@ -253,10 +254,33 @@ bool LoopSafetyInfo::allLoopPathsLeadToBlock(const Loop *CurLoop, return true; } +// Helper function to check if an instruction is guaranteed to execute in the +// task T containing it. +static bool isGuaranteedToExecuteInTask(const Instruction &Inst, + const DominatorTree *DT, + const Task *T) { + assert(T && T->encloses(Inst.getParent()) && "Inst is not in given task."); + // Examine all exiting blocks of the task. + for (const Spindle *S : + depth_first>(T->getEntrySpindle())) { + for (const BasicBlock *Exit : S->spindle_exits()) { + if (!T->isTaskExiting(Exit)) + continue; + + // If Inst does not dominate the exiting block, then it's not guaranteed + // to execute. + if (!DT->dominates(Inst.getParent(), Exit)) + return false; + } + } + return true; +} + /// Returns true if the instruction in a loop is guaranteed to execute at least /// once. 
bool SimpleLoopSafetyInfo::isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, + const TaskInfo *TI, const Loop *CurLoop) const { // If the instruction is in the header block for the loop (which is very // common), it is always guaranteed to dominate the exit blocks. Since this @@ -269,16 +293,75 @@ bool SimpleLoopSafetyInfo::isGuaranteedToExecute(const Instruction &Inst, return !HeaderMayThrow || Inst.getParent()->getFirstNonPHIOrDbg() == &Inst; + // If the instruction is inside of a subtask, verify that it dominates the + // exits of the subtask, and use the corresponding detach to determine whether + // the instruction is guaranteed to execute. + bool InstGuaranteedToExecuteInSubtask = true; + const Instruction *RepInst = &Inst; + if (TI) { + const Task *LoopTask = TI->getTaskFor(CurLoop->getHeader()); + while (InstGuaranteedToExecuteInSubtask) { + const Task *T = TI->getTaskFor(RepInst->getParent()); + // If the representative instruction and loop are in the same task, we're + // done traversing subtasks. + if (T == LoopTask) + break; + + // Check if the instruction is guaranteed to execute in its task. + if (!isGuaranteedToExecuteInTask(*RepInst, DT, T)) + InstGuaranteedToExecuteInSubtask = false; + else + // Use the task's detach in place of the original instruction. + RepInst = T->getDetach(); + } + } + + // If a subtask was found in which the instruction is not guaranteed to + // execute, then the instruction is not guaranteed to execute. + if (!InstGuaranteedToExecuteInSubtask) + return false; + // If there is a path from header to exit or latch that doesn't lead to our // instruction's block, return false. 
- return allLoopPathsLeadToBlock(CurLoop, Inst.getParent(), DT); + return allLoopPathsLeadToBlock(CurLoop, RepInst->getParent(), DT); } bool ICFLoopSafetyInfo::isGuaranteedToExecute(const Instruction &Inst, const DominatorTree *DT, + const TaskInfo *TI, const Loop *CurLoop) const { - return !ICF.isDominatedByICFIFromSameBlock(&Inst) && - allLoopPathsLeadToBlock(CurLoop, Inst.getParent(), DT); + if (ICF.isDominatedByICFIFromSameBlock(&Inst)) + return false; + + // If the instruction is inside of a subtask, verify that it dominates the + // exits of the subtask, and use the corresponding detach to determine whether + // the instruction is guaranteed to execute. + bool InstGuaranteedToExecuteInSubtask = true; + const Instruction *RepInst = &Inst; + if (TI) { + const Task *LoopTask = TI->getTaskFor(CurLoop->getHeader()); + while (InstGuaranteedToExecuteInSubtask) { + const Task *T = TI->getTaskFor(RepInst->getParent()); + // If the representative instruction and loop are in the same task, we're + // done traversing subtasks. + if (T == LoopTask) + break; + + // Check if the instruction is guaranteed to execute in its task. + if (!isGuaranteedToExecuteInTask(*RepInst, DT, T)) + InstGuaranteedToExecuteInSubtask = false; + else + // Use the task's detach in place of the original instruction. + RepInst = T->getDetach(); + } + } + + // If a subtask was found in which the instruction is not guaranteed to + // execute, then the instruction is not guaranteed to execute. 
+ if (!InstGuaranteedToExecuteInSubtask) + return false; + + return allLoopPathsLeadToBlock(CurLoop, RepInst->getParent(), DT); } bool ICFLoopSafetyInfo::doesNotWriteMemoryBefore(const BasicBlock *BB, @@ -309,13 +392,14 @@ bool ICFLoopSafetyInfo::doesNotWriteMemoryBefore(const Instruction &I, doesNotWriteMemoryBefore(BB, CurLoop); } -static bool isMustExecuteIn(const Instruction &I, Loop *L, DominatorTree *DT) { +static bool isMustExecuteIn(const Instruction &I, Loop *L, DominatorTree *DT, + TaskInfo *TI) { // TODO: merge these two routines. For the moment, we display the best // result obtained by *either* implementation. This is a bit unfair since no // caller actually gets the full power at the moment. SimpleLoopSafetyInfo LSI; LSI.computeLoopSafetyInfo(L); - return LSI.isGuaranteedToExecute(I, DT, L) || + return LSI.isGuaranteedToExecute(I, DT, TI, L) || isGuaranteedToExecuteForEveryIteration(&I, L); } @@ -327,11 +411,11 @@ class MustExecuteAnnotatedWriter : public AssemblyAnnotationWriter { public: MustExecuteAnnotatedWriter(const Function &F, - DominatorTree &DT, LoopInfo &LI) { + DominatorTree &DT, LoopInfo &LI, TaskInfo &TI) { for (const auto &I: instructions(F)) { Loop *L = LI.getLoopFor(I.getParent()); while (L) { - if (isMustExecuteIn(I, L, &DT)) { + if (isMustExecuteIn(I, L, &DT, &TI)) { MustExec[&I].push_back(L); } L = L->getParentLoop(); @@ -339,12 +423,12 @@ class MustExecuteAnnotatedWriter : public AssemblyAnnotationWriter { } } MustExecuteAnnotatedWriter(const Module &M, - DominatorTree &DT, LoopInfo &LI) { + DominatorTree &DT, LoopInfo &LI, TaskInfo &TI) { for (const auto &F : M) for (const auto &I: instructions(F)) { Loop *L = LI.getLoopFor(I.getParent()); while (L) { - if (isMustExecuteIn(I, L, &DT)) { + if (isMustExecuteIn(I, L, &DT, &TI)) { MustExec[&I].push_back(L); } L = L->getParentLoop(); @@ -742,8 +826,9 @@ PreservedAnalyses MustExecutePrinterPass::run(Function &F, FunctionAnalysisManager &AM) { auto &LI = AM.getResult(F); auto &DT = 
AM.getResult(F); + auto &TI = AM.getResult(F); - MustExecuteAnnotatedWriter Writer(F, DT, LI); + MustExecuteAnnotatedWriter Writer(F, DT, LI, TI); F.print(OS, &Writer); return PreservedAnalyses::all(); } diff --git a/llvm/lib/Analysis/TapirRaceDetect.cpp b/llvm/lib/Analysis/TapirRaceDetect.cpp new file mode 100644 index 00000000000000..a3f6e63ee1f378 --- /dev/null +++ b/llvm/lib/Analysis/TapirRaceDetect.cpp @@ -0,0 +1,2206 @@ +//===- TapirRaceDetect.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// TapirRaceDetect is an LLVM pass that analyses Tapir tasks and dependences +// between memory accesses to find accesses that might race. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/TapirRaceDetect.h" +#include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasSetTracker.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TapirTaskInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include 
"llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/SpecialCaseList.h" +#include "llvm/Support/VirtualFileSystem.h" + +using namespace llvm; + +#define DEBUG_TYPE "tapir-race-detect" + +static cl::opt + AssumeSafeMalloc( + "assume-safe-malloc", cl::init(true), cl::Hidden, + cl::desc("Assume that calls to allocation functions are safe.")); + +static cl::opt + IgnoreTerminationCalls( + "ignore-termination-calls", cl::init(true), cl::Hidden, + cl::desc("Ignore calls in program-terminating exit blocks.")); + +static cl::opt + MaxUsesToExploreCapture( + "max-uses-to-explore-capture", cl::init(unsigned(-1)), cl::Hidden, + cl::desc("Maximum number of uses to explore for a capture query.")); + +static cl::list ClABIListFiles( + "strat-ignorelist", + cl::desc("File listing native ABI functions and how the pass treats them"), + cl::Hidden); + +// Boilerplate for legacy and new pass managers + +TapirRaceDetect::Result +TapirRaceDetect::run(Function &F, FunctionAnalysisManager &FAM) { + auto &DT = FAM.getResult(F); + auto &LI = FAM.getResult(F); + auto &TI = FAM.getResult(F); + auto &DI = FAM.getResult(F); + auto &SE = FAM.getResult(F); + auto *TLI = &FAM.getResult(F); + return RaceInfo(&F, DT, LI, TI, DI, SE, TLI); +} + +AnalysisKey TapirRaceDetect::Key; + +INITIALIZE_PASS_BEGIN(TapirRaceDetectWrapperPass, "tapir-race-detect", + "Tapir Race Detection", true, true) +INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TaskInfoWrapperPass) +INITIALIZE_PASS_END(TapirRaceDetectWrapperPass, "tapir-race-detect", + "Tapir Race Detection", true, true) + +char TapirRaceDetectWrapperPass::ID = 0; + +TapirRaceDetectWrapperPass::TapirRaceDetectWrapperPass() : FunctionPass(ID) { + 
initializeTapirRaceDetectWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +bool TapirRaceDetectWrapperPass::runOnFunction(Function &F) { + auto &DT = getAnalysis().getDomTree(); + auto &LI = getAnalysis().getLoopInfo(); + auto &TI = getAnalysis().getTaskInfo(); + auto &DI = getAnalysis().getDI(); + auto &SE = getAnalysis().getSE(); + auto *TLI = &getAnalysis().getTLI(F); + Info.reset(new RaceInfo(&F, DT, LI, TI, DI, SE, TLI)); + return false; +} + +RaceInfo &TapirRaceDetectWrapperPass::getRaceInfo() const { return *Info; } + +void TapirRaceDetectWrapperPass::releaseMemory() { Info.reset(); } + +void TapirRaceDetectWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive(); + AU.addRequiredTransitive(); + AU.addRequiredTransitive(); + AU.addRequiredTransitive(); + AU.addRequired(); + AU.addRequiredTransitive(); +} + +FunctionPass *llvm::createTapirRaceDetectWrapperPass() { + return new TapirRaceDetectWrapperPass(); +} + +void TapirRaceDetectWrapperPass::print(raw_ostream &OS, + const Module *) const { + Info->print(OS); +} + +PreservedAnalyses +TapirRaceDetectPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) { + OS << "'Tapir race detection' for function '" << F.getName() << "':\n"; + FAM.getResult(F).print(OS); + return PreservedAnalyses::all(); +} + +bool RaceInfo::invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &Inv) { + // Check whether the analysis, all analyses on functions, or the function's + // CFG have been preserved. + auto PAC = PA.getChecker(); + return !(PAC.preserved() || PAC.preservedSet>() || + Inv.invalidate(F, PA) || + Inv.invalidate(F, PA) || + Inv.invalidate(F, PA) || + Inv.invalidate(F, PA) || + Inv.invalidate(F, PA) || + Inv.invalidate(F, PA)); +} + +// Copied from DataFlowSanitizer.cpp +static StringRef GetGlobalTypeString(const GlobalValue &G) { + // Types of GlobalVariables are always pointer types. 
+ Type *GType = G.getValueType(); + // For now we support ignoring struct types only. + if (StructType *SGType = dyn_cast(GType)) { + if (!SGType->isLiteral()) + return SGType->getName(); + } + return ""; +} + +namespace { + +// Copied and adapted from DataFlowSanitizer.cpp +class StratABIList { + std::unique_ptr SCL; + + public: + StratABIList() = default; + + void set(std::unique_ptr List) { SCL = std::move(List); } + + /// Returns whether either this function or its source file are listed in the + /// given category. + bool isIn(const Function &F, StringRef Category = StringRef()) const { + return isIn(*F.getParent(), Category) || + SCL->inSection("cilk", "fun", F.getName(), Category); + } + + /// Returns whether this type is listed in the given category. + bool isIn(const Type &Ty, StringRef Category = StringRef()) const { + const Type *ElTy = &Ty; + // We only handle struct types right now. + if (const StructType *STy = dyn_cast(ElTy)) + if (STy->hasName()) + return SCL->inSection("cilk", "type", STy->getName(), Category); + return false; + } + + bool isIn(const GlobalVariable &GV, StringRef Category = StringRef()) const { + return isIn(*GV.getParent(), Category) || + SCL->inSection("cilk", "global", GV.getName(), Category); + } + + /// Returns whether this global alias is listed in the given category. + /// + /// If GA aliases a function, the alias's name is matched as a function name + /// would be. Similarly, aliases of globals are matched like globals. + bool isIn(const GlobalAlias &GA, StringRef Category = StringRef()) const { + if (isIn(*GA.getParent(), Category)) + return true; + + if (isa(GA.getValueType())) + return SCL->inSection("cilk", "fun", GA.getName(), Category); + + return SCL->inSection("cilk", "global", GA.getName(), Category) || + SCL->inSection("cilk", "type", GetGlobalTypeString(GA), + Category); + } + + /// Returns whether this module is listed in the given category. 
+ bool isIn(const Module &M, StringRef Category = StringRef()) const { + return SCL->inSection("cilk", "src", M.getModuleIdentifier(), Category); + } +}; + +// Structure to record the set of child tasks that might be in parallel with +// this spindle, ignoring back edges of loops. +// +// TODO: Improve this analysis to track the loop back edges responsible for +// specific maybe-parallel tasks. Use these back-edge tags to refine the +// dependence-analysis component of static race detection. Possible test case: +// intel/BlackScholes. +struct MaybeParallelTasksInLoopBody : public MaybeParallelTasks { + MPTaskListTy TaskList; + LoopInfo &LI; + + MaybeParallelTasksInLoopBody(LoopInfo &LI) : LI(LI) {} + + // This method performs the data-flow update computation on a given spindle. + bool evaluate(const Spindle *S, unsigned EvalNum) { + LLVM_DEBUG(dbgs() << "MPTInLoop::evaluate @ " << S->getEntry()->getName() + << "\n"); + if (!TaskList.count(S)) + TaskList.try_emplace(S); + + bool Complete = true; + for (const Spindle::SpindleEdge &PredEdge : S->in_edges()) { + const Spindle *Pred = PredEdge.first; + const BasicBlock *Inc = PredEdge.second; + + // If the incoming edge is a sync edge, get the associated sync region. + const Value *SyncRegSynced = nullptr; + if (const SyncInst *SI = dyn_cast(Inc->getTerminator())) + SyncRegSynced = SI->getSyncRegion(); + + // Skip back edges for this task list. + if (Loop *L = LI.getLoopFor(S->getEntry())) + if ((L->getHeader() == S->getEntry()) && L->contains(Inc)) + continue; + + // Iterate through the tasks in the task list for Pred. + for (const Task *MP : TaskList[Pred]) { + // Filter out any tasks that are synced by the sync region. + if (const DetachInst *DI = MP->getDetach()) + if (SyncRegSynced == DI->getSyncRegion()) + continue; + // Insert the task into this spindle's task list. If this task is a new + // addition, then we haven't yet reached the fixed point of this + // analysis. 
+ if (TaskList[S].insert(MP).second) + Complete = false; + } + } + LLVM_DEBUG({ + dbgs() << " New MPT list for " << S->getEntry()->getName() + << (Complete ? " (complete)\n" : " (not complete)\n"); + for (const Task *MP : TaskList[S]) + dbgs() << " " << MP->getEntry()->getName() << "\n"; + }); + return Complete; + } +}; + +class AccessPtrAnalysis { +public: + /// Read or write access location. + // using MemAccessInfo = PointerIntPair; + using MemAccessInfo = RaceInfo::MemAccessInfo; + // using MemAccessInfoList = SmallVector; + // using AccessToUnderlyingObjMap = + // DenseMap>; + using AccessToUnderlyingObjMap = RaceInfo::AccessToUnderlyingObjMap; + + AccessPtrAnalysis(DominatorTree &DT, TaskInfo &TI, LoopInfo &LI, + DependenceInfo &DI, ScalarEvolution &SE, + const TargetLibraryInfo *TLI, + AccessToUnderlyingObjMap &AccessToObjs) + : DT(DT), TI(TI), LI(LI), DI(DI), AA(DI.getAA()), SE(SE), TLI(TLI), + AccessToObjs(AccessToObjs), MPTasksInLoop(LI) { + TI.evaluateParallelState(MPTasks); + + std::vector AllABIListFiles; + AllABIListFiles.insert(AllABIListFiles.end(), ClABIListFiles.begin(), + ClABIListFiles.end()); + ABIList.set(SpecialCaseList::createOrDie(AllABIListFiles, + *vfs::getRealFileSystem())); + } + + void addFunctionArgument(Value *Arg); + void addAccess(Instruction *I); + + void processAccessPtrs(RaceInfo::ResultTy &Result, + RaceInfo::ObjectMRTy &ObjectMRForRace, + RaceInfo::PtrChecksTy &AllPtrRtChecks); + +private: + using PtrAccessSet = SetVector; + + void checkForRacesHelper(const Task *T, RaceInfo::ResultTy &Result, + RaceInfo::ObjectMRTy &ObjectMRForRace); + bool checkOpaqueAccesses(GeneralAccess &GA1, GeneralAccess &GA2); + void evaluateMaybeParallelAccesses(GeneralAccess &GA1, GeneralAccess &GA2, + RaceInfo::ResultTy &Result, + RaceInfo::ObjectMRTy &ObjectMRForRace); + bool checkDependence(std::unique_ptr D, GeneralAccess &GA1, + GeneralAccess &GA2); + // void getRTPtrChecks(Loop *L, RaceInfo::ResultTy &Result, + // RaceInfo::PtrChecksTy 
&AllPtrRtChecks); + + bool PointerCapturedBefore(const Value *Ptr, const Instruction *I, + unsigned MaxUsesToExplore) const; + + AliasResult underlyingObjectsAlias(const GeneralAccess &GAA, + const GeneralAccess &GAB); + + void recordLocalRace(const GeneralAccess &GA, RaceInfo::ResultTy &Result, + RaceInfo::ObjectMRTy &ObjectMRForRace, + const GeneralAccess &Competitor); + DominatorTree &DT; + TaskInfo &TI; + LoopInfo &LI; + DependenceInfo &DI; + AliasAnalysis *AA; + ScalarEvolution &SE; + + const TargetLibraryInfo *TLI; + SmallPtrSet ArgumentPtrs; + AccessToUnderlyingObjMap &AccessToObjs; + + MaybeParallelTasks MPTasks; + MaybeParallelTasksInLoopBody MPTasksInLoop; + + // A mapping of tasks to instructions in that task that might participate in a + // determinacy race. + using TaskAccessMapTy = DenseMap>; + TaskAccessMapTy TaskAccessMap; + + // A mapping of spindles to instructions in that spindle that might + // participate in a determinacy race. + using SpindleAccessMapTy = + DenseMap>; + SpindleAccessMapTy SpindleAccessMap; + + // A mapping of loops to instructions in that loop that might + // participate in a determinacy race. + using LoopAccessMapTy = DenseMap>; + LoopAccessMapTy LoopAccessMap; + + mutable DenseMap, bool> + MayBeCapturedCache; + + // /// We need to check that all of the pointers in this list are disjoint + // /// at runtime. Using std::unique_ptr to make using move ctor simpler. + // DenseMap AllPtrRtChecking; + + // ABI list to ignore. + StratABIList ABIList; +}; + +} // end anonymous namespace + +static bool isFreeFn(const Instruction *I, const TargetLibraryInfo *TLI) { + if (!isa(I)) + return false; + const CallBase *CB = dyn_cast(I); + + if (!TLI) + return false; + + if (getFreedOperand(CB, TLI)) + return true; + + // Ideally we would just use getFreedOperand to determine whether I is a call + // to a libfree funtion. But if -fno-builtin is used, then getFreedOperand + // won't recognize any libfree functions. 
For instrumentation purposes, + // it's sufficient to recognize the function name. + const StringRef FreeFnNames[] = { + "_ZdlPv", + "_ZdaPv", + "_ZdlPvj", + "_ZdlPvm", + "_ZdlPvRKSt9nothrow_t", + "_ZdlPvSt11align_val_t", + "_ZdaPvj", + "_ZdaPvm", + "_ZdaPvRKSt9nothrow_t", + "_ZdaPvSt11align_val_t", + "_ZdlPvSt11align_val_tRKSt9nothrow_t", + "_ZdaPvSt11align_val_tRKSt9nothrow_t", + "_ZdlPvjSt11align_val_t", + "_ZdlPvmSt11align_val_t", + "_ZdaPvjSt11align_val_t", + "_ZdaPvmSt11align_val_t", + "??3@YAXPAX@Z", + "??3@YAXPAXABUnothrow_t@std@@@Z", + "??3@YAXPAXI@Z", + "??3@YAXPEAX@Z", + "??3@YAXPEAXAEBUnothrow_t@std@@@Z", + "??3@YAXPEAX_K@Z", + "??_V@YAXPAX@Z", + "??_V@YAXPAXABUnothrow_t@std@@@Z", + "??_V@YAXPAXI@Z", + "??_V@YAXPEAX@Z", + "??_V@YAXPEAXAEBUnothrow_t@std@@@Z", + "??_V@YAXPEAX_K@Z", + "__kmpc_free_shared" + }; + + if (const Function *Called = CB->getCalledFunction()) { + StringRef FnName = Called->getName(); + if (!llvm::any_of(FreeFnNames, [&](const StringRef FreeFnName) { + return FnName == FreeFnName; + })) + return false; + + // Confirm that this function is a recognized library function + LibFunc F; + bool FoundLibFunc = TLI->getLibFunc(*Called, F); + return FoundLibFunc; + } + + return false; +} + +static bool isAllocFn(const Instruction *I, const TargetLibraryInfo *TLI) { + if (!isa(I)) + return false; + + if (!TLI) + return false; + + if (isAllocationFn(I, TLI)) + return true; + + // Ideally we would just use isAllocationFn to determine whether I is a call + // to an allocation funtion. But if -fno-builtin is used, then isAllocationFn + // won't recognize any allocation functions. For instrumentation purposes, + // it's sufficient to recognize the function name. 
+ const StringRef AllocFnNames[] = { + "_Znwj", + "_ZnwjRKSt9nothrow_t", + "_ZnwjSt11align_val_t", + "_ZnwjSt11align_val_tRKSt9nothrow_t", + "_Znwm", + "_ZnwmRKSt9nothrow_t", + "_ZnwmSt11align_val_t", + "_ZnwmSt11align_val_tRKSt9nothrow_t", + "_Znaj", + "_ZnajRKSt9nothrow_t", + "_ZnajSt11align_val_t", + "_ZnajSt11align_val_tRKSt9nothrow_t", + "_Znam", + "_ZnamRKSt9nothrow_t", + "_ZnamSt11align_val_t", + "_ZnamSt11align_val_tRKSt9nothrow_t", + "??2@YAPAXI@Z", + "??2@YAPAXIABUnothrow_t@std@@@Z", + "??2@YAPEAX_K@Z", + "??2@YAPEAX_KAEBUnothrow_t@std@@@Z", + "??_U@YAPAXI@Z", + "??_U@YAPAXIABUnothrow_t@std@@@Z", + "??_U@YAPEAX_K@Z", + "??_U@YAPEAX_KAEBUnothrow_t@std@@@Z", + "strdup", + "dunder_strdup", + "strndup", + "dunder_strndup", + "__kmpc_alloc_shared", + "posix_memalign" + }; + + if (const Function *Called = dyn_cast(I)->getCalledFunction()) { + StringRef FnName = Called->getName(); + if (!llvm::any_of(AllocFnNames, [&](const StringRef AllocFnName) { + return FnName == AllocFnName; + })) + return false; + + // Confirm that this function is a recognized library function + LibFunc F; + bool FoundLibFunc = TLI->getLibFunc(*Called, F); + return FoundLibFunc; + } + + return false; +} + +static bool isAllocFn(const Value *V, const TargetLibraryInfo *TLI) { + if (const CallBase *CB = dyn_cast(V)) + return isAllocFn(CB, TLI); + return false; +} + +static bool isReallocFn(const CallBase *Call) { + return (static_cast( + Call->getFnAttr(Attribute::AllocKind).getValueAsInt()) & + AllocFnKind::Realloc) != AllocFnKind::Unknown; +} + +static bool checkInstructionForRace(const Instruction *I, + const TargetLibraryInfo *TLI) { + if (isa(I) || isa(I) || isa(I) || + isa(I) || isa(I) || + isa(I) || isa(I)) + return true; + + if (const CallBase *Call = dyn_cast(I)) { + // Ignore debug info intrinsics + if (isa(I)) + return false; + + if (const Function *Called = Call->getCalledFunction()) { + // Check for detached.rethrow, taskframe.resume, or sync.unwind, which + // might be 
invoked. + if (Intrinsic::detached_rethrow == Called->getIntrinsicID() || + Intrinsic::taskframe_resume == Called->getIntrinsicID() || + Intrinsic::sync_unwind == Called->getIntrinsicID()) + return false; + + // Ignore CSI and Cilksan functions + if (Called->hasName() && (Called->getName().startswith("__csi") || + Called->getName().startswith("__csan") || + Called->getName().startswith("__cilksan"))) + return false; + } + + // Ignore other intrinsics. + if (const IntrinsicInst *II = dyn_cast(I)) { + // Ignore intrinsics that do not access memory. + if (II->doesNotAccessMemory()) + return false; + // TODO: Exclude all intrinsics for which + // TTI::getIntrinsicCost() == TCC_Free? + switch (II->getIntrinsicID()) { + default: return true; + case Intrinsic::annotation: + case Intrinsic::assume: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + case Intrinsic::experimental_noalias_scope_decl: + case Intrinsic::syncregion_start: + case Intrinsic::taskframe_create: + case Intrinsic::taskframe_use: + case Intrinsic::taskframe_end: + case Intrinsic::taskframe_load_guard: + case Intrinsic::sync_unwind: + return false; + } + } + + // We can assume allocation functions are safe. + if (AssumeSafeMalloc && isAllocFn(Call, TLI)) { + return isReallocFn(Call); + } + + // If this call occurs in a termination block of the program, ignore it. + if (IgnoreTerminationCalls && + isa(I->getParent()->getTerminator())) { + const Function *CF = Call->getCalledFunction(); + // If this function call is indirect, we want to instrument it. + if (!CF) + return true; + // If this is an ordinary function call in a terminating block, ignore it. 
+ if (!CF->hasFnAttribute(Attribute::NoReturn)) + return false; + // If this is a call to a terminating function, such as "exit" or "abort", + // ignore it. + if (CF->hasName() && + ((CF->getName() == "exit") || (CF->getName() == "abort") || + (CF->getName() == "__clang_call_terminate") || + (CF->getName() == "__assert_fail"))) + return false; + } + + // We want to instrument calls in general. + return true; + } + return false; +} + +// Get the general memory accesses for the instruction \p I, and stores those +// accesses into \p AccI. Returns true if general memory accesses could be +// derived for I, false otherwise. +static void GetGeneralAccesses( + Instruction *I, SmallVectorImpl &AccI, AliasAnalysis *AA, + const TargetLibraryInfo *TLI) { + // Handle common memory instructions + if (LoadInst *LI = dyn_cast(I)) { + MemoryLocation Loc = MemoryLocation::get(LI); + if (!AA->pointsToConstantMemory(Loc)) + AccI.push_back(GeneralAccess(LI, Loc, ModRefInfo::Ref)); + return; + } + if (StoreInst *SI = dyn_cast(I)) { + AccI.push_back(GeneralAccess(SI, MemoryLocation::get(SI), ModRefInfo::Mod)); + return; + } + // Handle atomic instructions + if (AtomicCmpXchgInst *CXI = dyn_cast(I)) { + AccI.push_back(GeneralAccess(CXI, MemoryLocation::get(CXI), + ModRefInfo::Mod)); + return; + } + if (AtomicRMWInst *RMWI = dyn_cast(I)) { + AccI.push_back(GeneralAccess(RMWI, MemoryLocation::get(RMWI), + ModRefInfo::Mod)); + return; + } + + // Handle VAArgs. + if (VAArgInst *VAAI = dyn_cast(I)) { + MemoryLocation Loc = MemoryLocation::get(VAAI); + if (!AA->pointsToConstantMemory(Loc)) + AccI.push_back(GeneralAccess(VAAI, Loc, ModRefInfo::ModRef)); + return; + } + + // Handle memory intrinsics. 
+ if (AnyMemSetInst *MSI = dyn_cast(I)) { + AccI.push_back(GeneralAccess(MSI, MemoryLocation::getForDest(MSI), + ModRefInfo::Mod)); + return; + } + if (AnyMemTransferInst *MTI = dyn_cast(I)) { + AccI.push_back(GeneralAccess(MTI, MemoryLocation::getForDest(MTI), + 0, ModRefInfo::Mod)); + MemoryLocation Loc = MemoryLocation::getForSource(MTI); + if (!AA->pointsToConstantMemory(Loc)) + AccI.push_back(GeneralAccess(MTI, Loc, 1, ModRefInfo::Ref)); + return; + } + + // Handle arbitrary call sites by examining pointee arguments. + // + // This logic is based on that in AliasSetTracker.cpp. + if (const CallBase *Call = dyn_cast(I)) { + ModRefInfo CallMask = AA->getMemoryEffects(Call).getModRef(); + + // Some intrinsics are marked as modifying memory for control flow modelling + // purposes, but don't actually modify any specific memory location. + using namespace PatternMatch; + if (Call->use_empty() && + match(Call, m_Intrinsic())) + CallMask &= ModRefInfo::Ref; + // TODO: See if we need to exclude additional intrinsics. + + if (isAllocFn(Call, TLI)) { + // Handle realloc as a special case. + if (isReallocFn(Call)) { + // TODO: Try to get the size of the object being copied from. + AccI.push_back(GeneralAccess(I, MemoryLocation::getForArgument( + Call, 0, TLI), 0, + AA->getArgModRefInfo(Call, 0))); + // If we assume malloc is safe, don't worry about opaque accesses by + // realloc. 
+ if (!AssumeSafeMalloc) + AccI.push_back(GeneralAccess(I, std::nullopt, CallMask)); + return; + } + } + + for (auto IdxArgPair : enumerate(Call->args())) { + int ArgIdx = IdxArgPair.index(); + const Value *Arg = IdxArgPair.value(); + if (!Arg->getType()->isPointerTy()) + continue; + MemoryLocation ArgLoc = + MemoryLocation::getForArgument(Call, ArgIdx, TLI); + if (AA->pointsToConstantMemory(ArgLoc)) + continue; + ModRefInfo ArgMask = AA->getArgModRefInfo(Call, ArgIdx); + ArgMask &= CallMask; + if (!isNoModRef(ArgMask)) { + AccI.push_back(GeneralAccess(I, ArgLoc, ArgIdx, ArgMask)); + } + } + + // If we find a free call and we assume malloc is safe, don't worry about + // opaque accesses by that free call. + if (AssumeSafeMalloc && getFreedOperand(Call, TLI)) + return; + + if (!Call->onlyAccessesArgMemory()) + // Add a generic GeneralAccess for this call to represent the fact that it + // might access arbitrary global memory. + AccI.push_back(GeneralAccess(I, std::nullopt, CallMask)); + return; + } +} + +void AccessPtrAnalysis::addFunctionArgument(Value *Arg) { + ArgumentPtrs.insert(Arg); +} + +void AccessPtrAnalysis::addAccess(Instruction *I) { + if (checkInstructionForRace(I, TLI)) { + + // Exclude calls to functions in ABIList. 
+ if (const CallBase *Call = dyn_cast(I)) { + if (const Function *CF = Call->getCalledFunction()) + if (ABIList.isIn(*CF)) + return; + } else { + MemoryLocation Loc = MemoryLocation::get(I); + if (Loc.Ptr) { + if (const Value *UnderlyingObj = getUnderlyingObject(Loc.Ptr, 0)) { + if (const GlobalVariable *GV = + dyn_cast(UnderlyingObj)) + if (ABIList.isIn(*GV)) + return; + if (ABIList.isIn(*UnderlyingObj->getType())) + return; + } + } + } + + SmallVector GA; + GetGeneralAccesses(I, GA, DI.getAA(), TLI); + TaskAccessMap[TI.getTaskFor(I->getParent())].append(GA.begin(), GA.end()); + SpindleAccessMap[TI.getSpindleFor(I->getParent())].append(GA.begin(), + GA.end()); + if (Loop *L = LI.getLoopFor(I->getParent())) + LoopAccessMap[L].append(GA.begin(), GA.end()); + + for (GeneralAccess Acc : GA) { + // Skip this access if it does not have a valid pointer. + if (!Acc.getPtr()) + continue; + + MemAccessInfo Access(Acc.getPtr(), Acc.isMod()); + // DepCands.insert(Access); + + SmallVector Objects; + LLVM_DEBUG(dbgs() << "Getting underlying objects for " << *Acc.getPtr() + << "\n"); + getUnderlyingObjects(const_cast(Acc.getPtr()), Objects, &LI, 0); + for (const Value *Obj : Objects) { + LLVM_DEBUG(dbgs() << " Considering object: " << *Obj << "\n"); + // nullptr never alias, don't join sets for pointer that have "null" in + // their UnderlyingObjects list. + if (isa(Obj) && + !NullPointerIsDefined(I->getFunction(), + Obj->getType()->getPointerAddressSpace())) + continue; + + // Is this value a constant that cannot be derived from any pointer + // value (we need to exclude constant expressions, for example, that + // are formed from arithmetic on global symbols). 
+ if (const Constant *C = dyn_cast(Obj)) { + // This check is derived from Transforms/Utils/InlineFunction.cpp + bool IsNonPtrConst = isa(C) || isa(C) || + isa(C) || isa(C) || + isa(C) || isa(C) || + isa(C); + if (IsNonPtrConst) + continue; + } + + if (const GlobalVariable *GV = dyn_cast(Obj)) + // Constant variables cannot race. + if (GV->isConstant()) + continue; + + if (isa(Obj)) + // Assume that functions are read-only + continue; + + LLVM_DEBUG(dbgs() << "Adding object for access:\n Obj: " << *Obj + << "\n Access: " << *Acc.getPtr() << "\n"); + AccessToObjs[Access].insert(Obj); + + // UnderlyingObjToAccessMap::iterator Prev = ObjToLastAccess.find(Obj); + // if (Prev != ObjToLastAccess.end()) + // DepCands.unionSets(Access, Prev->second); + + // ObjToLastAccess[Obj] = Access; + } + } + } +} + +static const Loop *getCommonLoop(const BasicBlock *B1, const BasicBlock *B2, + LoopInfo &LI) { + unsigned B1Level = LI.getLoopDepth(B1); + unsigned B2Level = LI.getLoopDepth(B2); + const Loop *L1 = LI.getLoopFor(B1); + const Loop *L2 = LI.getLoopFor(B2); + while (B1Level > B2Level) { + L1 = L1->getParentLoop(); + B1Level--; + } + while (B2Level > B1Level) { + L2 = L2->getParentLoop(); + B2Level--; + } + while (L1 != L2) { + L1 = L1->getParentLoop(); + L2 = L2->getParentLoop(); + } + return L1; +} + +static const Loop *getCommonLoop(const Loop *L, const BasicBlock *B, + LoopInfo &LI) { + unsigned L1Level = L->getLoopDepth(); + unsigned L2Level = LI.getLoopDepth(B); + const Loop *L1 = L; + const Loop *L2 = LI.getLoopFor(B); + while (L1Level > L2Level) { + L1 = L1->getParentLoop(); + L1Level--; + } + while (L2Level > L1Level) { + L2 = L2->getParentLoop(); + L2Level--; + } + while (L1 != L2) { + L1 = L1->getParentLoop(); + L2 = L2->getParentLoop(); + } + return L1; +} + +static const Spindle *GetRepSpindleInTask(const Spindle *S, const Task *T, + const TaskInfo &TI) { + const Task *Encl = T->getSubTaskEnclosing(S->getEntry()); + if (Encl->isRootTask()) + return S; + return 
TI.getSpindleFor(Encl->getDetach()->getContinue()); +} + +bool AccessPtrAnalysis::checkDependence(std::unique_ptr D, + GeneralAccess &GA1, + GeneralAccess &GA2) { + if (!D) { + LLVM_DEBUG(dbgs() << "No dependence\n"); + return false; + } + + LLVM_DEBUG({ + D->dump(dbgs()); + StringRef DepType = D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output"; + dbgs() << "Found " << DepType << " dependency between Src and Dst\n"; + unsigned Levels = D->getLevels(); + for (unsigned II = 1; II <= Levels; ++II) { + const SCEV *Distance = D->getDistance(II); + if (Distance) + dbgs() << "Level " << II << " distance " << *Distance << "\n"; + } + }); + + Instruction *I1 = GA1.I; + Instruction *I2 = GA2.I; + BasicBlock *B1 = I1->getParent(); + BasicBlock *B2 = I2->getParent(); + + // Only dependencies that cross tasks can produce determinacy races. + // Dependencies that cross loop iterations within the same task don't matter. + + // Find the deepest loop that contains both B1 and B2. + const Loop *CommonLoop = getCommonLoop(B1, B2, LI); + unsigned MaxLoopDepthToCheck = CommonLoop ? CommonLoop->getLoopDepth() : 0; + + // Check if dependence does not depend on looping. + if (0 == MaxLoopDepthToCheck) + // If there's no loop to worry about, then the existence of the dependence + // implies the potential for a race. + return true; + + // Use the base objects for the addresses to try to further refine the checks. + + // TODO: Use lifetime_begin intrinsics to further refine checks. 
+ const Loop *CommonObjLoop = CommonLoop; + unsigned MinObjDepth = CommonLoop->getLoopDepth(); + SmallPtrSet BaseObjs; + MemAccessInfo MA1(GA1.getPtr(), GA1.isMod()); + MemAccessInfo MA2(GA2.getPtr(), GA2.isMod()); + for (const Value *Obj : AccessToObjs[MA1]) { + if (AccessToObjs[MA2].count(Obj)) + BaseObjs.insert(Obj); + else { + MinObjDepth = 0; + break; + } + } + for (const Value *Obj : AccessToObjs[MA2]) { + if (AccessToObjs[MA1].count(Obj)) + BaseObjs.insert(Obj); + else { + MinObjDepth = 0; + break; + } + } + + // If we didn't find any base objects, we have no common-object loop. + if (BaseObjs.empty()) + CommonObjLoop = nullptr; + + // Set MinObjDepth to 0 if there are not base objects to check. + if (BaseObjs.empty() || !CommonObjLoop) + MinObjDepth = 0; + + if (MinObjDepth != 0) { + for (const Value *Obj : BaseObjs) { + // If there are no more levels of common loop to check, return. + if (!CommonObjLoop) + break; + + LLVM_DEBUG(dbgs() << "Checking base object " << *Obj << "\n"); + assert(!(isa(Obj) && + !NullPointerIsDefined(B1->getParent(), + Obj->getType()->getPointerAddressSpace())) + && "nullptr in list of base objects"); + + // If the object is not an instruction, then there's no common loop to + // find. + if (!isa(Obj)) { + CommonObjLoop = nullptr; + break; + } + + // This optimization of bounding the loop nest to check only applies if + // the underlying objects perform an allocation. + const Instruction *ObjI = dyn_cast(Obj); + if (!isa(ObjI) && !isa(ObjI)) { + CommonObjLoop = nullptr; + break; + } + if (isa(ObjI)) + // Update the common loop for the underlying objects. + CommonObjLoop = getCommonLoop(CommonObjLoop, ObjI->getParent(), LI); + else if (const CallBase *CB = dyn_cast(ObjI)) { + if (!CB->returnDoesNotAlias()) { + CommonObjLoop = nullptr; + break; + } + // Update the common loop for the underlying objects. 
+ CommonObjLoop = getCommonLoop(CommonObjLoop, ObjI->getParent(), LI); + } + } + } + // Save the depth of the common loop as the lower bound on the loop depth to + // check. + if (!CommonObjLoop) { + LLVM_DEBUG(dbgs() << "No common loop found for underlying objects.\n"); + MinObjDepth = 0; + } else + MinObjDepth = CommonObjLoop->getLoopDepth(); + + LLVM_DEBUG(dbgs() << "Min loop depth " << MinObjDepth << + " for underlying object.\n"); + + LLVM_DEBUG({ + if (MinObjDepth > MaxLoopDepthToCheck) { + dbgs() << "\tI1 " << *I1 << "\n\tI2 " << *I2; + dbgs() << "\n\tPtr1 " << *GA1.getPtr() + << " (null? " << (isa(GA1.getPtr())) << ")"; + dbgs() << "\n\tPtr2 " << *GA2.getPtr() + << " (null? " << (isa(GA2.getPtr())) << ")"; + dbgs() << "\n\tAddrspace " + << GA1.getPtr()->getType()->getPointerAddressSpace(); + dbgs() << "\n\tnullptr is defined? " + << (NullPointerIsDefined(B1->getParent())); + dbgs() << "\n\tMaxLoopDepthToCheck " << MaxLoopDepthToCheck; + dbgs() << "\n\tMinObjDepthToCheck " << MinObjDepth << "\n"; + } + }); + assert(MinObjDepth <= MaxLoopDepthToCheck && + "Minimum loop depth of underlying object cannot be greater " + "than maximum loop depth of dependence."); + + // Get the task that encloses both B1 and B2. + const Task *CommonTask = TI.getEnclosingTask(B1, B2); + // Get the representative spindles for both B1 and B2 in this common task. + const Spindle *I1Spindle = GetRepSpindleInTask(TI.getSpindleFor(B1), + CommonTask, TI); + const Spindle *I2Spindle = GetRepSpindleInTask(TI.getSpindleFor(B2), + CommonTask, TI); + // If this common loop does not contain the common task, then dependencies at + // the level of this common loop do not constitute a potential race. Find the + // loop that contains the enclosing task. + // + // Skip this step if either representative spindle is a shared-eh spindle, + // because those are more complicated. 
+ if (!I1Spindle->isSharedEH() && !I2Spindle->isSharedEH()) { + if (!CommonLoop->contains(CommonTask->getEntry())) { + const Loop *CommonTaskLoop = LI.getLoopFor(CommonTask->getEntry()); + // Typically, CommonTaskLoop is a subloop of CommonLoop. But that doesn't + // have to be true, e.g., if CommonLoop appears in an exit of + // CommonTaskLoop. + CommonLoop = CommonTaskLoop; + } + // Update MaxLoopDepthToCheck + MaxLoopDepthToCheck = CommonLoop ? CommonLoop->getLoopDepth() : 0; + + // Check if dependence does not depend on looping. + if (0 == MaxLoopDepthToCheck) + MaxLoopDepthToCheck = MinObjDepth; + } + + if (MaxLoopDepthToCheck == MinObjDepth) { + LLVM_DEBUG(dbgs() << "Minimum object depth matches maximum loop depth.\n"); + if (TI.getTaskFor(B1) == TI.getTaskFor(B2)) + return false; + + // Check if dependence does not depend on looping. + if (0 == MaxLoopDepthToCheck) + // If there's no loop to worry about, then the existence of the dependence + // implies the potential for a race. + return true; + + if (!(D->getDirection(MaxLoopDepthToCheck) & Dependence::DVEntry::EQ)) + // Apparent dependence does not occur within the same iteration. + return false; + + // Check if the instructions are parallel when the loop backedge is excluded + // from dataflow. + for (const Task *MPT : MPTasksInLoop.TaskList[I1Spindle]) + if (TI.encloses(MPT, B2)) + return true; + for (const Task *MPT : MPTasksInLoop.TaskList[I2Spindle]) + if (TI.encloses(MPT, B1)) + return true; + + return false; + } + + // Get the whole loop stack to check above the common loop. + SmallVector LoopsToCheck; + const Loop *CurrLoop = CommonLoop; + while (CurrLoop) { + LoopsToCheck.push_back(CurrLoop); + CurrLoop = CurrLoop->getParentLoop(); + } + + // Check the loop stack from the top down until a loop is found where the + // dependence might cross parallel tasks. 
+ unsigned MinLoopDepthToCheck = 1; + while (!LoopsToCheck.empty()) { + const Loop *CurrLoop = LoopsToCheck.pop_back_val(); + // If we're not yet at the minimum loop depth of the underlying object, go + // deeper. + if (MinLoopDepthToCheck < MinObjDepth) { + ++MinLoopDepthToCheck; + continue; + } + + // Check the maybe-parallel tasks for the spindle containing the loop + // header. + const Spindle *CurrSpindle = TI.getSpindleFor(CurrLoop->getHeader()); + bool MPTEnclosesDst = false; + for (const Task *MPT : MPTasks.TaskList[CurrSpindle]) { + if (TI.encloses(MPT, B2)) { + MPTEnclosesDst = true; + break; + } + } + + // If Dst is found in a maybe-parallel task, then the minimum loop depth has + // been found. + if (MPTEnclosesDst) + break; + // Otherwise go deeper. + ++MinLoopDepthToCheck; + } + + // Scan the loop nests in common from inside out. + for (unsigned II = MaxLoopDepthToCheck; II >= MinLoopDepthToCheck; --II) { + LLVM_DEBUG(dbgs() << "Checking loop level " << II << "\n"); + if (D->isScalar(II)) + return true; + if (D->getDirection(II) & unsigned(~Dependence::DVEntry::EQ)) + return true; + } + + LLVM_DEBUG(dbgs() << "Dependence does not cross parallel tasks.\n"); + return false; +} + +bool AccessPtrAnalysis::PointerCapturedBefore(const Value *Ptr, + const Instruction *I, + unsigned MaxUsesToExplore = + MaxUsesToExploreCapture) const { + const Value *StrippedPtr = Ptr->stripInBoundsOffsets(); + // Do not treat NULL pointers as captured. + if (isa(StrippedPtr)) + return false; + auto CaptureQuery = std::make_pair(StrippedPtr, I); + if (MayBeCapturedCache.count(CaptureQuery)) + return MayBeCapturedCache[CaptureQuery]; + + bool Result = false; + if (isa(StrippedPtr)) + // We assume that globals are captured. + // + // TODO: Possibly refine this check for private or internal globals. + Result = true; + else if (!isa(StrippedPtr)) { + // If we could strip the pointer, we conservatively assume it may be + // captured. 
+ LLVM_DEBUG(dbgs() << "PointerCapturedBefore: Could not fully strip pointer " + << *Ptr << "\n"); + Result = true; + } else + Result = PointerMayBeCapturedBefore(StrippedPtr, false, false, I, &DT, true, + MaxUsesToExplore); + MayBeCapturedCache[CaptureQuery] = Result; + return Result; +} + +bool AccessPtrAnalysis::checkOpaqueAccesses(GeneralAccess &GA1, + GeneralAccess &GA2) { + // If neither instruction may write to memory, then no race is possible. + if (!GA1.I->mayWriteToMemory() && !GA2.I->mayWriteToMemory()) + return false; + + if (!GA1.Loc && !GA2.Loc) { + LLVM_DEBUG({ + const CallBase *Call1 = cast(GA1.I); + const CallBase *Call2 = cast(GA2.I); + + assert(!AA->doesNotAccessMemory(Call1) && + !AA->doesNotAccessMemory(Call2) && + "Opaque call does not access memory."); + assert(!AA->getMemoryEffects(Call1).onlyAccessesArgPointees() && + !AA->getMemoryEffects(Call2).onlyAccessesArgPointees() && + "Opaque call only accesses arg pointees."); + }); + // // If both calls only read memory, then there's no dependence. + // if (AA->onlyReadsMemory(Call1) && AA->onlyReadsMemory(Call2)) + // return false; + + // We have two logically-parallel calls that opaquely access memory, and at + // least one call modifies memory. Hence we have a dependnece and potential + // race. + return true; + } + + BasicBlock *B1 = GA1.I->getParent(); + BasicBlock *B2 = GA2.I->getParent(); + + // Get information about the non-opaque access. + const Value *Ptr; + Instruction *NonOpaque; + if (GA1.Loc) { + Ptr = GA1.getPtr(); + NonOpaque = GA1.I; + } else { // GA2.Loc + Ptr = GA2.getPtr(); + NonOpaque = GA2.I; + } + + // One access is opaque, while the other has a pointer. For the opaque access + // to race, the pointer must escape before the non-opaque instruction. + if (!PointerCapturedBefore(Ptr, NonOpaque)) + return false; + + // TODO: Use the instruction that performs the capture to further bound the + // subsequent loop checks. 
+ + // Otherwise we check the logical parallelism of the access. Because one of + // the pointers is null, we assume that the "minimum object depth" is 0. + unsigned MinObjDepth = 0; + LLVM_DEBUG(dbgs() << "Min loop depth " << MinObjDepth + << " used for opaque accesses.\n"); + + // Find the deepest loop that contains both B1 and B2. + const Loop *CommonLoop = getCommonLoop(B1, B2, LI); + unsigned MaxLoopDepthToCheck = CommonLoop ? CommonLoop->getLoopDepth() : 0; + + // Check if dependence does not depend on looping. + if (0 == MaxLoopDepthToCheck) + // If there's no loop to worry about, then the existence of the dependence + // implies the potential for a race. + return true; + + LLVM_DEBUG( + if (MinObjDepth > MaxLoopDepthToCheck) { + dbgs() << "\tI1 " << *GA1.I << "\n\tI2 " << *GA2.I; + dbgs() << "\n\tMaxLoopDepthToCheck " << MaxLoopDepthToCheck; + dbgs() << "\n\tMinObjDepthToCheck " << MinObjDepth << "\n"; + dbgs() << *GA1.I->getFunction(); + }); + assert(MinObjDepth <= MaxLoopDepthToCheck && + "Minimum loop depth of underlying object cannot be greater " + "than maximum loop depth of dependence."); + + // Get the task that encloses both B1 and B2. + const Task *CommonTask = TI.getEnclosingTask(B1, B2); + // Get the representative spindles for both B1 and B2 in this common task. + const Spindle *I1Spindle = GetRepSpindleInTask(TI.getSpindleFor(B1), + CommonTask, TI); + const Spindle *I2Spindle = GetRepSpindleInTask(TI.getSpindleFor(B2), + CommonTask, TI); + // If this common loop does not contain the common task, then dependencies at + // the level of this common loop do not constitute a potential race. Find the + // loop that contains the enclosing task. + // + // Skip this step if either representative spindle is a shared-eh spindle, + // because those are more complicated. 
+ if (!I1Spindle->isSharedEH() && !I2Spindle->isSharedEH()) { + if (!CommonLoop->contains(CommonTask->getEntry())) { + const Loop *CommonTaskLoop = LI.getLoopFor(CommonTask->getEntry()); + // Typically, CommonTaskLoop is a subloop of CommonLoop. But that doesn't + // have to be true, e.g., if CommonLoop appears in an exit of + // CommonTaskLoop. + // assert((!CommonTaskLoop || CommonTaskLoop->contains(CommonLoop)) && + // "Loop for common task does not contain common loop."); + CommonLoop = CommonTaskLoop; + } + // Update MaxLoopDepthToCheck + MaxLoopDepthToCheck = CommonLoop ? CommonLoop->getLoopDepth() : 0; + + // Check if dependence does not depend on looping. + if (0 == MaxLoopDepthToCheck) + MaxLoopDepthToCheck = MinObjDepth; + } + + if (MaxLoopDepthToCheck == MinObjDepth) { + LLVM_DEBUG(dbgs() << "Minimum object depth matches maximum loop depth.\n"); + if (TI.getTaskFor(B1) == TI.getTaskFor(B2)) + return false; + + // Check if dependence does not depend on looping. + if (0 == MaxLoopDepthToCheck) + // If there's no loop to worry about, then the existence of the dependence + // implies the potential for a race. + return true; + + // Check if the instructions are parallel when the loop backedge is excluded + // from dataflow. + for (const Task *MPT : MPTasksInLoop.TaskList[I1Spindle]) + if (TI.encloses(MPT, B2)) + return true; + for (const Task *MPT : MPTasksInLoop.TaskList[I2Spindle]) + if (TI.encloses(MPT, B1)) + return true; + + return false; + } + + // The opaque access acts like a dependence across all iterations of any loops + // containing the accesses. 
+ return true; +} + +static void setObjectMRForRace(RaceInfo::ObjectMRTy &ObjectMRForRace, + const Value *Ptr, ModRefInfo MRI) { + if (!ObjectMRForRace.count(Ptr)) + ObjectMRForRace[Ptr] = ModRefInfo::NoModRef; + ObjectMRForRace[Ptr] |= MRI; +} + +void AccessPtrAnalysis::recordLocalRace(const GeneralAccess &GA, + RaceInfo::ResultTy &Result, + RaceInfo::ObjectMRTy &ObjectMRForRace, + const GeneralAccess &Racer) { + Result.recordLocalRace(GA, Racer); + + if (!GA.getPtr()) + return; + + for (const Value *Obj : AccessToObjs[MemAccessInfo(GA.getPtr(), GA.isMod())]) { + if (GA.isMod()) + setObjectMRForRace(ObjectMRForRace, Obj, ModRefInfo::Ref); + setObjectMRForRace(ObjectMRForRace, Obj, ModRefInfo::Mod); + } +} + +static void recordAncestorRace(const GeneralAccess &GA, const Value *Ptr, + RaceInfo::ResultTy &Result, + RaceInfo::ObjectMRTy &ObjectMRForRace, + const GeneralAccess &Racer = GeneralAccess()) { + if (GA.isMod()) { + Result.recordRaceViaAncestorRef(GA, Racer); + setObjectMRForRace(ObjectMRForRace, Ptr, ModRefInfo::Ref); + } + Result.recordRaceViaAncestorMod(GA, Racer); + setObjectMRForRace(ObjectMRForRace, Ptr, ModRefInfo::Mod); +} + +static void recordOpaqueRace(const GeneralAccess &GA, const Value *Ptr, + RaceInfo::ResultTy &Result, + RaceInfo::ObjectMRTy &ObjectMRForRace, + const GeneralAccess &Racer = GeneralAccess()) { + if (GA.isMod()) { + Result.recordOpaqueRace(GA, Racer); + setObjectMRForRace(ObjectMRForRace, Ptr, ModRefInfo::Ref); + } + Result.recordOpaqueRace(GA, Racer); + setObjectMRForRace(ObjectMRForRace, Ptr, ModRefInfo::Mod); +} + +// Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their +// underlaying objects. If LocA and LocB are known to not alias (for any reason: +// tbaa, non-overlapping regions etc), then it is known there is no dependecy. +// Otherwise the underlying objects are checked to see if they point to +// different identifiable objects. 
+AliasResult +AccessPtrAnalysis::underlyingObjectsAlias(const GeneralAccess &GAA, + const GeneralAccess &GAB) { + MemoryLocation LocA = *GAA.Loc; + MemoryLocation LocB = *GAB.Loc; + // Check the original locations (minus size) for noalias, which can happen for + // tbaa, incompatible underlying object locations, etc. + MemoryLocation LocAS = + MemoryLocation::getBeforeOrAfter(LocA.Ptr, LocA.AATags); + MemoryLocation LocBS = + MemoryLocation::getBeforeOrAfter(LocB.Ptr, LocB.AATags); + if (AA->alias(LocAS, LocBS) == AliasResult::NoAlias) + return AliasResult::NoAlias; + + // Check the underlying objects are the same + const Value *AObj = getUnderlyingObject(LocA.Ptr); + const Value *BObj = getUnderlyingObject(LocB.Ptr); + + // If the underlying objects are the same, they must alias + if (AObj == BObj) + return AliasResult::MustAlias; + + // We may have hit the recursion limit for underlying objects, or have + // underlying objects where we don't know they will alias. + if (!isIdentifiedObject(AObj) || !isIdentifiedObject(BObj)) { + if ((isIdentifiedObject(AObj) && !PointerCapturedBefore(AObj, GAB.I)) || + (isIdentifiedObject(BObj) && !PointerCapturedBefore(BObj, GAA.I))) + return AliasResult::NoAlias; + return AliasResult::MayAlias; + } + + // Otherwise we know the objects are different and both identified objects so + // must not alias. + return AliasResult::NoAlias; +} + +static bool isThreadLocalObject(const Value *V) { + if (const IntrinsicInst *II = dyn_cast(V)) + return Intrinsic::threadlocal_address == II->getIntrinsicID(); + if (const GlobalValue *GV = dyn_cast(V)) + return GV->isThreadLocal(); + return false; +} + +void AccessPtrAnalysis::evaluateMaybeParallelAccesses( + GeneralAccess &GA1, GeneralAccess &GA2, RaceInfo::ResultTy &Result, + RaceInfo::ObjectMRTy &ObjectMRForRace) { + // No race is possible if no access modifies. 
+ if (!GA1.isMod() && !GA2.isMod()) + return; + + bool LocalRace = false; + if (!GA1.getPtr() || !GA2.getPtr()) { + LLVM_DEBUG({ + dbgs() << "Checking for race involving opaque access:\n" + << " GA1 =\n"; + if (GA1.getPtr()) + dbgs() << " Ptr:" << *GA1.getPtr() << "\n"; + else + dbgs() << " Ptr: null\n"; + dbgs() << " I:" << *GA1.I << "\n" + << " GA2 =\n"; + if (GA2.getPtr()) + dbgs() << " Ptr:" << *GA2.getPtr() << "\n"; + else + dbgs() << " Ptr: null\n"; + dbgs() << " I:" << *GA2.I << "\n";}); + if (checkOpaqueAccesses(GA1, GA2)) + LocalRace = true; + } else { + // If either GA has a nullptr, then skip the check, since nullptr's cannot + // alias. + Function *F = GA1.I->getFunction(); + if (isa(GA1.getPtr()) && + !NullPointerIsDefined( + F, GA1.getPtr()->getType()->getPointerAddressSpace())) + return; + if (isa(GA2.getPtr()) && + !NullPointerIsDefined( + F, GA2.getPtr()->getType()->getPointerAddressSpace())) + return; + + // If the underlying objects cannot alias, then skip the check. + if (AliasResult::NoAlias == underlyingObjectsAlias(GA1, GA2)) + return; + + // If both objects are thread-local, then skip the check. + if (isThreadLocalObject(GA1.getPtr()) && isThreadLocalObject(GA2.getPtr())) + return; + + LLVM_DEBUG( + dbgs() << "Checking for race from dependence:\n" + << " GA1 =\n" + << " Ptr:" << *GA1.getPtr() << "\n I:" << *GA1.I << "\n" + << " GA2 =\n" + << " Ptr:" << *GA2.getPtr() << "\n I:" << *GA2.I << "\n"); + if (checkDependence(DI.depends(&GA1, &GA2, true), GA1, GA2)) + LocalRace = true; + } + + if (LocalRace) { + LLVM_DEBUG(dbgs() << "Local race found:\n" + << " I1 =" << *GA1.I << "\n I2 =" << *GA2.I << "\n"); + recordLocalRace(GA1, Result, ObjectMRForRace, GA2); + recordLocalRace(GA2, Result, ObjectMRForRace, GA1); + } +} + +void AccessPtrAnalysis::checkForRacesHelper( + const Task *T, RaceInfo::ResultTy &Result, + RaceInfo::ObjectMRTy &ObjectMRForRace) { + SmallPtrSet Visited; + + // Now handle each spindle in this task. 
+ for (const Spindle *S : + depth_first>(T->getEntrySpindle())) { + LLVM_DEBUG(dbgs() << "Testing Spindle@" << S->getEntry()->getName() + << "\n"); + for (GeneralAccess GA : SpindleAccessMap[S]) { + if (GA.getPtr()) { + LLVM_DEBUG({ + dbgs() << "GA Underlying objects:\n"; + for (const Value *Obj : + AccessToObjs[MemAccessInfo(GA.getPtr(), GA.isMod())]) + dbgs() << " " << *Obj << "\n"; + }); + for (const Value *Obj : + AccessToObjs[MemAccessInfo(GA.getPtr(), GA.isMod())]) { + if (isa(Obj)) + // Races on alloca'd objects are checked locally. + continue; + + if (AssumeSafeMalloc && isAllocFn(Obj, TLI)) + // Races on malloc'd objects are checked locally. + continue; + + if (const Argument *A = dyn_cast(Obj)) { + // Check if the attributes on the argument preclude a race with the + // caller. + if (A->hasByValAttr() || // A->hasNoAliasAttr() || + A->hasStructRetAttr() || A->hasInAllocaAttr()) + continue; + + // Otherwise record the possible race with an ancestor. + LLVM_DEBUG(dbgs() << "Setting race via ancestor:\n" + << " GA.I: " << *GA.I << "\n" + << " Arg: " << *A << "\n"); + recordAncestorRace(GA, A, Result, ObjectMRForRace); + continue; + } + + if (const GlobalVariable *GV = dyn_cast(Obj)) { + // Constant variables cannot race. + assert(!GV->isConstant() && "Constant GV should be excluded."); + if (GV->hasPrivateLinkage() || GV->hasInternalLinkage()) { + // Races are only possible with ancestor functions in this module. + LLVM_DEBUG(dbgs() << "Setting race via private/internal global:\n" + << " GA.I: " << *GA.I << "\n" + << " GV: " << *GV << "\n"); + // TODO: Add MAAPs for private and internal global variables. + recordAncestorRace(GA, GV, Result, ObjectMRForRace); + // recordOpaqueRace(GA, GV, Result, ObjectMRForRace); + } else { + // Record the possible opaque race. 
+ LLVM_DEBUG(dbgs() << "Setting opaque race:\n" + << " GA.I: " << *GA.I << "\n" + << " GV: " << *GV << "\n"); + recordOpaqueRace(GA, GV, Result, ObjectMRForRace); + } + continue; + } + + if (isa(Obj)) { + // Record the possible opaque race. + LLVM_DEBUG(dbgs() << "Setting opaque race:\n" + << " GA.I: " << *GA.I << "\n" + << " Obj: " << *Obj << "\n"); + recordOpaqueRace(GA, Obj, Result, ObjectMRForRace); + continue; + } + + if (!isa(Obj)) { + dbgs() << "ALERT: Unexpected underlying object: " << *Obj << "\n"; + } + + // Record the possible opaque race. + LLVM_DEBUG(dbgs() << "Setting opaque race:\n" + << " GA.I: " << *GA.I << "\n" + << " Obj: " << *Obj << "\n"); + recordOpaqueRace(GA, Obj, Result, ObjectMRForRace); + } + } + } + for (const Task *MPT : MPTasks.TaskList[S]) { + LLVM_DEBUG(dbgs() << "Testing against Task@" << MPT->getEntry()->getName() + << "\n"); + for (const Task *SubMPT : depth_first(MPT)) + for (GeneralAccess GA1 : SpindleAccessMap[S]) + for (GeneralAccess GA2 : TaskAccessMap[SubMPT]) + evaluateMaybeParallelAccesses(GA1, GA2, Result, ObjectMRForRace); + } + // If a successor of this spindle belongs to a subtask, recursively process + // that subtask. + for (const Spindle *Succ : successors(S)) { + if (S->succInSubTask(Succ)) { + // Skip successor spindles we've seen before. + if (!Visited.insert(Succ).second) + continue; + checkForRacesHelper(Succ->getParentTask(), Result, ObjectMRForRace); + } + } + } +} + +// /// Check whether a pointer can participate in a runtime bounds check. +// /// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr +// /// by adding run-time checks (overflow checks) if necessary. +// static bool hasComputableBounds(PredicatedScalarEvolution &PSE, +// const ValueToValueMap &Strides, Value *Ptr, +// Loop *L, bool Assume) { +// const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); + +// // The bounds for loop-invariant pointer is trivial. 
+// if (PSE.getSE()->isLoopInvariant(PtrScev, L)) +// return true; + +// const SCEVAddRecExpr *AR = dyn_cast(PtrScev); + +// if (!AR && Assume) +// AR = PSE.getAsAddRec(Ptr); + +// if (!AR) +// return false; + +// return AR->isAffine(); +// } + +// /// Check whether a pointer address cannot wrap. +// static bool isNoWrap(PredicatedScalarEvolution &PSE, +// const ValueToValueMap &Strides, Value *Ptr, Type *AccessTy, +// Loop *L) { +// const SCEV *PtrScev = PSE.getSCEV(Ptr); +// if (PSE.getSE()->isLoopInvariant(PtrScev, L)) +// return true; + +// int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides); +// if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW)) +// return true; + +// return false; +// } + +// namespace { +// // This class is based on LoopAccessAnalysis, but is not focused on +// // vectorization. +// class RTPtrCheckAnalysis { +// public: +// using MemAccessInfo = PointerIntPair; +// using MemAccessInfoList = SmallVector; +// using DepCandidates = EquivalenceClasses; +// using UnderlyingObjToAccessMap = DenseMap; + +// RTPtrCheckAnalysis(Loop *L, RuntimePointerChecking &RtCheck, +// AliasAnalysis *AA, ScalarEvolution &SE) +// : TheLoop(L), RtCheck(RtCheck), PSE(SE, *L), AST(*AA) {} + +// void addAccess(GeneralAccess GA, bool IsReadOnlyPtr = false) { +// if (GA.getPtr()) { +// LLVM_DEBUG(dbgs() << "Adding access for RT pointer checking:\n" +// << " GA.I: " << *GA.I << "\n" +// << " GA.Ptr: " << *GA.getPtr() << "\n"); +// AST.add(GA.I); +// Value *Ptr = const_cast(GA.getPtr()); +// Accesses.insert(MemAccessInfo(Ptr, GA.isMod())); +// if (IsReadOnlyPtr) +// ReadOnlyPtr.insert(Ptr); +// collectStridedAccess(GA.I); +// } +// } +// void processAccesses( +// AccessPtrAnalysis::AccessToUnderlyingObjMap &AccessToObjs); +// bool canCheckPtrAtRT(bool ShouldCheckWrap = false); + +// /// Initial processing of memory accesses determined that we need to +// /// perform dependency checking. 
+// /// +// /// Note that this can later be cleared if we retry memcheck analysis without +// /// dependency checking (i.e. FoundNonConstantDistanceDependence). +// bool isDependencyCheckNeeded() { return !CheckDeps.empty(); } + +// private: +// void collectStridedAccess(Value *MemAccess); +// bool createCheckForAccess(MemAccessInfo Access, +// DenseMap &DepSetId, +// unsigned &RunningDepId, unsigned ASId, +// bool ShouldCheckWrap, bool Assume); + +// /// The loop being checked. +// Loop *TheLoop; + +// /// The resulting RT check. +// RuntimePointerChecking &RtCheck; + +// SetVector Accesses; + +// /// List of accesses that need a further dependence check. +// MemAccessInfoList CheckDeps; + +// /// Set of pointers that are read only. +// SmallPtrSet ReadOnlyPtr; + +// // Sets of potentially dependent accesses - members of one set share an +// // underlying pointer. The set "CheckDeps" identfies which sets really need a +// // dependence check. +// DepCandidates DepCands; + +// /// The SCEV predicate containing all the SCEV-related assumptions. +// PredicatedScalarEvolution PSE; + +// /// An alias set tracker to partition the access set by underlying object and +// /// intrinsic property (such as TBAA metadata). +// AliasSetTracker AST; + +// /// Initial processing of memory accesses determined that we may need +// /// to add memchecks. Perform the analysis to determine the necessary checks. +// /// +// /// Note that, this is different from isDependencyCheckNeeded. When we retry +// /// memcheck analysis without dependency checking +// /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is +// /// cleared while this remains set if we have potentially dependent accesses. +// bool IsRTCheckAnalysisNeeded = false; + +// /// If an access has a symbolic strides, this maps the pointer value to +// /// the stride symbol. +// ValueToValueMap SymbolicStrides; + +// /// Set of symbolic strides values. 
+// SmallPtrSet StrideSet; +// }; +// } // end anonymous namespace + +// // This code is borrowed from LoopAccessAnalysis.cpp +// void RTPtrCheckAnalysis::collectStridedAccess(Value *MemAccess) { +// Value *Ptr = nullptr; +// if (LoadInst *LI = dyn_cast(MemAccess)) +// Ptr = LI->getPointerOperand(); +// else if (StoreInst *SI = dyn_cast(MemAccess)) +// Ptr = SI->getPointerOperand(); +// else +// return; + +// Value *Stride = getStrideFromPointer(Ptr, PSE.getSE(), TheLoop); +// if (!Stride) +// return; + +// LLVM_DEBUG(dbgs() << "TapirRD: Found a strided access that is a candidate " +// "for versioning:"); +// LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n"); + +// // Avoid adding the "Stride == 1" predicate when we know that +// // Stride >= Trip-Count. Such a predicate will effectively optimize a single +// // or zero iteration loop, as Trip-Count <= Stride == 1. +// // +// // TODO: We are currently not making a very informed decision on when it is +// // beneficial to apply stride versioning. It might make more sense that the +// // users of this analysis (such as the vectorizer) will trigger it, based on +// // their specific cost considerations; For example, in cases where stride +// // versioning does not help resolving memory accesses/dependences, the +// // vectorizer should evaluate the cost of the runtime test, and the benefit +// // of various possible stride specializations, considering the alternatives +// // of using gather/scatters (if available). + +// const SCEV *StrideExpr = PSE.getSCEV(Stride); +// const SCEV *BETakenCount = PSE.getBackedgeTakenCount(); + +// // Match the types so we can compare the stride and the BETakenCount. +// // The Stride can be positive/negative, so we sign extend Stride; +// // The backdgeTakenCount is non-negative, so we zero extend BETakenCount. 
+// const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout(); +// uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType()); +// uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType()); +// const SCEV *CastedStride = StrideExpr; +// const SCEV *CastedBECount = BETakenCount; +// ScalarEvolution *SE = PSE.getSE(); +// if (BETypeSize >= StrideTypeSize) +// CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType()); +// else +// CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType()); +// const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount); +// // Since TripCount == BackEdgeTakenCount + 1, checking +// // Stride >= TripCount is equivalent to checking +// // Stride - BETakenCount > 0 +// if (SE->isKnownPositive(StrideMinusBETaken)) { +// LLVM_DEBUG( +// dbgs() << "TapirRD: Stride>=TripCount; No point in versioning as the " +// "Stride==1 predicate will imply that the loop executes " +// "at most once.\n"); +// return; +// } +// LLVM_DEBUG(dbgs() << "TapirRD: Found a strided access that we can version."); + +// SymbolicStrides[Ptr] = Stride; +// StrideSet.insert(Stride); +// } + +// // This code is based on AccessAnalysis::processMemAccesses() in +// // LoopAccessAnalysis.cpp. +// void RTPtrCheckAnalysis::processAccesses( +// AccessPtrAnalysis::AccessToUnderlyingObjMap &AccessToObjs) { +// // The AliasSetTracker has nicely partitioned our pointers by metadata +// // compatibility and potential for underlying-object overlap. As a result, we +// // only need to check for potential pointer dependencies within each alias +// // set. +// for (auto &AS : AST) { +// // Note that both the alias-set tracker and the alias sets themselves used +// // linked lists internally and so the iteration order here is deterministic +// // (matching the original instruction order within each set). + +// bool SetHasWrite = false; + +// // Map of pointers to last access encountered. 
+// UnderlyingObjToAccessMap ObjToLastAccess; + +// // Set of access to check after all writes have been processed. +// SetVector DeferredAccesses; + +// // Iterate over each alias set twice, once to process read/write pointers, +// // and then to process read-only pointers. +// for (int SetIteration = 0; SetIteration < 2; ++SetIteration) { +// bool UseDeferred = SetIteration > 0; +// SetVector &S = UseDeferred ? DeferredAccesses : Accesses; + +// for (auto AV : AS) { +// Value *Ptr = AV.getValue(); +// LLVM_DEBUG(dbgs() << "Found pointer is alias set: " << *Ptr << "\n"); + +// // For a single memory access in AliasSetTracker, Accesses may contain +// // both read and write, and they both need to be handled for CheckDeps. +// for (auto AC : S) { +// LLVM_DEBUG(dbgs() << " Access pointer: " << *AC.getPointer() << "\n"); +// if (AC.getPointer() != Ptr) +// continue; + +// bool IsWrite = AC.getInt(); + +// // If we're using the deferred access set, then it contains only +// // reads. +// bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite; +// if (UseDeferred && !IsReadOnlyPtr) +// continue; +// // Otherwise, the pointer must be in the PtrAccessSet, either as a +// // read or a write. +// assert(((IsReadOnlyPtr && UseDeferred) || IsWrite || +// S.count(MemAccessInfo(Ptr, false))) && +// "Alias-set pointer not in the access set?"); + +// MemAccessInfo Access(Ptr, IsWrite); +// DepCands.insert(Access); + +// // Memorize read-only pointers for later processing and skip them in +// // the first round (they need to be checked after we have seen all +// // write pointers). Note: we also mark pointer that are not +// // consecutive as "read-only" pointers (so that we check +// // "a[b[i]] +="). Hence, we need the second check for "!IsWrite". +// if (!UseDeferred && IsReadOnlyPtr) { +// DeferredAccesses.insert(Access); +// continue; +// } + +// // If this is a write - check other reads and writes for conflicts. 
If +// // this is a read only check other writes for conflicts (but only if +// // there is no other write to the ptr - this is an optimization to +// // catch "a[i] = a[i] + " without having to do a dependence check). +// if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) { +// CheckDeps.push_back(Access); +// IsRTCheckAnalysisNeeded = true; +// } + +// if (IsWrite) +// SetHasWrite = true; + +// for (const Value *Obj : AccessToObjs[ +// AccessPtrAnalysis::MemAccessInfo(Ptr, IsWrite)]) { +// UnderlyingObjToAccessMap::iterator Prev = +// ObjToLastAccess.find(Obj); +// if (Prev != ObjToLastAccess.end()) +// DepCands.unionSets(Access, Prev->second); + +// ObjToLastAccess[Obj] = Access; +// } +// } +// } +// } +// } +// } + +// // This code is borrowed from LoopAccessAnalysis.cpp +// bool RTPtrCheckAnalysis::createCheckForAccess( +// MemAccessInfo Access, Type *AccessTy, DenseMap &DepSetId, +// unsigned &RunningDepId, unsigned ASId, bool ShouldCheckWrap, bool Assume) { +// Value *Ptr = Access.getPointer(); + +// if (!hasComputableBounds(PSE, SymbolicStrides, Ptr, TheLoop, Assume)) +// return false; + +// // When we run after a failing dependency check we have to make sure +// // we don't have wrapping pointers. +// if (ShouldCheckWrap && +// !isNoWrap(PSE, SymbolicStrides, Ptr, AccessTy, TheLoop)) { +// auto *Expr = PSE.getSCEV(Ptr); +// if (!Assume || !isa(Expr)) +// return false; +// PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW); +// } + +// // The id of the dependence set. +// unsigned DepId; + +// if (isDependencyCheckNeeded()) { +// Value *Leader = DepCands.getLeaderValue(Access).getPointer(); +// unsigned &LeaderId = DepSetId[Leader]; +// if (!LeaderId) +// LeaderId = RunningDepId++; +// DepId = LeaderId; +// } else +// // Each access has its own dependence set. 
+// DepId = RunningDepId++; + +// bool IsWrite = Access.getInt(); +// RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, SymbolicStrides, PSE); +// LLVM_DEBUG(dbgs() << "TapirRD: Found a runtime check ptr:" << *Ptr << '\n'); + +// return true; +// } + +// // This code is borrowed from LoopAccessAnalysis.cpp +// bool RTPtrCheckAnalysis::canCheckPtrAtRT(bool ShouldCheckWrap) { +// // Find pointers with computable bounds. We are going to use this information +// // to place a runtime bound check. +// bool CanDoRT = true; + +// bool NeedRTCheck = false; +// if (!IsRTCheckAnalysisNeeded) return true; + +// bool IsDepCheckNeeded = isDependencyCheckNeeded(); + +// // We assign a consecutive id to access from different alias sets. +// // Accesses between different groups doesn't need to be checked. +// unsigned ASId = 1; +// for (auto &AS : AST) { +// int NumReadPtrChecks = 0; +// int NumWritePtrChecks = 0; +// bool CanDoAliasSetRT = true; + +// // We assign consecutive id to access from different dependence sets. +// // Accesses within the same set don't need a runtime check. +// unsigned RunningDepId = 1; +// DenseMap DepSetId; + +// SmallVector Retries; + +// for (auto A : AS) { +// Value *Ptr = A.getValue(); +// bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true)); +// MemAccessInfo Access(Ptr, IsWrite); + +// if (IsWrite) +// ++NumWritePtrChecks; +// else +// ++NumReadPtrChecks; + +// if (!createCheckForAccess(Access, DepSetId, RunningDepId, ASId, +// ShouldCheckWrap, false)) { +// LLVM_DEBUG(dbgs() << "TapirRD: Can't find bounds for ptr:" << *Ptr << '\n'); +// Retries.push_back(Access); +// CanDoAliasSetRT = false; +// } +// } + +// // If we have at least two writes or one write and a read then we need to +// // check them. But there is no need to checks if there is only one +// // dependence set for this alias set. +// // +// // Note that this function computes CanDoRT and NeedRTCheck independently. 
+// // For example CanDoRT=false, NeedRTCheck=false means that we have a pointer +// // for which we couldn't find the bounds but we don't actually need to emit +// // any checks so it does not matter. +// bool NeedsAliasSetRTCheck = false; +// if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2)) +// NeedsAliasSetRTCheck = (NumWritePtrChecks >= 2 || +// (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1)); + +// // We need to perform run-time alias checks, but some pointers had bounds +// // that couldn't be checked. +// if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) { +// // Reset the CanDoSetRt flag and retry all accesses that have failed. +// // We know that we need these checks, so we can now be more aggressive +// // and add further checks if required (overflow checks). +// CanDoAliasSetRT = true; +// for (auto Access : Retries) +// if (!createCheckForAccess(Access, DepSetId, RunningDepId, ASId, +// ShouldCheckWrap, /*Assume=*/true)) { +// CanDoAliasSetRT = false; +// break; +// } +// } + +// CanDoRT &= CanDoAliasSetRT; +// NeedRTCheck |= NeedsAliasSetRTCheck; +// ++ASId; +// } + +// // If the pointers that we would use for the bounds comparison have different +// // address spaces, assume the values aren't directly comparable, so we can't +// // use them for the runtime check. We also have to assume they could +// // overlap. In the future there should be metadata for whether address spaces +// // are disjoint. +// unsigned NumPointers = RtCheck.Pointers.size(); +// for (unsigned i = 0; i < NumPointers; ++i) { +// for (unsigned j = i + 1; j < NumPointers; ++j) { +// // Only need to check pointers between two different dependency sets. +// if (RtCheck.Pointers[i].DependencySetId == +// RtCheck.Pointers[j].DependencySetId) +// continue; +// // Only need to check pointers in the same alias set. 
+// if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId) +// continue; + +// Value *PtrI = RtCheck.Pointers[i].PointerValue; +// Value *PtrJ = RtCheck.Pointers[j].PointerValue; + +// unsigned ASi = PtrI->getType()->getPointerAddressSpace(); +// unsigned ASj = PtrJ->getType()->getPointerAddressSpace(); +// if (ASi != ASj) { +// LLVM_DEBUG( +// dbgs() << "TapirRD: Runtime check would require comparison between" +// " different address spaces\n"); +// return false; +// } +// } +// } + +// if (NeedRTCheck && CanDoRT) +// RtCheck.generateChecks(DepCands, IsDepCheckNeeded); + +// LLVM_DEBUG(dbgs() << "TapirRD: We need to do " << RtCheck.getNumberOfChecks() +// << " pointer comparisons.\n"); + +// RtCheck.Need = NeedRTCheck; + +// bool CanDoRTIfNeeded = !NeedRTCheck || CanDoRT; +// if (!CanDoRTIfNeeded) +// RtCheck.reset(); +// return CanDoRTIfNeeded; +// } + +// void AccessPtrAnalysis::getRTPtrChecks(Loop *L, RaceInfo::ResultTy &Result, +// RaceInfo::PtrChecksTy &AllPtrRtChecks) { +// LLVM_DEBUG(dbgs() << "getRTPtrChecks: " << *L << "\n"); + +// AllPtrRtChecks[L] = std::make_unique(&SE); + +// RTPtrCheckAnalysis RPCA(L, *AllPtrRtChecks[L].get(), AA, SE); +// SmallPtrSet Seen; +// // First handle all stores +// for (GeneralAccess GA : LoopAccessMap[L]) { +// // Exclude accesses not involved in a local race +// if (!Result.count(GA.I) || +// !RaceInfo::isLocalRace(Result.getRaceType(GA.I))) +// continue; + +// if (GA.isMod()) { +// RPCA.addAccess(GA); +// if (GA.getPtr()) +// Seen.insert(GA.getPtr()); +// } +// } +// // Now handle loads, checking if any pointers are only read from +// for (GeneralAccess GA : LoopAccessMap[L]) { +// // Exclude accesses not involved in a local race +// if (!Result.count(GA.I) || +// !RaceInfo::isLocalRace(Result.getRaceType(GA.I))) +// continue; + +// if (!GA.isMod()) { +// if (!GA.getPtr()) +// RPCA.addAccess(GA); + +// RPCA.addAccess(GA, !Seen.count(GA.getPtr())); +// } +// } + +// RPCA.processAccesses(AccessToObjs); +// 
// TODO: Do something with CanDoRTIfNeeded +// } + +void AccessPtrAnalysis::processAccessPtrs( + RaceInfo::ResultTy &Result, RaceInfo::ObjectMRTy &ObjectMRForRace, + RaceInfo::PtrChecksTy &AllPtrRtChecks) { + TI.evaluateParallelState(MPTasks); + TI.evaluateParallelState(MPTasksInLoop); + + // using InstPtrPair = std::pair; + // SmallPtrSet Visited; + for (const Spindle *S : + depth_first(TI.getRootTask()->getEntrySpindle())) { + for (GeneralAccess GA : SpindleAccessMap[S]) { + // InstPtrPair Visit = + // std::make_pair(GA.I, GA.getPtr()); + // // Skip instructions we've already visited. + // if (!Visited.insert(Visit).second) + // continue; + + if (!GA.getPtr()) { + if (const CallBase *Call = dyn_cast(GA.I)) { + if (!Call->onlyAccessesArgMemory() && + !(AssumeSafeMalloc && + (isAllocFn(Call, TLI) || isFreeFn(Call, TLI)))) { + LLVM_DEBUG(dbgs() << "Setting opaque race:\n" + << " GA.I: " << *GA.I << "\n" + << " no explicit racer\n"); + Result.recordOpaqueRace(GA, GeneralAccess()); + } + } + } + + // Check for aliasing against the function arguments. 
+ for (Value *ArgPtr : ArgumentPtrs) { + LLVM_DEBUG({ + dbgs() << "Checking instruction against arg pointer:\n" + << " GA.I: " << *GA.I << "\n" + << " Arg: " << *ArgPtr << "\n"; + }); + if (!GA.getPtr()) { + ModRefInfo MRI = + AA->getModRefInfo(GA.I, MemoryLocation::getBeforeOrAfter(ArgPtr)); + Argument *Arg = cast(ArgPtr); + if (isModSet(MRI) && !Arg->onlyReadsMemory()) { + LLVM_DEBUG(dbgs() << " Mod is set.\n"); + Result.recordRaceViaAncestorRef(GA, GeneralAccess()); + Result.recordRaceViaAncestorMod(GA, GeneralAccess()); + setObjectMRForRace(ObjectMRForRace, ArgPtr, ModRefInfo::ModRef); + } + if (isRefSet(MRI)) { + LLVM_DEBUG(dbgs() << " Ref is set.\n"); + Result.recordRaceViaAncestorMod(GA, GeneralAccess()); + setObjectMRForRace(ObjectMRForRace, ArgPtr, ModRefInfo::Mod); + } + } else { + MemoryLocation GALoc = *GA.Loc; + if (AA->alias(GALoc, MemoryLocation::getBeforeOrAfter(ArgPtr))) { + Argument *Arg = cast(ArgPtr); + if (GA.isMod() && !Arg->onlyReadsMemory()) { + LLVM_DEBUG(dbgs() << " Mod is set.\n"); + Result.recordRaceViaAncestorRef(GA, GeneralAccess()); + Result.recordRaceViaAncestorMod(GA, GeneralAccess()); + setObjectMRForRace(ObjectMRForRace, ArgPtr, ModRefInfo::ModRef); + } + if (GA.isRef()) { + LLVM_DEBUG(dbgs() << " Ref is set.\n"); + Result.recordRaceViaAncestorMod(GA, GeneralAccess()); + setObjectMRForRace(ObjectMRForRace, ArgPtr, ModRefInfo::Mod); + } + } + } + } + } + } + checkForRacesHelper(TI.getRootTask(), Result, ObjectMRForRace); + + // Based on preliminary experiments, it doesn't appear that getRTPtrChecks, + // which is adapted from LoopAccessAnalysis, comes up with enough runtime + // pointer checks often enough to be worthwhile. It might be worth revisiting + // this code later. 
+ + // for (Loop *TopLevelLoop : LI) { + // for (Loop *L : depth_first(TopLevelLoop)) { + // PredicatedScalarEvolution PSE(SE, *L); + // if (canAnalyzeLoop(L, PSE)) + // getRTPtrChecks(L, Result, AllPtrRtChecks); + // } + // } +} + +RaceInfo::RaceInfo(Function *F, DominatorTree &DT, LoopInfo &LI, TaskInfo &TI, + DependenceInfo &DI, ScalarEvolution &SE, + const TargetLibraryInfo *TLI) + : F(F), DT(DT), LI(LI), TI(TI), DI(DI), SE(SE), TLI(TLI) { + analyzeFunction(); +} + +void RaceInfo::getObjectsFor(Instruction *I, + SmallPtrSetImpl &Objects) { + SmallVector GA; + GetGeneralAccesses(I, GA, DI.getAA(), TLI); + for (GeneralAccess Acc : GA) { + // Skip this access if it does not have a valid pointer. + if (!Acc.getPtr()) + continue; + + getObjectsFor(MemAccessInfo(Acc.getPtr(), Acc.isMod()), Objects); + } +} + +void RaceInfo::getObjectsFor(MemAccessInfo Access, + SmallPtrSetImpl &Objects) { + for (const Value *Obj : AccessToObjs[Access]) + Objects.insert(Obj); +} + +void RaceInfo::print(raw_ostream &OS) const { + if (Result.empty()) { + OS << "No possible races\n"; + return; + } + RaceType OverallRT = getOverallRaceType(); + OS << "Overall race type: "; + printRaceType(OverallRT, OS); + OS << "\n"; + for (auto Res : Result) { + OS << " Result: " << *Res.first << "\n"; + for (auto &RD : Res.second) { + if (RD.getPtr()) + OS << " ptr: " << *RD.getPtr(); + else + OS << " nullptr"; + OS << "\n"; + printRaceType(RD.Type, OS.indent(6)); + if (RD.Racer.isValid()) { + OS << "\n Racer:"; + OS << "\n I = " << *RD.Racer.I; + OS << "\n Loc = "; + if (!RD.Racer.Loc) + OS << "nullptr"; + else if (RD.Racer.Loc->Ptr == RD.getPtr()) + OS << "same pointer"; + else + OS << *RD.Racer.Loc->Ptr; + OS << "\n OperandNum = "; + if (RD.Racer.OperandNum == static_cast(-1)) + OS << "none"; + else + OS << RD.Racer.OperandNum; + OS << "\n ModRef = " << (RD.Racer.isMod() ? "Mod " : "") + << (RD.Racer.isRef() ? 
"Ref" : ""); + } + else + OS << "\n Opaque racer"; + OS << "\n"; + } + } + OS << "Underlying objects of races:\n"; + for (auto Res : ObjectMRForRace) { + OS << *Res.first << "\n "; + if (isModSet(Res.second)) + OS << " Mod"; + if (isRefSet(Res.second)) + OS << " Ref"; + OS << "\n"; + } +} + +// The main analysis routine. +void RaceInfo::analyzeFunction() { + LLVM_DEBUG(dbgs() << "Analyzing function '" << F->getName() << "'\n"); + + // At a high level, we need to identify pairs of instructions that might + // execute in parallel and alias. + + AccessPtrAnalysis APA(DT, TI, LI, DI, SE, TLI, AccessToObjs); + // Record pointer arguments to this function + for (Argument &Arg : F->args()) + if (Arg.getType()->isPtrOrPtrVectorTy()) + APA.addFunctionArgument(&Arg); + // TODO: Add global variables to APA. + + for (BasicBlock &BB : *F) { + for (Instruction &I : BB.instructionsWithoutDebug()) { + if (I.mayReadFromMemory() || I.mayWriteToMemory()) { + if (checkInstructionForRace(&I, TLI)) + APA.addAccess(&I); + } + } + } + + APA.processAccessPtrs(Result, ObjectMRForRace, AllPtrRtChecks); +} diff --git a/llvm/lib/Analysis/TapirTaskInfo.cpp b/llvm/lib/Analysis/TapirTaskInfo.cpp new file mode 100644 index 00000000000000..c89ad33133af30 --- /dev/null +++ b/llvm/lib/Analysis/TapirTaskInfo.cpp @@ -0,0 +1,1846 @@ +//===- TapirTaskInfo.cpp - Tapir task calculator --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the TapirTaskInfo class that is used to identify parallel +// tasks and spindles in Tapir. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/TapirTaskInfo.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/IteratedDominanceFrontier.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/PrintPasses.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; + +#define DEBUG_TYPE "task-info" + +// Statistics +STATISTIC(NumBasicBlocks, "Number of basic blocks analyzed."); +STATISTIC(NumTasks, "Number of tasks found."); +STATISTIC(NumSpindles, "Number of spindles found."); +STATISTIC(NumSharedEHSpindles, "Number of shared exception-handling spindles " + "found in this function."); +STATISTIC(NumBasicBlocksInPF, + "Number of basic blocks analyzed in parallel functions."); +STATISTIC(NumTasksInPF, "Number of tasks found in parallel functions."); +STATISTIC(NumSpindlesInPF, "Number of spindles found in parallel functions."); + +// Always verify taskinfo if expensive checking is enabled. 
+#ifdef EXPENSIVE_CHECKS +bool llvm::VerifyTaskInfo = true; +#else +bool llvm::VerifyTaskInfo = false; +#endif +static cl::opt + VerifyTaskInfoX("verify-task-info", cl::location(VerifyTaskInfo), + cl::Hidden, cl::desc("Verify task info (time consuming)")); + +static cl::opt PrintTaskFrameTree( + "print-taskframe-tree", cl::init(false), + cl::Hidden, cl::desc("Print tree of task frames.")); + +static cl::opt PrintMayHappenInParallel( + "print-may-happen-in-parallel", cl::init(false), + cl::Hidden, cl::desc("Print may-happen-in-parallel analysis results " + "derived from Tapir control flow.")); + +/// Returns the taskframe.create at the start of BB if one exists, nullptr +/// otherwise. +static const Instruction *getTaskFrameCreate(const BasicBlock *BB) { + if (const IntrinsicInst *II = dyn_cast(&BB->front())) + if (Intrinsic::taskframe_create == II->getIntrinsicID()) + return &BB->front(); + return nullptr; +} +static Instruction *getTaskFrameCreate(BasicBlock *BB) { + return const_cast( + getTaskFrameCreate(const_cast(BB))); +} + +static bool isCanonicalTaskFrameEnd(const Instruction *TFEnd) { + // Check that the last instruction in the basic block containing TFEnd is + // TFEnd. + const Instruction *Term = &TFEnd->getParent()->back(); + if (!Term || isa(Term) || isa(Term)) + return false; + + const Instruction *Prev = Term->getPrevNode(); + if (!Prev || Prev != TFEnd) + return false; + + return true; +} + +// Check if the given instruction is an intrinsic with the specified ID. If a +// value \p V is specified, then additionally checks that the first argument of +// the intrinsic matches \p V. This function matches the behavior of +// isTapirIntrinsic in Transforms/Utils/TapirUtils. 
+static bool isTapirIntrinsic(Intrinsic::ID ID, const Instruction *I, + const Value *V = nullptr) { + if (const CallBase *CB = dyn_cast(I)) + if (const Function *Called = CB->getCalledFunction()) + if (ID == Called->getIntrinsicID()) + if (!V || (V == CB->getArgOperand(0))) + return true; + return false; +} + +// Check if the basic block terminates a taskframe via a taskframe.end. +static bool endsUnassociatedTaskFrame(const BasicBlock *B) { + const Instruction *Prev = B->getTerminator()->getPrevNode(); + if (!Prev) + return false; + if (isTapirIntrinsic(Intrinsic::taskframe_end, Prev) && + isCanonicalTaskFrameEnd(Prev)) + return true; + return false; +} + +/// Checks if the given taskframe.create instruction is in canonical form. This +/// function mirrors the behavior of needToSplitTaskFrameCreate in +/// Transforms/Utils/TapirUtils. +static bool isCanonicalTaskFrameCreate(const Instruction *TFCreate) { + // If the taskframe.create is not the first instruction, split. + if (TFCreate != &TFCreate->getParent()->front()) + return false; + + // The taskframe.create is at the front of the block. Check that we have a + // single predecessor. + const BasicBlock *Pred = TFCreate->getParent()->getSinglePredecessor(); + if (!Pred) + return false; + + // Check that the single predecessor has a single successor. + if (!Pred->getSingleSuccessor()) + return false; + + // Check whether the single predecessor is terminated with a sync. + if (isa(Pred->getTerminator())) + return false; + + // If the taskframe.create has no users, ignore it. + if (TFCreate->user_empty()) + return false; + + // Check that the uses of the taskframe.create are canonical as well. 
+ for (const User *U : TFCreate->users()) { + if (const Instruction *I = dyn_cast(U)) { + if (isTapirIntrinsic(Intrinsic::taskframe_use, I) || + isTapirIntrinsic(Intrinsic::taskframe_resume, I)) + return true; + if (isTapirIntrinsic(Intrinsic::taskframe_end, I)) + return isCanonicalTaskFrameEnd(I); + } + } + return true; +} + +/// Returns true if the given instruction performs a taskframe resume, false +/// otherwise. +static bool isDetachedRethrow(const Instruction *I, + const Value *SyncReg = nullptr) { + if (const InvokeInst *II = dyn_cast(I)) + if (const Function *Called = II->getCalledFunction()) + if (Intrinsic::detached_rethrow == Called->getIntrinsicID()) + if (!SyncReg || (SyncReg == II->getArgOperand(0))) + return true; + return false; +} + +/// Returns true if the given instruction performs a taskframe resume, false +/// otherwise. +static bool isTaskFrameResume(const Instruction *I, + const Value *TaskFrame = nullptr) { + if (const InvokeInst *II = dyn_cast(I)) + if (const Function *Called = II->getCalledFunction()) + if (Intrinsic::taskframe_resume == Called->getIntrinsicID()) + if (!TaskFrame || (TaskFrame == II->getArgOperand(0))) + return true; + return false; +} + +//===----------------------------------------------------------------------===// +// Spindle implementation +// + +/// Return true if this spindle is a shared EH spindle. +bool Spindle::isSharedEH() const { + return getParentTask()->containsSharedEH(this); +} + +/// Return true if this spindle is the continuation of a detached task. +bool Spindle::isTaskContinuation() const { + for (const Spindle *Pred : predecessors(this)) + if (predInDifferentTask(Pred)) + return true; + return false; +} + +/// Return true if the successor spindle Succ is part of the same task as this +/// spindle. +bool Spindle::succInSameTask(const Spindle *Succ) const { + // If this spindle is a shared EH spindle, the successor must be a shared EH + // spindle tracked by the same task. 
+ if (isSharedEH()) + return (Succ->isSharedEH() && (getParentTask() == Succ->getParentTask())); + + // Otherwise we have an ordinary spindle. If this spindle and Succ are both + // properly contained in ParentTask, return true. + if (getParentTask()->contains(Succ)) + return true; + else { + // Otherwise, check if Succ is a shared EH spindle tracked by the parent of + // ParentTask. + return getParentTask()->isSharedEHExit(Succ); + } +} + +/// Return true if the successor spindle Succ is in a subtask of the task +/// containing this spindle. +bool Spindle::succInSubTask(const Spindle *Succ) const { + return (Succ->getParentTask()->getParentTask() == getParentTask()); +} + +/// Return the taskframe.create intrinsic at the start of the entry block of +/// this Spindle, or nullptr if no such intrinsic exists. +Value *Spindle::getTaskFrameCreate() const { + if (Instruction *TFCreate = ::getTaskFrameCreate(getEntry())) + if (isCanonicalTaskFrameCreate(TFCreate)) + return TFCreate; + return nullptr; +} + +/// Return the task associated with this taskframe, or nullptr of this spindle +/// is not a taskframe. +Task *Spindle::getTaskFromTaskFrame() const { + if (TaskFrameUser) return TaskFrameUser; + if (getParentTask()->getEntrySpindle() == this) return getParentTask(); + return nullptr; +} + +BasicBlock *Spindle::getTaskFrameContinuation() const { + // If this taskframe is used by a task, return that task's continuation. + if (TaskFrameUser) + return TaskFrameUser->getContinuationSpindle()->getEntry(); + + Value *TFCreate = getTaskFrameCreate(); + if (!TFCreate) + return nullptr; + // Scan the uses of the taskframe.create for a canonical taskframe.end. 
+ for (User *U : TFCreate->users()) + if (Instruction *I = dyn_cast(U)) { + if (isTapirIntrinsic(Intrinsic::taskframe_end, I) && + isCanonicalTaskFrameEnd(I)) + return I->getParent()->getSingleSuccessor(); + } + return nullptr; +} + +//===----------------------------------------------------------------------===// +// Task implementation +// + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void Task::dump() const { print(dbgs()); } + +LLVM_DUMP_METHOD void Task::dumpVerbose() const { + print(dbgs(), /*Depth=*/0, /*Verbose=*/true); +} +#endif + +// Get the shared EH spindles that this task can exit to and append them to +// SpindleVec. +void Task::getSharedEHExits(SmallVectorImpl &SpindleVec) const { + if (isRootTask()) return; + + // NOTE: We assume that all shared-eh exit spindles are contained in ancestors + // of this task, which might not be true if the shared-eh exit spindles + // themselves detach. It's not clear how this case could arise in practice, + // however. + SmallPtrSet Ancestors; + bool tracksSharedEHSpindles = false; + const Task *Parent = this; + do { + Parent = Parent->getParentTask(); + Ancestors.insert(Parent); + tracksSharedEHSpindles |= Parent->tracksSharedEHSpindles(); + } while (!Parent->isRootTask()); + if (!tracksSharedEHSpindles) return; + + // Scan the successors of the spindles in this task to find shared EH exits. + SmallVector WorkList; + SmallPtrSet Visited; + for (Spindle *S : getSpindles()) + for (Spindle *Succ : successors(S)) + if (Succ->isSharedEH() && Ancestors.contains(Succ->getParentTask())) + WorkList.push_back(Succ); + + // Perform a DFS of the shared EH exits to push each one onto SpindleVec and + // continue searching for more shared EH exits. + while (!WorkList.empty()) { + Spindle *EHExit = WorkList.pop_back_val(); + if (!Visited.insert(EHExit).second) continue; + + // Push EHExit onto SpindleVec. + SpindleVec.push_back(EHExit); + + // Scan the successors of EHExit for more shared EH exits. 
+ for (Spindle *Succ : successors(EHExit)) + if (Succ->isSharedEH()) + WorkList.push_back(Succ); + } +} + +/// Returns true if SharedEH is a shared EH exit of this task. +bool Task::isSharedEHExit(const Spindle *SharedEH) const { + if (isRootTask()) return false; + if (!SharedEH->isSharedEH()) return false; + + // NOTE: We assume that all shared-eh exit spindles are contained in ancestors + // of this task, which might not be true if the shared-eh exit spindles + // themselves detach. It's not clear how this case could arise in practice, + // however. + SmallPtrSet Ancestors; + bool tracksSharedEHSpindles = false; + const Task *Parent = this; + do { + Parent = Parent->getParentTask(); + Ancestors.insert(Parent); + tracksSharedEHSpindles |= Parent->tracksSharedEHSpindles(); + } while (!Parent->isRootTask()); + if (!tracksSharedEHSpindles) return false; + + Task *SharedEHParent = SharedEH->getParentTask(); + if (!Ancestors.contains(SharedEHParent)) + return false; + + // Scan the successors of the spindles in this task to find shared EH exits. + SmallVector WorkList; + SmallPtrSet Visited; + for (Spindle *S : getSpindles()) + for (Spindle *Succ : successors(S)) + if (SharedEH == Succ) + return true; + else if (Succ->isSharedEH() && Ancestors.contains(Succ->getParentTask())) + WorkList.push_back(Succ); + + // Perform a DFS of the shared EH exits to push each one onto SpindleVec and + // continue searching for more shared EH exits. + while (!WorkList.empty()) { + Spindle *EHExit = WorkList.pop_back_val(); + if (!Visited.insert(EHExit).second) continue; + + // Check if this exit is the shared EH exit we're looking for. + if (SharedEH == EHExit) + return true; + + // Scan the successors of EHExit for more shared EH exits. 
+ for (Spindle *Succ : successors(EHExit)) + if (Succ->isSharedEH()) + WorkList.push_back(Succ); + } + + return false; +} + +//===----------------------------------------------------------------------===// +// TaskInfo implementation +// + +// Add the unassociated spindles to the task T in order of a DFS CFG traversal +// starting at the entry block of T. +static void +AssociateWithTask(TaskInfo *TI, Task *T, + SmallPtrSetImpl &UnassocSpindles) { + SmallVector WorkList; + SmallPtrSet Visited; + // Add the successor spindles of the entry block of T to the worklist. + Spindle *Entry = T->getEntrySpindle(); + for (BasicBlock *Exit : Entry->spindle_exits()) + for (BasicBlock *Child : successors(Exit)) + if (Spindle *S = TI->getSpindleFor(Child)) + if (UnassocSpindles.count(S)) + WorkList.push_back(S); + + // Perform a DFS CFG traversal of the spindles associated with task T, and add + // each spindle to T in that order. + while (!WorkList.empty()) { + Spindle *S = WorkList.pop_back_val(); + if (!Visited.insert(S).second) continue; + + // Add the spindle S to T. + LLVM_DEBUG(dbgs() << "Adding spindle@" << S->getEntry()->getName() + << " to task@" << Entry->getEntry()->getName() << "\n"); + TI->addSpindleToTask(S, T); + + // Add the successor spindles of S that are associated with T to the + // worklist. + for (BasicBlock *Exit : S->spindle_exits()) + for (BasicBlock *Child : successors(Exit)) + if (Spindle *S = TI->getSpindleFor(Child)) + if (UnassocSpindles.count(S)) + WorkList.push_back(S); + } + + // We can have remaining unassociated spindles when subtasks share + // exception-handling spindles. 
+ for (Spindle *S : UnassocSpindles) + if (!Visited.count(S)) { + TI->addEHSpindleToTask(S, T); + ++NumSharedEHSpindles; + } + + assert(T->getNumSpindles() + T->getNumSharedEHSpindles() == + UnassocSpindles.size() + 1 && + "Not all unassociated spindles were associated with task."); +} + +// Add the unassociated blocks to the spindle S in order of a DFS CFG traversal +// starting at the entry block of S. +static void +AssociateWithSpindle(TaskInfo *TI, Spindle *S, + SmallPtrSetImpl &UnassocBlocks) { + SmallVector WorkList; + SmallPtrSet Visited; + // Add the successor blocks of the entry of S to the worklist. + for (BasicBlock *Child : successors(S->getEntry())) + if (UnassocBlocks.count(Child)) + WorkList.push_back(Child); + + // Perform a DFS CFG traversal of the blocks associated with spindle S, and + // add each block to S in that order. + while (!WorkList.empty()) { + BasicBlock *BB = WorkList.pop_back_val(); + if (!Visited.insert(BB).second) continue; + + // Add the block BB to S. + TI->addBlockToSpindle(*BB, S); + + // Add the successors of block BB that are associated with S to the + // worklist. + for (BasicBlock *Child : successors(BB)) + if (UnassocBlocks.count(Child)) + WorkList.push_back(Child); + } + + assert(S->getNumBlocks() == UnassocBlocks.size() + 1 && + "Not all unassociated blocks were associated with spindle."); +} + +// Helper function to add spindle edges to spindles. +static void computeSpindleEdges(TaskInfo *TI) { + // Walk all spindles in the CFG to find all spindle edges. + SmallVector WorkList; + SmallPtrSet Visited; + + WorkList.push_back(TI->getRootTask()->getEntrySpindle()); + while (!WorkList.empty()) { + Spindle *S = WorkList.pop_back_val(); + + if (!Visited.insert(S).second) continue; + + // Examine all outgoing CFG edges from this spindle and create a spindle + // edge for each one. Filter out self-edges. 
+ for (BasicBlock *Exit : S->spindle_exits()) { + for (BasicBlock *SB : successors(Exit)) { + Spindle *Succ = TI->getSpindleFor(SB); + if (Succ != S) { + S->addSpindleEdgeTo(Succ, Exit); + // Add this successor spindle for processing. + WorkList.push_back(Succ); + } + } + } + } +} + +// Search the PHI nodes in BB for a user of Val. Return Val if no PHI node in +// BB uses Val. +static Value *FindUserAmongPHIs(Value *Val, BasicBlock *BB) { + for (PHINode &PN : BB->phis()) { + if (Val->getType() != PN.getType()) + continue; + for (Value *Incoming : PN.incoming_values()) + if (Incoming == Val) + return &PN; + } + return Val; +} + +// Helper function to record the normal and exceptional continuation spindles +// for each task. +static void recordContinuationSpindles(TaskInfo *TI) { + for (Task *T : post_order(TI->getRootTask())) { + if (T->isRootTask()) + continue; + + DetachInst *DI = T->getDetach(); + Spindle *S = TI->getSpindleFor(DI->getParent()); + + // Set the continuation spindle for the spawned task. + T->setContinuationSpindle(TI->getSpindleFor(DI->getContinue())); + + // If the detach has an unwind destination, set the exceptional continuation + // spindle for the spawned task. + if (DI->hasUnwindDest()) { + BasicBlock *Unwind = DI->getUnwindDest(); + // We also follow the use-def chain for the landingpad of the + // detach-unwind to determine the value of the landingpad in the + // exceptional continuation. + Value *LPadVal = Unwind->getLandingPadInst(); + // There should be no substantive code between the detach unwind and the + // exceptional continuation. Instead, we expect a sequence of basic + // blocks in the parent spindle S that merges control flow from different + // exception-handling code together. Each basic block in this sequence + // should have a unique successor, and the landingpad of the unwind + // destination should propagate to the exceptional continuation through + // PHI nodes in these blocks. 
+ while (TI->getSpindleFor(Unwind) == S) { + assert(Unwind->getUniqueSuccessor() && + "Unwind destination of detach has many successors, but belongs to " + "the same spindle as the detach."); + Unwind = Unwind->getUniqueSuccessor(); + LPadVal = FindUserAmongPHIs(LPadVal, Unwind); + } + // Set the exceptional continuation spindle for this task. + Spindle *UnwindSpindle = TI->getSpindleFor(Unwind); + LLVM_DEBUG({ + // Check that Task T is indeed a predecessor of this unwind spindle. + bool TaskIsPredecessor = false; + for (Spindle *Pred : predecessors(UnwindSpindle)) { + if (TI->getTaskFor(Pred) == T) { + TaskIsPredecessor = true; + break; + } + } + if (!TaskIsPredecessor) + // Report that an unusual exceptional continuation was found. This + // can happen, for example, due to splitting of landing pads or when + // part of the CFG becomes disconnected due to function inlining. + dbgs() << "TaskInfo: Found exceptional continuation at " + << Unwind->getName() << " with no predecessors in task\n"; + }); + T->setEHContinuationSpindle(UnwindSpindle, LPadVal); + } + } +} + +static bool shouldCreateSpindleAtDetachUnwind(const BasicBlock *MaybeUnwind, + const TaskInfo &TI, + const DominatorTree &DT) { + // Check that MaybeUnwind is a detach-unwind block. + if (!MaybeUnwind->isLandingPad()) + return false; + const BasicBlock *Pred = MaybeUnwind->getSinglePredecessor(); + if (!Pred) { + unsigned NumReachablePredecessors = 0; + for (const BasicBlock *P : predecessors(MaybeUnwind)) { + if (DT.isReachableFromEntry(P)) { + ++NumReachablePredecessors; + Pred = P; + } + } + if (NumReachablePredecessors > 1) + return false; + } + if (!isa(Pred->getTerminator())) + return false; + + const BasicBlock *UnwindSpindleEntry = MaybeUnwind; + // First suppose that a more appropriate detach-unwind spindle entry exists + // later on the chain of unique successors of Unwind. Traverse this chain of + // unique successors of Unwind until we find a spindle entry. 
+ while (!TI.getSpindleFor(UnwindSpindleEntry)) { + if (isa(UnwindSpindleEntry->getTerminator())) + // We found a sync instruction terminating a basic block along the chain + // of unique successors of Unwind. Such a sync instruction should appear + // within a detach-unwind spindle. + return true; + + const BasicBlock *Succ = UnwindSpindleEntry->getUniqueSuccessor(); + if (!Succ) + // We discovered a basic block without a unique successor before we found + // an appropriate detach-unwind spindle entry. Return true, so a new + // detach-unwind spindle entry will be created. + return true; + UnwindSpindleEntry = Succ; + } + + // Check the type of spindle discovered, to make sure it's appropriate for a + // detach-unwind spindle. + const Spindle *S = TI.getSpindleFor(UnwindSpindleEntry); + return !S->isPhi(); +} + +static bool isTaskFrameCreateSpindleEntry(const BasicBlock *B) { + if (const Instruction *TFCreate = getTaskFrameCreate(B)) + if (isCanonicalTaskFrameCreate(TFCreate)) + return true; + return false; +} + +void TaskInfo::analyze(Function &F, DominatorTree &DomTree) { + // We first compute defining blocks and IDFs based on the detach and sync + // instructions. + DenseMap BBNumbers; + unsigned NextBBNum = 0; + int64_t BBCount = 0, SpindleCount = 0, TaskCount = 0; + SmallPtrSet DefiningBlocks; + // Go through each block to figure out where tasks begin and where sync + // instructions occur. + for (BasicBlock &B : F) { + BBCount++; + BBNumbers[&B] = NextBBNum++; + if (&F.getEntryBlock() == &B) { + DefiningBlocks.insert(&B); + // Create a spindle and root task for the entry block. + Spindle *S = createSpindleWithEntry(&B, Spindle::SPType::Entry); + SpindleCount++; + RootTask = createTaskWithEntry(S, DomTree); + TaskCount++; + } + if (DetachInst *DI = dyn_cast(B.getTerminator())) { + BasicBlock *TaskEntry = DI->getDetached(); + DefiningBlocks.insert(TaskEntry); + // Create a new spindle and task. 
+ Spindle *S = createSpindleWithEntry(TaskEntry, Spindle::SPType::Detach); + SpindleCount++; + createTaskWithEntry(S, DomTree); + TaskCount++; + + // Create a new Phi spindle for the task continuation. We do this + // explicitly to handle cases where the spawned task does not return + // (reattach). + BasicBlock *TaskContinue = DI->getContinue(); + DefiningBlocks.insert(TaskContinue); + if (!getSpindleFor(TaskContinue)) { + createSpindleWithEntry(TaskContinue, Spindle::SPType::Phi); + SpindleCount++; + } + + // Similarly, create a new Phi spindle for the task unwind. + if (DI->hasUnwindDest()) { + BasicBlock *TaskUnwind = DI->getUnwindDest(); + DefiningBlocks.insert(TaskUnwind); + if (!getSpindleFor(TaskUnwind)) { + createSpindleWithEntry(TaskUnwind, Spindle::SPType::Phi); + SpindleCount++; + } + } + } else if (isa(B.getTerminator())) { + BasicBlock *SPEntry = B.getSingleSuccessor(); + // For sync instructions, we mark the block containing the sync + // instruction as the defining block for the sake of calculating IDF's. + // If the successor of the sync has multiple predecessors, then we want to + // allow a phi node to be created starting at that block. + DefiningBlocks.insert(&B); + // Create a new spindle. The type of this spindle might change later, if + // we discover it requires a phi. + if (!getSpindleFor(SPEntry)) { + createSpindleWithEntry(SPEntry, Spindle::SPType::Sync); + SpindleCount++; + } + assert((getSpindleFor(SPEntry)->isSync() || + getSpindleFor(SPEntry)->isPhi()) && + "Discovered early a non-sync, non-phi spindle after sync"); + } + // Create new spindles based on taskframe instrinsics. We need only work + // about taskframe.create and taskframe.resume. + if (isTaskFrameCreateSpindleEntry(&B)) { + // This block starts with a taskframe.create. Mark is as a spindle entry. + DefiningBlocks.insert(&B); + if (!getSpindleFor(&B)) { + // Create a new spindle. 
+ createSpindleWithEntry(&B, Spindle::SPType::Phi); + SpindleCount++; + } + } + if (endsUnassociatedTaskFrame(&B)) { + BasicBlock *SPEntry = B.getSingleSuccessor(); + // This block ends with a taskframe.end. Mark its successor as a spindle + // entry. + DefiningBlocks.insert(SPEntry); + if (!getSpindleFor(SPEntry)) { + // Create a new spindle. + createSpindleWithEntry(SPEntry, Spindle::SPType::Phi); + SpindleCount++; + } + } else if (isTaskFrameResume(B.getTerminator())) { + // This block ends with a taskframe.resume invocation. Mark the unwind + // destination as a spindle entry. + InvokeInst *II = cast(B.getTerminator()); + BasicBlock *ResumeDest = II->getUnwindDest(); + DefiningBlocks.insert(ResumeDest); + if (!getSpindleFor(ResumeDest)) { + createSpindleWithEntry(ResumeDest, Spindle::SPType::Phi); + SpindleCount++; + } + } + } + NumBasicBlocks += BBCount; + NumSpindles += SpindleCount; + NumTasks += TaskCount; + bool ParallelFunc = (DefiningBlocks.size() > 1); + if (ParallelFunc) { + NumBasicBlocksInPF += BBCount; + NumSpindlesInPF += SpindleCount; + NumTasksInPF += TaskCount; + } + LLVM_DEBUG({ + dbgs() << "DefiningBlocks:\n"; + for (BasicBlock *BB : DefiningBlocks) + dbgs() << " " << BB->getName() << "\n"; + }); + + // Compute IDFs to determine additional starting points of spindles, e.g., + // continuation points and other spindle PHI-nodes. + ForwardIDFCalculator IDFs(DomTree); + IDFs.setDefiningBlocks(DefiningBlocks); + SmallVector IDFBlocks; + IDFs.calculate(IDFBlocks); + + if (IDFBlocks.size() > 1) + llvm::sort(IDFBlocks, + [&BBNumbers](const BasicBlock *A, const BasicBlock *B) { + return BBNumbers.find(A)->second < BBNumbers.find(B)->second; + }); + + LLVM_DEBUG({ + dbgs() << "IDFBlocks:\n"; + for (BasicBlock *BB : IDFBlocks) + dbgs() << " " << BB->getName() << "\n"; + }); + + // Create spindles for all IDFBlocks. 
+ for (BasicBlock *B : IDFBlocks) + if (Spindle *S = getSpindleFor(B)) { + assert((S->isSync() || S->isPhi()) && + "Phi spindle to be created on existing non-sync spindle"); + // Change the type of this spindle. + S->Ty = Spindle::SPType::Phi; + } else { + // Create a new spindle. + createSpindleWithEntry(B, Spindle::SPType::Phi); + ++NumSpindles; + if (ParallelFunc) + ++NumSpindlesInPF; + } + + // Use the following linear-time algorithm to partition the function's blocks + // into spindles, partition the spindles into tasks, and compute the tree of + // tasks in this function. + // + // -) A post-order traversal of the dominator tree looks for a spindle entry + // and creates a stack of blocks it finds along the way. + // + // -) Once a spindle entry is encountered, the blocks belonging to that + // spindle equal the suffix of the stack of found blocks that are all + // dominated by the spindle's entry. These blocks are removed from the stack + // and added to the spindle according to a DFS CFG traversal starting at the + // spindle's entry. + // + // -) Similarly, the post-order travesal of the dominator tree finds the set + // of spindles that make up each task. These spindles are collected and added + // to their enclosing task using the same algorithm as above. + // + // -) Finally, the post-order traversal of the dominator tree deduces the + // hierarchical nesting of tasks within the function. Subtasks are associated + // with their parent task whenever a task entry that dominates the previous + // task entry is encountered. + std::vector FoundBlocks; + SmallVector FoundSpindles; + SmallVector FoundTFCreates; + SmallVector UnassocTasks; + for (auto DomNode : post_order(DomTree.getRootNode())) { + BasicBlock *BB = DomNode->getBlock(); + // If a basic block is not a spindle entry, mark it found and continue. + if (!getSpindleFor(BB)) { + // Perform some rare, special-case handling of detach unwind blocks. 
+ if (shouldCreateSpindleAtDetachUnwind(BB, *this, DomTree)) { + createSpindleWithEntry(BB, Spindle::SPType::Phi); + ++NumSpindles; + } else { + FoundBlocks.push_back(BB); + continue; + } + } + // This block is a spindle entry. + Spindle *S = getSpindleFor(BB); + + // Associated blocks dominated by spindle S with spindle S. + { + SmallPtrSet UnassocBlocks; + // Determine which found blocks are associated with this spindle. Because + // of the post-order tree traversal, these blocks form a suffix of + // FoundBlocks. + while (!FoundBlocks.empty()) { + BasicBlock *FB = FoundBlocks.back(); + if (DomTree.dominates(S->getEntry(), FB)) { + UnassocBlocks.insert(FB); + FoundBlocks.pop_back(); + } else + break; + } + + // Associate the unassociated blocks with spindle S. + if (!UnassocBlocks.empty()) + AssociateWithSpindle(this, S, UnassocBlocks); + } + + // Mark taskframe.create spindles found. + if (Value *TaskFrame = S->getTaskFrameCreate()) { + FoundTFCreates.push_back(S); + for (Task *SubT : reverse(UnassocTasks)) { + if (!DomTree.dominates(S->getEntry(), SubT->getEntry())) + break; + // If SubT uses the TaskFrame created in S, associate the two. + if (SubT->getTaskFrameUsed() == TaskFrame) { + AssociateTaskFrameWithUser(SubT, S); + break; + } + } + } + + // If this spindle is not an entry to a task, mark it found and continue. + if (!getTaskFor(S)) { + FoundSpindles.push_back(S); + continue; + } + // This spindle is a task entry. + Task *T = getTaskFor(S); + + // Associate spindles dominated by task T with task T. + { + SmallPtrSet UnassocSpindles; + // Determine which found spindles are associated with this task. Because + // of the post-order tree traversal, these spindles form a suffix of + // FoundSpindles. 
+ while (!FoundSpindles.empty()) { + Spindle *FS = FoundSpindles.back(); + if (DomTree.dominates(T->getEntry(), FS->getEntry())) { + UnassocSpindles.insert(FS); + FoundSpindles.pop_back(); + } else + break; + } + // Associate the unassociated spindles with task T. + if (!UnassocSpindles.empty()) + AssociateWithTask(this, T, UnassocSpindles); + } + + // If the last task is dominated by this task, add the unassociated tasks as + // children of this task. + while (!UnassocTasks.empty()) { + Task *LastTask = UnassocTasks.back(); + if (!DomTree.dominates(T->getEntry(), LastTask->getEntry())) + break; + T->addSubTask(LastTask); + UnassocTasks.pop_back(); + } + UnassocTasks.push_back(T); + + // Add taskframe.create spindles as children of this task. + while (!FoundTFCreates.empty()) { + Spindle *TF = FoundTFCreates.back(); + if (!DomTree.dominates(T->getEntry(), TF->getEntry())) + break; + T->TaskFrameCreates.push_back(TF); + FoundTFCreates.pop_back(); + } + } + + // Populate the predecessors and successors of all spindles. + computeSpindleEdges(this); + + // Record continuation spindles for each task. + recordContinuationSpindles(this); + + if (PrintTaskFrameTree) + // Determine the subtasks of taskframes discovered. + findTaskFrameTree(); +} + +/// Recursive helper to traverse the spindles to discover the taskframe tree. +void TaskInfo::findTaskFrameTreeHelper( + Spindle *TFSpindle, SmallVectorImpl &ParentWorkList, + SmallPtrSetImpl &SubTFVisited) { + const Value *TFCreate = TFSpindle->getTaskFrameCreate(); + const Task *UserT = TFSpindle->getTaskFromTaskFrame(); + const Spindle *Continuation = nullptr; + const Spindle *EHContinuation = nullptr; + if (UserT) { + Continuation = UserT->getContinuationSpindle(); + EHContinuation = UserT->getEHContinuationSpindle(); + } else { + // This taskframe is not associated with a task. Examine the uses of the + // taskframe to determine its continuation and exceptional-continuation + // spindles. 
+ for (const User *U : TFCreate->users()) { + if (const Instruction *I = dyn_cast(U)) { + if (isTapirIntrinsic(Intrinsic::taskframe_end, I) && + isCanonicalTaskFrameEnd(I)) + Continuation = getSpindleFor(I->getParent()->getSingleSuccessor()); + else if (isTaskFrameResume(I)) { + const InvokeInst *II = dyn_cast(I); + EHContinuation = getSpindleFor(II->getUnwindDest()); + } + } + } + } + + SmallVector WorkList; + SmallPtrSet Visited; + WorkList.push_back(TFSpindle); + while (!WorkList.empty()) { + Spindle *S = WorkList.pop_back_val(); + if (!Visited.insert(S).second) + continue; + + // Add S to the set of taskframe spindles. + TFSpindle->TaskFrameSpindles.insert(S); + + for (Spindle::SpindleEdge &SuccEdge : S->out_edges()) { + // If the successor spindle is itself a TaskFrameCreate spindle, add the + // subtask that uses it, and continue. + if (SuccEdge.first->getTaskFrameCreate()) { + Spindle *SubTF = SuccEdge.first; + if (!SubTFVisited.insert(SubTF).second) + continue; + + if (Task *SubTFUser = SubTF->getTaskFrameUser()) + // Add SubTFUser as a subtask of the taskframe spindle. + TFSpindle->TaskFrameSubtasks.insert(SubTFUser); + + // Add SubTF as a subtaskframe of the taskframe spindle. + TFSpindle->SubTaskFrames.insert(SubTF); + SubTF->TaskFrameParent = TFSpindle; + + // Recur into the new taskframe. + findTaskFrameTreeHelper(SubTF, WorkList, SubTFVisited); + continue; + } + + // Handle any spindles not in the same task as TFSpindle. + if (!TFSpindle->succInSameTask(SuccEdge.first)) + if (isa(SuccEdge.second->getTerminator())) { + Task *SubT = getTaskFor(SuccEdge.first); + if (SubT != UserT) { + // Add SubT as a subtask of the taskframe spindle. + TFSpindle->TaskFrameSubtasks.insert(SubT); + + // Add a spindle representing the subtask. + if (!SubT->getTaskFrameCreateSpindle()) { + Spindle *SubTF = SuccEdge.first; + // Add the subtask's entry spindle to the set of subtaskframes. 
+ TFSpindle->SubTaskFrames.insert(SubTF); + SubTF->TaskFrameParent = TFSpindle; + + // Recur into the new taskframe. + findTaskFrameTreeHelper(SubTF, WorkList, SubTFVisited); + continue; + } else { + LLVM_DEBUG({ + if (!TFSpindle->SubTaskFrames.count(SuccEdge.first)) + dbgs() << "Search encountered subtask@" + << SubT->getEntry()->getName() << " with taskframe " + << "before that subtask's taskframe.create."; + }); + } + } + } + + // Add the normal continuation to parent worklist. + if (SuccEdge.first == Continuation) { + ParentWorkList.push_back(SuccEdge.first); + continue; + } + // Add the exception-handling continuation to the appropriate worklist. + if (SuccEdge.first == EHContinuation) { + // If TFSpindle corresponds to a taskframe.create associated with a + // task, push the successor onto our worklist. Otherwise push it onto + // the parent's worklist. + // + // TODO: Why do we ever push the EHContinuation onto our own worklist? + if (TFCreate && UserT) + WorkList.push_back(SuccEdge.first); + else + ParentWorkList.push_back(SuccEdge.first); + continue; + } + + Instruction *ExitTerm = SuccEdge.second->getTerminator(); + // Add landingpad successor of taskframe.resume to parent worklist. + if (isTaskFrameResume(ExitTerm, TFCreate)) { + if (SuccEdge.first->getEntry() == + cast(ExitTerm)->getUnwindDest()) + ParentWorkList.push_back(SuccEdge.first); + continue; + } + // Add landingpad successor of detached.rethrow to the appropriate worklist. + if (isDetachedRethrow(ExitTerm)) { + if (SuccEdge.first->getEntry() == + cast(ExitTerm)->getUnwindDest()) { + // If TFSpindle corresponds to a taskframe.create, push the successor + // onto our worklist. Otherwise push it onto the parent's worklist. + if (TFCreate) + WorkList.push_back(SuccEdge.first); + else + ParentWorkList.push_back(SuccEdge.first); + } + continue; + } + + WorkList.push_back(SuccEdge.first); + } + } +} + +/// Compute the spindles and subtasks contained in all taskframes. 
+void TaskInfo::findTaskFrameTree() { + // If we've already found the taskframe tree, don't recompute it. + if (ComputedTaskFrameTree) + return; + + SmallPtrSet SubTFVisited; + // Get the taskframe tree under each taskframe.create in the root task. + for (Spindle *TFSpindle : getRootTask()->taskframe_creates()) { + SmallVector WorkList; + if (!SubTFVisited.insert(TFSpindle).second) + continue; + findTaskFrameTreeHelper(TFSpindle, WorkList, SubTFVisited); + } + + // Get the taskframe tree under each subtask that does not have an associated + // taskframe.create. + for (Task *SubT : getRootTask()->subtasks()) { + // If this subtask uses a taskframe, then we should have discovered its + // taskframe tree already. + if (SubT->getTaskFrameUsed()) + continue; + SmallVector WorkList; + // Treat the entry spindle of the subtask as the taskframe spindle. + Spindle *TFSpindle = SubT->getEntrySpindle(); + if (!SubTFVisited.insert(TFSpindle).second) + continue; + findTaskFrameTreeHelper(TFSpindle, WorkList, SubTFVisited); + } + + // Discover taskframe roots for all tasks in the function. + for (Task *T : post_order(getRootTask())) { + // Find taskframe.creates in T that do not have parents in T, and add them + // as taskframe roots of T. + for (Spindle *TFSpindle : T->taskframe_creates()) { + if (Spindle *Parent = TFSpindle->getTaskFrameParent()) { + if (!T->contains(Parent)) + T->TaskFrameRoots.push_back(TFSpindle); + } else { + T->TaskFrameRoots.push_back(TFSpindle); + } + } + + // For any subtask of T that does not have a taskframe, add its entry + // spindle as a taskframe root. + for (Task *SubT : T->subtasks()) { + // If SubT does not have an associated taskframe, then we might need to + // mark it as a taskframe root. 
+ if (!SubT->getTaskFrameUsed()) { + Spindle *EffectiveTF = SubT->getEntrySpindle(); + if (Spindle *Parent = EffectiveTF->getTaskFrameParent()) { + if (!T->contains(Parent)) + T->TaskFrameRoots.push_back(EffectiveTF); + } else { + T->TaskFrameRoots.push_back(EffectiveTF); + } + } + } + } + + // Record that the taskframe tree has been computed. + ComputedTaskFrameTree = true; +} + +/// Determine which blocks the value is live in. +/// +/// These are blocks which lead to uses. Knowing this allows us to avoid +/// inserting PHI nodes into blocks which don't lead to uses (thus, the inserted +/// phi nodes would be dead). +static void ComputeLiveInBlocks( + const AllocaInst *AI, + const SmallVectorImpl &UsingBlocks, + const SmallPtrSetImpl &DefBlocks, + SmallPtrSetImpl &LiveInBlocks) { + // To determine liveness, we must iterate through the predecessors of blocks + // where the def is live. Blocks are added to the worklist if we need to + // check their predecessors. Start with all the using blocks. + SmallVector LiveInBlockWorklist(UsingBlocks.begin(), + UsingBlocks.end()); + + // If any of the using blocks is also a definition block, check to see if the + // definition occurs before or after the use. If it happens before the use, + // the value isn't really live-in. + for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) { + BasicBlock *BB = LiveInBlockWorklist[i]; + if (!DefBlocks.count(BB)) + continue; + + // Okay, this is a block that both uses and defines the value. If the first + // reference to the alloca is a def (store), then we know it isn't live-in. + for (BasicBlock::iterator I = BB->begin();; ++I) { + if (StoreInst *SI = dyn_cast(I)) { + if (SI->getOperand(1) != AI) + continue; + + // We found a store to the alloca before a load. The alloca is not + // actually live-in here. 
+ LiveInBlockWorklist[i] = LiveInBlockWorklist.back(); + LiveInBlockWorklist.pop_back(); + --i; + --e; + break; + } + + if (LoadInst *LI = dyn_cast(I)) { + if (LI->getOperand(0) != AI) + continue; + + // Okay, we found a load before a store to the alloca. It is actually + // live into this block. + break; + } + } + } + + // Now that we have a set of blocks where the phi is live-in, recursively add + // their predecessors until we find the full region the value is live. + while (!LiveInBlockWorklist.empty()) { + BasicBlock *BB = LiveInBlockWorklist.pop_back_val(); + + // The block really is live in here, insert it into the set. If already in + // the set, then it has already been processed. + if (!LiveInBlocks.insert(BB).second) + continue; + + // Since the value is live into BB, it is either defined in a predecessor or + // live into it too. Add the preds to the worklist unless they are a + // defining block. + for (BasicBlock *P : predecessors(BB)) { + // The value is not live into a predecessor if it defines the value. + if (DefBlocks.count(P)) + continue; + + // Otherwise it is, add to the worklist. + LiveInBlockWorklist.push_back(P); + } + } +} + +// Check the set PHIBlocks if a PHI needs to be inserted in a task-continue +// block. +static bool needPhiInTaskContinue( + const TaskInfo &TI, const AllocaInst *AI, + SmallVectorImpl &PHIBlocks) { + // Determine which PHI nodes want to use a value from a detached predecessor. + // Because register state is not preserved across a reattach, these alloca's + // cannot be promoted. + for (unsigned i = 0, e = PHIBlocks.size(); i != e; ++i) { + const BasicBlock *BB = PHIBlocks[i]; + for (const_pred_iterator PI = pred_begin(BB), E = pred_end(BB); + PI != E; ++PI) { + const BasicBlock *P = *PI; + if (TI.getSpindleFor(BB) && TI.getSpindleFor(P) && + TI.getSpindleFor(BB)->predInDifferentTask(TI.getSpindleFor(P))) { + // TODO: Check if there's a store to this alloca in the task enclosing + // P.
+ LLVM_DEBUG(dbgs() << "Alloca " << *AI << " has use reattached from " << + P->getName() << "\n"); + return true; + } + } + } + return false; +} + +/// Check if a alloca AI is promotable based on uses in subtasks. +bool TaskInfo::isAllocaParallelPromotable(const AllocaInst *AIP) const { + if (getTaskFor(AIP->getParent())->isSerial()) return true; + + DominatorTree &DomTree = getRootTask()->DomTree; + AllocaInst *AI = const_cast(AIP); + SmallPtrSet DefBlocks; + SmallVector UsingBlocks; + const Spindle *OnlySpindle = getSpindleFor(AIP->getParent()); + bool OnlyUsedInOneSpindle = true; + + // As we scan the uses of the alloca instruction, keep track of stores, and + // decide whether all of the loads and stores to the alloca are within the + // same basic block. + for (auto UI = AI->user_begin(), E = AI->user_end(); UI != E;) { + Instruction *User = cast(*UI++); + if (StoreInst *SI = dyn_cast(User)) { + // Remember the basic blocks which define new values for the alloca + DefBlocks.insert(SI->getParent()); + } else if (LoadInst *LI = dyn_cast(User)) { + // Otherwise it must be a load instruction, keep track of variable reads. + UsingBlocks.push_back(LI->getParent()); + } else continue; + + if (OnlyUsedInOneSpindle) + if (getSpindleFor(User->getParent()) != OnlySpindle) + OnlyUsedInOneSpindle = false; + } + + // A spindle is guaranteed to execute as a serial unit. Hence, if an alloca + // is only used in a single spindle, it is safe to promote. + if (OnlyUsedInOneSpindle) return true; + + ForwardIDFCalculator IDF(DomTree); + // Determine which blocks the value is live in. These are blocks which lead + // to uses. + SmallPtrSet LiveInBlocks; + ComputeLiveInBlocks(AI, UsingBlocks, DefBlocks, LiveInBlocks); + // Filter out live-in blocks that are not dominated by the alloca. 
+ if (AI->getParent() != DomTree.getRoot()) { + SmallVector LiveInToRemove; + for (BasicBlock *LiveIn : LiveInBlocks) + if (!DomTree.dominates(AI->getParent(), LiveIn)) + LiveInToRemove.push_back(LiveIn); + for (BasicBlock *ToRemove : LiveInToRemove) + LiveInBlocks.erase(ToRemove); + } + + // Determine which blocks need PHI nodes and see if we can optimize out some + // work by avoiding insertion of dead phi nodes. + IDF.setLiveInBlocks(LiveInBlocks); + IDF.setDefiningBlocks(DefBlocks); + SmallVector PHIBlocks; + IDF.calculate(PHIBlocks); + + return !needPhiInTaskContinue(*this, AI, PHIBlocks); +} + +// This method is called once per spindle during an initial DFS traversal of the +// spindle graph. +bool IsSyncedState::markDefiningSpindle(const Spindle *S) { + LLVM_DEBUG(dbgs() << "markDefiningSpindle @ " << *S << "\n"); + // Entry spindles, detach spindles, sync spindles, and continuation-Phi + // spindles all define their sync state directly. Other Phi spindles + // determine their sync state based on their predecessors. + switch (S->getType()) { + case Spindle::SPType::Entry: + case Spindle::SPType::Detach: + SyncedState[S] = SyncInfo::TaskEntry; + return true; + case Spindle::SPType::Sync: + SyncedState[S] = SyncInfo::Synced; + return true; + case Spindle::SPType::Phi: + if (S->isTaskContinuation()) { + SyncedState[S] = SyncInfo::Unsynced; + return true; + } + } + return false; +} + +// This method is called once per unevaluated spindle in an inverse-post-order +// walk of the spindle graph. +bool IsSyncedState::evaluate(const Spindle *S, unsigned EvalNum) { + LLVM_DEBUG(dbgs() << "evaluate @ " << *S << "\n"); + + // For the first evaluation, optimistically assume that we are synced. Any + // unsynced predecessor will clear this bit. 
+ if (!EvalNum && !SyncedState.count(S)) { + SyncedState[S] = SyncInfo::Synced; + } + + for (const Spindle::SpindleEdge &PredEdge : S->in_edges()) { + const Spindle *Pred = PredEdge.first; + const BasicBlock *Inc = PredEdge.second; + + // During the first evaluation, if we have a loop amongst Phi spindles, then + // the predecessor might not be defined. Skip predecessors that aren't + // defined. + if (!EvalNum && !SyncedState.count(Pred)) { + SyncedState[S] = setIncomplete(SyncedState[S]); + continue; + } else + assert(SyncedState.count(Pred) && + "All predecessors should have synced states after first eval."); + + // If we find an unsynced predecessor that is not terminated by a sync + // instruction, then we must be unsynced. + if (isUnsynced(SyncedState[Pred]) && + !isa(Inc->getTerminator())) { + SyncedState[S] = setUnsynced(SyncedState[S]); + break; + } + } + // Because spindles are evaluated in each round in an inverse post-order + // traversal, two evaluations should suffice. If we have an incomplete synced + // state at the end of the first evaluation, then we conclude that it's synced + // and set it complete. + if (EvalNum && isIncomplete(SyncedState[S])) { + SyncedState[S] = setComplete(SyncedState[S]); + return true; + } + return !isIncomplete(SyncedState[S]); +} + +// This method is called once per spindle during an initial DFS traversal of +// the spindle graph. +bool MaybeParallelTasks::markDefiningSpindle(const Spindle *S) { + LLVM_DEBUG(dbgs() << "MaybeParallelTasks::markDefiningSpindle @ " + << S->getEntry()->getName() << "\n"); + switch (S->getType()) { + // Emplace empty task lists for Entry, Detach, and Sync spindles. + case Spindle::SPType::Entry: + case Spindle::SPType::Detach: + TaskList.try_emplace(S); + return true; + case Spindle::SPType::Sync: + return false; + case Spindle::SPType::Phi: { + // At task-continuation Phi's, initialize the task list with the detached + // task that reattaches to this continuation.
+ if (S->isTaskContinuation()) { + LLVM_DEBUG(dbgs() << " TaskCont spindle " << S->getEntry()->getName() + << "\n"); + for (const Spindle *Pred : predecessors(S)) { + LLVM_DEBUG(dbgs() << " pred spindle " + << Pred->getEntry()->getName() << "\n"); + if (S->predInDifferentTask(Pred)) + TaskList[S].insert(Pred->getParentTask()); + } + LLVM_DEBUG({ + for (const Task *MPT : TaskList[S]) + dbgs() << " Added MPT " << MPT->getEntry()->getName() << "\n"; + }); + return true; + } + return false; + } + } + return false; +} + +// This method is called once per unevaluated spindle in an inverse-post-order +// walk of the spindle graph. +bool MaybeParallelTasks::evaluate(const Spindle *S, unsigned EvalNum) { + LLVM_DEBUG(dbgs() << "MaybeParallelTasks::evaluate @ " + << S->getEntry()->getName() << "\n"); + if (!TaskList.count(S)) + TaskList.try_emplace(S); + + bool NoChange = true; + for (const Spindle::SpindleEdge &PredEdge : S->in_edges()) { + const Spindle *Pred = PredEdge.first; + const BasicBlock *Inc = PredEdge.second; + + // If the incoming edge is a sync edge, get the associated sync region. + const Value *SyncRegSynced = nullptr; + if (const SyncInst *SI = dyn_cast(Inc->getTerminator())) + SyncRegSynced = SI->getSyncRegion(); + + // Iterate through the tasks in the task list for Pred. + for (const Task *MP : TaskList[Pred]) { + // Filter out any tasks that are synced by the sync region. + if (const DetachInst *DI = MP->getDetach()) + if (SyncRegSynced == DI->getSyncRegion()) + continue; + // Insert the task into this spindle's task list. If this task is a new + // addition, then we haven't yet reached the fixed point of this analysis. + if (TaskList[S].insert(MP).second) + NoChange = false; + } + } + LLVM_DEBUG({ + dbgs() << " New MPT list for " << S->getEntry()->getName() + << " (NoChange? 
" << NoChange << ")\n"; + for (const Task *MP : TaskList[S]) + dbgs() << " " << MP->getEntry()->getName() << "\n"; + }); + return NoChange; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const Spindle &S) { + S.print(OS); + return OS; +} + +bool TaskInfo::invalidate(Function &F, const PreservedAnalyses &PA, + FunctionAnalysisManager::Invalidator &) { + // Check whether the analysis, all analyses on functions, or the function's + // CFG have been preserved. + auto PAC = PA.getChecker(); + return !(PAC.preserved() || PAC.preservedSet>() || + PAC.preservedSet()); +} + +static const BasicBlock *getSingleNotUnreachableSuccessor( + const BasicBlock *BB) { + const BasicBlock *SingleSuccessor = nullptr; + for (const auto *Succ : children(BB)) { + if (isa(Succ->getFirstNonPHIOrDbgOrLifetime())) + continue; + if (!SingleSuccessor) + SingleSuccessor = Succ; + else + return nullptr; + } + return SingleSuccessor; +} + +/// Print spindle with all the BBs inside it. +void Spindle::print(raw_ostream &OS, bool Verbose) const { + if (getParentTask()->getEntrySpindle() == this) + OS << ""; + BasicBlock *Entry = getEntry(); + for (unsigned i = 0; i < getBlocks().size(); ++i) { + BasicBlock *BB = getBlocks()[i]; + if (BB == Entry) { + if (getTaskFrameCreate()) + OS << ""; + switch (Ty) { + case SPType::Entry: OS << ""; break; + case SPType::Detach: OS << ""; break; + case SPType::Sync: OS << ""; break; + case SPType::Phi: OS << ""; break; + } + } + if (!Verbose) { + if (i) OS << ","; + BB->printAsOperand(OS, false); + } else + OS << "\n"; + + if (isSpindleExiting(BB)) { + OS << ""; + if (isTaskFrameResume(BB->getTerminator())) + OS << ""; + else if (getParentTask()->isTaskExiting(BB)) { + if (isa(BB->getTerminator()) || + isa(BB->getTerminator())) + OS << ""; + else if (isa(BB->getTerminator()) || + isa(BB->getTerminator())) + OS << ""; + else if (getParentTask()->getEHContinuationSpindle() && + (getSingleNotUnreachableSuccessor(BB) == + 
getParentTask()->getEHContinuationSpindle()->getEntry())) + OS << ""; + else + OS << ""; + } + } + if (Verbose) + BB->print(OS); + } +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const Task &T) { + T.print(OS); + return OS; +} + +/// Print task with all the BBs inside it. +void Task::print(raw_ostream &OS, unsigned Depth, bool Verbose) const { + OS.indent(Depth * 2) << "task at depth " << Depth << ": "; + + // Print the spindles in this task. + for (const Spindle *S : + depth_first>(getEntrySpindle())) { + OS << "{"; + S->print(OS, Verbose); + OS << "}"; + } + OS << "\n"; + + // If this task tracks any shared EH spindles for its subtasks, print + // those shared EH spindles. + for (const Spindle *S : shared_eh_spindles()) { + OS << "{"; + S->print(OS, Verbose); + OS << "}\n"; + } + + // Print the subtasks of this task. + for (const Task *SubTask : getSubTasks()) + SubTask->print(OS, Depth+1, Verbose); +} + +static void printTaskFrame(raw_ostream &OS, const Spindle *TFEntry, + unsigned Depth, bool Verbose) { + OS.indent(Depth * 2) << "taskframe at depth " << Depth << ": "; + + OS << "spindle@" << TFEntry->getEntry()->getName(); + if (const Task *User = TFEntry->getTaskFromTaskFrame()) + OS << " (used by task@" << User->getEntry()->getName() << ")"; + OS << "\n"; + + for (const Spindle *SubTF : TFEntry->subtaskframes()) + printTaskFrame(OS, SubTF, Depth+1, Verbose); +} + +// Debugging +void TaskInfo::print(raw_ostream &OS) const { + OS << "Spindles:\n"; + SmallVector WorkList; + SmallPtrSet Visited; + WorkList.push_back(getRootTask()->getEntrySpindle()); + while (!WorkList.empty()) { + const Spindle *S = WorkList.pop_back_val(); + if (!Visited.insert(S).second) continue; + + OS << "{"; + S->print(OS); + OS << "}"; + + for (const Spindle *Succ : successors(S)) + WorkList.push_back(Succ); + } + OS << "\n\n"; + + OS << "Task tree:\n"; + getRootTask()->print(OS); + OS << "\n"; + + for (const Task *T : post_order(getRootTask())) { + if
(T->taskframe_creates().begin() == T->taskframe_creates().end()) + continue; + OS << "task@" << T->getEntry()->getName() << " has taskframe.creates:\n"; + for (const Spindle *S : T->taskframe_creates()) { + OS << " spindle@" << S->getEntry()->getName() << "\n"; + // Print the task that uses this taskframe.create + if (S->getTaskFrameUser()) + OS << " used by task@" + << S->getTaskFrameUser()->getEntry()->getName() << "\n"; + else + OS << " not used.\n"; + + // Print the subtaskframes under this taskframe.create. + for (const Spindle *SubTF : S->subtaskframes()) + OS << " contains subtaskframe@" + << SubTF->getEntry()->getName() << "\n"; + + // Print the subtasks under this taskframe.create. + for (const Task *SubT : S->taskframe_subtasks()) + OS << " contains subtask@" + << SubT->getEntry()->getName() << "\n"; + + // Print the taskframe spindles themselves. + for (const Spindle *TFSpindle : S->taskframe_spindles()) + OS << " " << *TFSpindle << "\n"; + } + OS << "\n"; + } + + if (PrintTaskFrameTree) { + for (const Spindle *TFCreate : getRootTask()->taskframe_roots()) { + printTaskFrame(OS, TFCreate, 0, false); + OS << "\n"; + } + } + + if (PrintMayHappenInParallel) { + // Evaluate the tasks that might be in parallel with each spindle, and + // determine number of discriminating syncs: syncs that sync a subset of the + // detached tasks, based on sync regions. + MaybeParallelTasks MPTasks; + evaluateParallelState(MPTasks); + for (const Task *T : depth_first(getRootTask())) { + // Skip tasks with no subtasks. + if (T->isSerial()) continue; + + for (const Spindle *S : T->spindles()) { + // Only consider spindles that might have tasks in parallel.
+ if (MPTasks.TaskList[S].empty()) continue; + + OS << "spindle@" << S->getEntry()->getName(); + OS << " may happen in parallel with:\n"; + for (const Task *MPT : MPTasks.TaskList[S]) + OS << " task@" << MPT->getEntry()->getName() << "\n"; + } + } + } +} + +AnalysisKey TaskAnalysis::Key; + +TaskInfo TaskAnalysis::run(Function &F, FunctionAnalysisManager &AM) { + // FIXME: Currently we create a TaskInfo from scratch for every function. + // This may prove to be too wasteful due to deallocating and re-allocating + // memory each time for the underlying map and vector datastructures. At some + // point it may prove worthwhile to use a freelist and recycle TaskInfo + // objects. I don't want to add that kind of complexity until the scope of + // the problem is better understood. + TaskInfo TI; + TI.analyze(F, AM.getResult(F)); + return TI; +} + +PreservedAnalyses TaskPrinterPass::run(Function &F, + FunctionAnalysisManager &AM) { + AM.getResult(F).print(OS); + return PreservedAnalyses::all(); +} + +void llvm::printTask(Task &T, raw_ostream &OS, const std::string &Banner) { + + if (forcePrintModuleIR()) { + // handling -print-module-scope + OS << Banner << " (task: "; + T.getEntry()->printAsOperand(OS, false); + OS << ")\n"; + + // printing whole module + OS << *T.getEntry()->getModule(); + return; + } + + OS << Banner; + + for (auto *S : T.spindles()) { + if (T.getEntrySpindle() == S) + OS << "entry spindle: "; + else + OS << "spindle: "; + + for (auto *Block : S->blocks()) + if (Block) + Block->print(OS); + else + OS << "Printing block"; + } +} + +void Task::verify(const TaskInfo *TI, const BasicBlock *Entry, + const DominatorTree &DT) const { + // Scan the blocks and spindles in this task and check that TaskInfo stores + // the correct information for them. 
+ SmallPtrSet DetachedBlocks; + for (Spindle *S : spindles()) { + assert(TI->getTaskFor(S) == this && + "TaskInfo associates spindle with different task"); + for (BasicBlock *B : S->blocks()) { + assert(encloses(B) && + "Task spindle contains a block not enclosed by task"); + assert(DT.dominates(Entry, B) && + "Task entry does not dominate all task blocks"); + assert(TI->getSpindleFor(B) == S && + "TaskInfo associates block with different spindle"); + + if (DetachInst *DI = dyn_cast(B->getTerminator())) { + assert(TI->isTaskEntry(DI->getDetached()) && + "Detached block is not a task entry"); + // Record all blocks found to be detached by this task. + DetachedBlocks.insert(DI->getDetached()); + } + } + } + + // Verify that the same number of detached blocks and subtasks are found. + assert(DetachedBlocks.size() == getSubTasks().size() && + "Mismatch found between detached blocks and subtasks"); + + for (Task *T : getSubTasks()) { + // Check the entry of this subtask and its predecessor. + BasicBlock *TEntry = T->getEntry(); + assert(DetachedBlocks.count(TEntry) && + "Subtask entry not among set of detached blocks"); +#ifndef NDEBUG + BasicBlock *TPred = TEntry->getSinglePredecessor(); + assert(TPred && "Task entry does not have a single predecessors"); + + // Check the successors of the detach instruction that created this task. + DetachInst *DI = dyn_cast(TPred->getTerminator()); + assert(DI && "Task predecessor is not terminated by a detach"); + assert(DI->getDetached() == TEntry && + "Task entry is not a detached successor"); + assert(!DT.dominates(TEntry, DI->getContinue()) && + "Task entry dominates continuation of task."); + assert((!DI->hasUnwindDest() || + !DT.dominates(TEntry, DI->getUnwindDest())) && + "Task entry dominates unwind destination of detach"); + + // Check that detach edge dominates all blocks in subtask. 
+ SmallVector TaskBlocks; + T->getDominatedBlocks(TaskBlocks); + BasicBlockEdge DetachEdge(TPred, TEntry); + for (BasicBlock *B : TaskBlocks) + assert(DT.dominates(DetachEdge, B) && + "Detach edge does not dominate all blocks in task"); +#endif + // Recursively verify the subtask. + T->verify(TI, TEntry, DT); + } +} + +void TaskInfo::verify(const DominatorTree &DT) const { + assert(RootTask && "No root task found"); + assert(RootTask->getEntry() == DT.getRoot() && + "Root task not rooted at dominator tree root"); + // Test the set of blocks extracted by getBlocks(), which uses the Task's + // associated dominator tree. + SmallVector TaskBlocks; + RootTask->getDominatedBlocks(TaskBlocks); +#ifndef NDEBUG + for (BasicBlock *B : TaskBlocks) { + Spindle *S = getSpindleFor(B); + assert(S && "TaskInfo does not associate this block with a spindle"); + assert(getTaskFor(S) && + "TaskInfo does not associate a task with this spindle"); + } +#endif + RootTask->verify(this, DT.getRoot(), DT); +} + +//===----------------------------------------------------------------------===// +// TaskInfo implementation +// + +TaskInfoWrapperPass::TaskInfoWrapperPass() : FunctionPass(ID) { + initializeTaskInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +char TaskInfoWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(TaskInfoWrapperPass, "tasks", "Tapir Task Information", + true, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(TaskInfoWrapperPass, "tasks", "Tapir Task Information", + true, true) + +bool TaskInfoWrapperPass::runOnFunction(Function &F) { + releaseMemory(); + TI.analyze(F, getAnalysis().getDomTree()); + return false; +} + +void TaskInfoWrapperPass::verifyAnalysis() const { + // TaskInfoWrapperPass is a FunctionPass, but verifying every task in the + // function each time verifyAnalysis is called is very expensive. The + // -verify-task-info option can enable this. 
+ if (VerifyTaskInfo) { + auto &DT = getAnalysis().getDomTree(); + TI.verify(DT); + } +} + +void TaskInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive(); +} + +void TaskInfoWrapperPass::print(raw_ostream &OS, const Module *) const { + TI.print(OS); +} + +PreservedAnalyses TaskVerifierPass::run(Function &F, + FunctionAnalysisManager &AM) { + TaskInfo &TI = AM.getResult(F); + auto &DT = AM.getResult(F); + TI.verify(DT); + return PreservedAnalyses::all(); +} + +//===----------------------------------------------------------------------===// +// Associated analysis routines + +/// Examine a given loop to determine if it is structurally a Tapir loop. +/// Returns the Task that encodes the loop body if so, or nullptr if not. +Task *llvm::getTaskIfTapirLoopStructure(const Loop *L, TaskInfo *TI) { + if (!L || !TI) + return nullptr; + + const BasicBlock *Header = L->getHeader(); + const BasicBlock *Latch = L->getLoopLatch(); + + LLVM_DEBUG(dbgs() << "Analyzing loop: " << *L); + + // Header must be terminated by a detach. + const DetachInst *DI = dyn_cast(Header->getTerminator()); + if (!DI) { + LLVM_DEBUG(dbgs() << "Loop header does not detach.\n"); + return nullptr; + } + + // Loop must have a unique latch. + if (!Latch) { + LLVM_DEBUG(dbgs() << "Loop does not have a unique latch.\n"); + return nullptr; + } + + // The loop latch must be the continuation of the detach in the header. + if (Latch != DI->getContinue()) { + LLVM_DEBUG(dbgs() << + "Continuation of detach in header is not the latch.\n"); + return nullptr; + } + + Task *T = TI->getTaskFor(DI->getDetached()); + assert(T && "Detached block not mapped to a task."); + assert(T->getDetach() == DI && "Task mapped to unexpected detach."); + + // All predecessors of the latch other than the header must be in the task. 
+ for (const BasicBlock *Pred : predecessors(Latch)) { + if (Header == Pred) continue; + if (!T->encloses(Pred)) { + LLVM_DEBUG(dbgs() << "Latch has predecessor outside of spawned body.\n"); + return nullptr; + } + } + + // For each exit from the latch, any predecessor of that exit inside the loop + // must be the header or the latch. + for (const BasicBlock *Exit : successors(Latch)) { + for (const BasicBlock *ExitPred : predecessors(Exit)) { + if (!L->contains(ExitPred)) continue; + if (Header != ExitPred && Latch != ExitPred) { + LLVM_DEBUG(dbgs() << + "Loop branches to an exit of the latch from a block " << + "other than the header or latch.\n"); + return nullptr; + } + } + } + +#ifndef NDEBUG + // EXPENSIVE CHECK for verification. + // + // The blocks in this loop can only be the header, the latch, or a block + // contained in the task. + for (const BasicBlock *BB : L->blocks()) { + if (BB == Header) continue; + if (BB == Latch) continue; + assert(T->encloses(BB) && + "Loop contains block not enclosed by detached task.\n"); + } +#endif + + return T; +} diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index 05fa67d0bbf174..ea944ea7229b28 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -15,6 +15,7 @@ #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/TargetParser/Triple.h" +#include "llvm/Transforms/Tapir/TapirTargetIDs.h" using namespace llvm; static cl::opt ClVectorLibrary( @@ -37,6 +38,24 @@ static cl::opt ClVectorLibrary( clEnumValN(TargetLibraryInfoImpl::ArmPL, "ArmPL", "Arm Performance Libraries"))); +static cl::opt ClTapirTarget( + "tapir-target", cl::Hidden, cl::desc("Target runtime for Tapir"), + cl::init(TapirTargetID::OpenCilk), + cl::values(clEnumValN(TapirTargetID::None, + "none", "None"), + clEnumValN(TapirTargetID::Serial, + "serial", "Serial code"), + clEnumValN(TapirTargetID::Cilk, + "cilk", "Cilk Plus"), + 
clEnumValN(TapirTargetID::Cheetah, + "cheetah", "Cheetah"), + clEnumValN(TapirTargetID::OpenCilk, + "opencilk", "OpenCilk"), + clEnumValN(TapirTargetID::Lambda, + "lambda", "Lambda"), + clEnumValN(TapirTargetID::OMPTask, + "omptask", "OMPTask"))); + StringLiteral const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = { #define TLI_DEFINE_STRING @@ -77,6 +96,17 @@ static const FuncProtoTy Signatures[] = { static_assert(sizeof Signatures / sizeof *Signatures == LibFunc::NumLibFuncs, "Missing library function signatures"); +TapirTargetOptions *TapirTargetOptions::clone() const { + TapirTargetOptions *New = nullptr; + switch (getKind()) { + default: + llvm_unreachable("Unhandled TapirTargetOption."); + case TTO_OpenCilk: + New = cast(this)->cloneImpl(); + } + return New; +} + static bool hasSinCosPiStret(const Triple &T) { // Only Darwin variants have _stret versions of combined trig functions. if (!T.isOSDarwin()) @@ -865,6 +895,9 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, } TLI.addVectorizableFunctionsFromVecLib(ClVectorLibrary, T); + + TLI.setTapirTarget(ClTapirTarget); + TLI.addTapirTargetLibraryFunctions(ClTapirTarget); } TargetLibraryInfoImpl::TargetLibraryInfoImpl() { @@ -886,10 +919,13 @@ TargetLibraryInfoImpl::TargetLibraryInfoImpl(const TargetLibraryInfoImpl &TLI) ShouldExtI32Return(TLI.ShouldExtI32Return), ShouldSignExtI32Param(TLI.ShouldSignExtI32Param), ShouldSignExtI32Return(TLI.ShouldSignExtI32Return), - SizeOfInt(TLI.SizeOfInt) { + SizeOfInt(TLI.SizeOfInt), TapirTarget(TLI.TapirTarget) { + if (TLI.TTOptions) + TTOptions = std::unique_ptr(TLI.TTOptions->clone()); memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray)); VectorDescs = TLI.VectorDescs; ScalarDescs = TLI.ScalarDescs; + TapirTargetFuncs = TLI.TapirTargetFuncs; } TargetLibraryInfoImpl::TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI) @@ -898,11 +934,13 @@ TargetLibraryInfoImpl::TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI) 
ShouldExtI32Return(TLI.ShouldExtI32Return), ShouldSignExtI32Param(TLI.ShouldSignExtI32Param), ShouldSignExtI32Return(TLI.ShouldSignExtI32Return), - SizeOfInt(TLI.SizeOfInt) { + SizeOfInt(TLI.SizeOfInt), TapirTarget(TLI.TapirTarget), + TTOptions(std::move(TLI.TTOptions)) { std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray), AvailableArray); VectorDescs = TLI.VectorDescs; ScalarDescs = TLI.ScalarDescs; + TapirTargetFuncs = TLI.TapirTargetFuncs; } TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(const TargetLibraryInfoImpl &TLI) { @@ -912,6 +950,9 @@ TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(const TargetLibraryInfoI ShouldSignExtI32Param = TLI.ShouldSignExtI32Param; ShouldSignExtI32Return = TLI.ShouldSignExtI32Return; SizeOfInt = TLI.SizeOfInt; + TapirTarget = TLI.TapirTarget; + if (TLI.TTOptions) + TTOptions = std::unique_ptr(TLI.TTOptions->clone()); memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray)); return *this; } @@ -923,6 +964,8 @@ TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(TargetLibraryInfoImpl && ShouldSignExtI32Param = TLI.ShouldSignExtI32Param; ShouldSignExtI32Return = TLI.ShouldSignExtI32Return; SizeOfInt = TLI.SizeOfInt; + TapirTarget = TLI.TapirTarget; + TTOptions = std::move(TLI.TTOptions); std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray), AvailableArray); return *this; @@ -1239,6 +1282,59 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib( } } +void TargetLibraryInfoImpl::addTapirTargetLibraryFunctions( + TapirTargetID TargetID) { + switch (TargetID) { + case TapirTargetID::Cilk: + case TapirTargetID::OpenCilk: { + const StringLiteral TTFuncs[] = { + #define TLI_DEFINE_CILK_LIBS + #include "llvm/Analysis/TapirTargetFuncs.def" + }; + TapirTargetFuncs.insert(TapirTargetFuncs.end(), std::begin(TTFuncs), + std::end(TTFuncs)); + break; + } + case TapirTargetID::None: + case TapirTargetID::Serial: + case TapirTargetID::Cheetah: + case 
TapirTargetID::Lambda: + case TapirTargetID::OMPTask: + case TapirTargetID::Qthreads: + case TapirTargetID::Last_TapirTargetID: + break; + } + + // Ensure that the collected Tapir-target functions are in sorted order. + llvm::sort(TapirTargetFuncs); +} + +bool TargetLibraryInfoImpl::isTapirTargetLibFunc(StringRef funcName) const { + funcName = sanitizeFunctionName(funcName); + if (funcName.empty()) + return false; + + const auto Start = TapirTargetFuncs.begin(); + const auto End = TapirTargetFuncs.end(); + const auto I = std::lower_bound(Start, End, funcName); + if (I != End && *I == funcName) + return true; + return false; +} + +bool TargetLibraryInfoImpl::isTapirTargetLibFunc( + const Function &FDecl) const { + // Intrinsics don't overlap w/libcalls; if our module has a large number of + // intrinsics, this ends up being an interesting compile time win since we + // avoid string normalization and comparison. + if (FDecl.isIntrinsic()) return false; + + // TODO: Check the function prototype of the Tapir-target library function to + // ensure a match. This change may require building more detailed knowledge + // of these functions into TargetLibraryInfo. 
+ return isTapirTargetLibFunc(FDecl.getName()); +} + bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const { funcName = sanitizeFunctionName(funcName); if (funcName.empty()) diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index c751d174a48ab1..8654ef7eda701a 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -376,6 +376,11 @@ void TargetTransformInfo::getPeelingPreferences(Loop *L, ScalarEvolution &SE, return TTIImpl->getPeelingPreferences(L, SE, PP); } +void TargetTransformInfo::getStripMiningPreferences( + Loop *L, ScalarEvolution &SE, StripMiningPreferences &SMP) const { + return TTIImpl->getStripMiningPreferences(L, SE, SMP); +} + bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const { return TTIImpl->isLegalAddImmediate(Imm); } diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 410f93b1c215a1..7412a2fd34fa54 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -503,6 +503,14 @@ static bool isEphemeralValueOf(const Instruction *I, const Value *E) { // Is this an intrinsic that cannot be speculated but also cannot trap? bool llvm::isAssumeLikeIntrinsic(const Instruction *I) { + // Check for invokes of detached.rethrow, taskframe.resume, or sync.unwind. 
+ if (const InvokeInst *II = dyn_cast(I)) + if (const Function *Called = II->getCalledFunction()) + if (Intrinsic::detached_rethrow == Called->getIntrinsicID() || + Intrinsic::taskframe_resume == Called->getIntrinsicID() || + Intrinsic::sync_unwind == Called->getIntrinsicID()) + return true; + if (const IntrinsicInst *CI = dyn_cast(I)) return CI->isAssumeLikeIntrinsic(); @@ -6148,6 +6156,9 @@ bool llvm::isSafeToSpeculativelyExecuteWithOpcode( case Instruction::CatchRet: case Instruction::CleanupPad: case Instruction::CleanupRet: + case Instruction::Detach: + case Instruction::Reattach: + case Instruction::Sync: return false; // Misc instructions which have effects } } diff --git a/llvm/lib/Analysis/WorkSpanAnalysis.cpp b/llvm/lib/Analysis/WorkSpanAnalysis.cpp new file mode 100644 index 00000000000000..0f0b66147c3be8 --- /dev/null +++ b/llvm/lib/Analysis/WorkSpanAnalysis.cpp @@ -0,0 +1,118 @@ +//===- WorkSpanAnalysis.cpp - Analysis to estimate work and span ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements an analysis pass to estimate the work and span of the +// program. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/WorkSpanAnalysis.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "work-span" + +// Get a constant trip count for the given loop. +unsigned llvm::getConstTripCount(const Loop *L, ScalarEvolution &SE) { + int64_t ConstTripCount = 0; + // If there are multiple exiting blocks but one of them is the latch, use + // the latch for the trip count estimation. Otherwise insist on a single + // exiting block for the trip count estimation. + BasicBlock *ExitingBlock = L->getLoopLatch(); + if (!ExitingBlock || !L->isLoopExiting(ExitingBlock)) + ExitingBlock = L->getExitingBlock(); + if (ExitingBlock) + ConstTripCount = SE.getSmallConstantTripCount(L, ExitingBlock); + return ConstTripCount; +} + +/// Recursive helper routine to estimate the amount of work in a loop. +static void estimateLoopCostHelper(const Loop *L, CodeMetrics &Metrics, + WSCost &LoopCost, LoopInfo *LI, + ScalarEvolution *SE) { + if (LoopCost.UnknownCost) + return; + + // TODO: Handle control flow within the loop intelligently, using + // BlockFrequencyInfo. + for (Loop *SubL : *L) { + WSCost SubLoopCost; + estimateLoopCostHelper(SubL, Metrics, SubLoopCost, LI, SE); + // Quit early if the size of this subloop is already too big. 
+ if (InstructionCost::getMax() == SubLoopCost.Work) + LoopCost.Work = InstructionCost::getMax(); + + // Find a constant trip count if available + int64_t ConstTripCount = SE ? getConstTripCount(SubL, *SE) : 0; + // TODO: Use a more precise analysis to account for non-constant trip + // counts. + if (!ConstTripCount) { + LoopCost.UnknownCost = true; + // If we cannot compute a constant trip count, assume this subloop + // executes at least once. + ConstTripCount = 1; + } + + // Check if the total size of this subloop is huge. + if (InstructionCost::getMax() / ConstTripCount > SubLoopCost.Work) + LoopCost.Work = InstructionCost::getMax(); + + // Check if this subloop suffices to make loop L huge. + if (InstructionCost::getMax() - LoopCost.Work < + (SubLoopCost.Work * ConstTripCount)) + LoopCost.Work = InstructionCost::getMax(); + + // Add in the size of this subloop. + LoopCost.Work += (SubLoopCost.Work * ConstTripCount); + } + + // After looking at all subloops, if we've concluded we have a huge loop size, + // return early. + if (InstructionCost::getMax() == LoopCost.Work) + return; + + for (BasicBlock *BB : L->blocks()) + if (LI->getLoopFor(BB) == L) { + // Check if this BB suffices to make loop L huge. + if (InstructionCost::getMax() - LoopCost.Work < Metrics.NumBBInsts[BB]) { + LoopCost.Work = InstructionCost::getMax(); + return; + } + LoopCost.Work += Metrics.NumBBInsts[BB]; + } +} + +void llvm::estimateLoopCost(WSCost &LoopCost, const Loop *L, LoopInfo *LI, + ScalarEvolution *SE, const TargetTransformInfo &TTI, + TargetLibraryInfo *TLI, + const SmallPtrSetImpl &EphValues) { + // TODO: Use more precise analysis to estimate the work in each call. + // TODO: Use vectorizability to enhance cost analysis. + + // Gather code metrics for all basic blocks in the loop. 
+ for (BasicBlock *BB : L->blocks()) + LoopCost.Metrics.analyzeBasicBlock(BB, TTI, EphValues, + /*PrepareForLTO*/ false, TLI); + + estimateLoopCostHelper(L, LoopCost.Metrics, LoopCost, LI, SE); +} diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp index 466bdebc001f58..0d7a0e68eefd86 100644 --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -874,6 +874,9 @@ lltok::Kind LLLexer::LexIdentifier() { INSTKEYWORD(resume, Resume); INSTKEYWORD(unreachable, Unreachable); INSTKEYWORD(callbr, CallBr); + INSTKEYWORD(detach, Detach); + INSTKEYWORD(reattach, Reattach); + INSTKEYWORD(sync, Sync); INSTKEYWORD(alloca, Alloca); INSTKEYWORD(load, Load); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index 5f0d1a76de7939..d02d2ebf82c9f3 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -6348,6 +6348,12 @@ int LLParser::parseInstruction(Instruction *&Inst, BasicBlock *BB, return parseCleanupPad(Inst, PFS); case lltok::kw_callbr: return parseCallBr(Inst, PFS); + case lltok::kw_detach: + return parseDetach(Inst, PFS); + case lltok::kw_reattach: + return parseReattach(Inst, PFS); + case lltok::kw_sync: + return parseSync(Inst, PFS); // Unary Operators. 
case lltok::kw_fneg: { FastMathFlags FMF = EatFastMathFlagsIfPresent(); @@ -6613,6 +6619,98 @@ bool LLParser::parseBr(Instruction *&Inst, PerFunctionState &PFS) { return false; } +/// parseDetach +/// ::= 'detach' within SyncRegion ',' TypeAndValue ',' TypeAndValue +/// ::= 'detach' within SyncRegion ',' TypeAndValue ',' TypeAndValue \ +/// unwind TypeAndValue +bool LLParser::parseDetach(Instruction *&Inst, PerFunctionState &PFS) { + LocTy Loc, Loc2; + Value *SR; + BasicBlock *Op1, *Op2; + + if (parseToken(lltok::kw_within, "expected 'within' after detach")) + return true; + + if (Lex.getKind() != lltok::kw_none && Lex.getKind() != lltok::LocalVar && + Lex.getKind() != lltok::LocalVarID) + return tokError("expected scope value for detach"); + + if (parseValue(Type::getTokenTy(Context), SR, PFS)) + return true; + + if (parseToken(lltok::comma, "expected ',' after detach scope")) + return true; + + if (parseTypeAndBasicBlock(Op1, Loc, PFS) || + parseToken(lltok::comma, "expected ',' after detached destination") || + parseTypeAndBasicBlock(Op2, Loc2, PFS)) + return true; + + LocTy Loc3; + BasicBlock *UnwindBB = nullptr; + if (EatIfPresent(lltok::kw_unwind)) { + if (parseTypeAndBasicBlock(UnwindBB, Loc3, PFS)) + return true; + Inst = DetachInst::Create(Op1, Op2, UnwindBB, SR); + } else + Inst = DetachInst::Create(Op1, Op2, SR); + return false; +} + +/// parseReattach +/// ::= 'reattach' within SyncRegion ',' TypeAndValue +bool LLParser::parseReattach(Instruction *&Inst, PerFunctionState &PFS) { + LocTy Loc; + Value *SR; + BasicBlock *Op; + + if (parseToken(lltok::kw_within, "expected 'within' after reatach")) + return true; + + if (Lex.getKind() != lltok::kw_none && Lex.getKind() != lltok::LocalVar && + Lex.getKind() != lltok::LocalVarID) + return tokError("expected scope value for reattach"); + + if (parseValue(Type::getTokenTy(Context), SR, PFS)) + return true; + + if (parseToken(lltok::comma, "expected ',' after reattach scope")) + return true; + + if 
(parseTypeAndBasicBlock(Op, Loc, PFS)) + return true; + + Inst = ReattachInst::Create(Op, SR); + return false; +} + +/// parseSync +/// ::= 'sync' within SyncRegion ',' TypeAndValue +bool LLParser::parseSync(Instruction *&Inst, PerFunctionState &PFS) { + LocTy Loc; + Value *SR; + BasicBlock *Op; + + if (parseToken(lltok::kw_within, "expected 'within' after sync")) + return true; + + if (Lex.getKind() != lltok::kw_none && Lex.getKind() != lltok::LocalVar && + Lex.getKind() != lltok::LocalVarID) + return tokError("expected scope value for reattach"); + + if (parseValue(Type::getTokenTy(Context), SR, PFS)) + return true; + + if (parseToken(lltok::comma, "expected ',' after scope in sync")) + return true; + + if (parseTypeAndBasicBlock(Op, Loc, PFS)) + return true; + + Inst = SyncInst::Create(Op, SR); + return false; +} + /// parseSwitch /// Instruction /// ::= 'switch' TypeAndValue ',' TypeAndValue '[' JumpTable ']' diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 3797a44c179303..8a0a35ec0024c0 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2014,12 +2014,20 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) { return Attribute::SafeStack; case bitc::ATTR_KIND_SHADOWCALLSTACK: return Attribute::ShadowCallStack; + case bitc::ATTR_KIND_STEALABLE: + return Attribute::Stealable; + case bitc::ATTR_KIND_STRAND_NO_ALIAS: + return Attribute::StrandNoAlias; + case bitc::ATTR_KIND_STRAND_PURE: + return Attribute::StrandPure; case bitc::ATTR_KIND_STRICT_FP: return Attribute::StrictFP; case bitc::ATTR_KIND_STRUCT_RET: return Attribute::StructRet; case bitc::ATTR_KIND_SANITIZE_ADDRESS: return Attribute::SanitizeAddress; + case bitc::ATTR_KIND_SANITIZE_CILK: + return Attribute::SanitizeCilk; case bitc::ATTR_KIND_SANITIZE_HWADDRESS: return Attribute::SanitizeHWAddress; case bitc::ATTR_KIND_SANITIZE_THREAD: @@ -2060,6 +2068,14 @@ static Attribute::AttrKind 
getAttrFromCode(uint64_t Code) { return Attribute::Hot; case bitc::ATTR_KIND_PRESPLIT_COROUTINE: return Attribute::PresplitCoroutine; + case bitc::ATTR_KIND_INJECTIVE: + return Attribute::Injective; + case bitc::ATTR_KIND_HYPER_VIEW: + return Attribute::HyperView; + case bitc::ATTR_KIND_REDUCER_REGISTER: + return Attribute::ReducerRegister; + case bitc::ATTR_KIND_REDUCER_UNREGISTER: + return Attribute::ReducerUnregister; } } @@ -5730,6 +5746,73 @@ Error BitcodeReader::parseFunctionBody(Function *F) { I = new UnreachableInst(Context); InstructionList.push_back(I); break; + case bitc::FUNC_CODE_INST_DETACH: { // DETACH: [bb#, bb#, [bb#,] val] + if (Record.size() != 3 && Record.size() != 4) + return error("Invalid record"); + BasicBlock *Detached = getBasicBlock(Record[0]); + if (!Detached) + return error("Invalid record"); + + BasicBlock *Continue = getBasicBlock(Record[1]); + if (!Continue) + return error("Invalid record"); + + unsigned SREntry = 2; + BasicBlock *Unwind = nullptr; + if (Record.size() == 4) { + Unwind = getBasicBlock(Record[SREntry++]); + if (!Unwind) + return error("Invalid record"); + } + + Type *TokenTy = Type::getTokenTy(Context); + Value *SyncRegion = getValue(Record, SREntry, NextValueNo, TokenTy, + getVirtualTypeID(TokenTy), CurBB); + if (!SyncRegion) + return error("Invalid record"); + + if (Unwind) + I = DetachInst::Create(Detached, Continue, Unwind, SyncRegion); + else + I = DetachInst::Create(Detached, Continue, SyncRegion); + InstructionList.push_back(I); + break; + } + case bitc::FUNC_CODE_INST_REATTACH: { // REATTACH: [bb#, val] + if (Record.size() != 2) + return error("Invalid record"); + + BasicBlock *DetachContinue = getBasicBlock(Record[0]); + if (!DetachContinue) + return error("Invalid record"); + + Type *TokenTy = Type::getTokenTy(Context); + Value *SyncRegion = getValue(Record, 1, NextValueNo, TokenTy, + getVirtualTypeID(TokenTy), CurBB); + if (!SyncRegion) + return error("Invalid record"); + + I = 
ReattachInst::Create(DetachContinue, SyncRegion); + InstructionList.push_back(I); + break; + } + case bitc::FUNC_CODE_INST_SYNC: { // Sync: [bb#, val] + if (Record.size() != 2) + return error("Invalid record"); + BasicBlock *Continue = getBasicBlock(Record[0]); + if (!Continue) + return error("Invalid record"); + + Type *TokenTy = Type::getTokenTy(Context); + Value *SyncRegion = getValue(Record, 1, NextValueNo, TokenTy, + getVirtualTypeID(TokenTy), CurBB); + if (!SyncRegion) + return error("Invalid record"); + + I = SyncInst::Create(Continue, SyncRegion); + InstructionList.push_back(I); + break; + } case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...] if (Record.empty()) return error("Invalid phi record"); diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 9416c7f5a03e35..6c166f9c9c5421 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -765,12 +765,20 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_SAFESTACK; case Attribute::ShadowCallStack: return bitc::ATTR_KIND_SHADOWCALLSTACK; + case Attribute::Stealable: + return bitc::ATTR_KIND_STEALABLE; + case Attribute::StrandNoAlias: + return bitc::ATTR_KIND_STRAND_NO_ALIAS; + case Attribute::StrandPure: + return bitc::ATTR_KIND_STRAND_PURE; case Attribute::StrictFP: return bitc::ATTR_KIND_STRICT_FP; case Attribute::StructRet: return bitc::ATTR_KIND_STRUCT_RET; case Attribute::SanitizeAddress: return bitc::ATTR_KIND_SANITIZE_ADDRESS; + case Attribute::SanitizeCilk: + return bitc::ATTR_KIND_SANITIZE_CILK; case Attribute::SanitizeHWAddress: return bitc::ATTR_KIND_SANITIZE_HWADDRESS; case Attribute::SanitizeThread: @@ -809,6 +817,14 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_MUSTPROGRESS; case Attribute::PresplitCoroutine: return bitc::ATTR_KIND_PRESPLIT_COROUTINE; + case Attribute::Injective: + return 
bitc::ATTR_KIND_INJECTIVE; + case Attribute::HyperView: + return bitc::ATTR_KIND_HYPER_VIEW; + case Attribute::ReducerUnregister: + return bitc::ATTR_KIND_REDUCER_UNREGISTER; + case Attribute::ReducerRegister: + return bitc::ATTR_KIND_REDUCER_REGISTER; case Attribute::EndAttrKinds: llvm_unreachable("Can not encode end-attribute kinds marker."); case Attribute::None: @@ -3068,6 +3084,33 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I, Code = bitc::FUNC_CODE_INST_UNREACHABLE; AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV; break; + case Instruction::Detach: + { + Code = bitc::FUNC_CODE_INST_DETACH; + const DetachInst &DI = cast(I); + Vals.push_back(VE.getValueID(DI.getDetached())); + Vals.push_back(VE.getValueID(DI.getContinue())); + if (DI.hasUnwindDest()) + Vals.push_back(VE.getValueID(DI.getUnwindDest())); + pushValue(DI.getSyncRegion(), InstID, Vals); + } + break; + case Instruction::Reattach: + { + Code = bitc::FUNC_CODE_INST_REATTACH; + const ReattachInst &RI = cast(I); + Vals.push_back(VE.getValueID(RI.getSuccessor(0))); + pushValue(RI.getSyncRegion(), InstID, Vals); + } + break; + case Instruction::Sync: + { + Code = bitc::FUNC_CODE_INST_SYNC; + const SyncInst &SI = cast(I); + Vals.push_back(VE.getValueID(SI.getSuccessor(0))); + pushValue(SI.getSyncRegion(), InstID, Vals); + } + break; case Instruction::PHI: { const PHINode &PN = cast(I); diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 106571b9061beb..006c055f521f92 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -237,6 +237,7 @@ add_llvm_component_library(LLVMCodeGen SwitchLoweringUtils.cpp TailDuplication.cpp TailDuplicator.cpp + TapirCleanup.cpp TargetFrameLoweringImpl.cpp TargetInstrInfo.cpp TargetLoweringBase.cpp diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 6272b654b32953..8129d4c5d3eecf 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -129,6 
+129,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeStackSlotColoringPass(Registry); initializeStripDebugMachineModulePass(Registry); initializeTailDuplicatePass(Registry); + initializeTapirCleanupPass(Registry); initializeTargetPassConfigPass(Registry); initializeTwoAddressInstructionPassPass(Registry); initializeTypePromotionLegacyPass(Registry); diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 9a67a8d05a4dda..0fa64b2a1e51db 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1274,6 +1274,62 @@ bool IRTranslator::translateIndirectBr(const User &U, return true; } +bool IRTranslator::translateDetach(const User &U, + MachineIRBuilder &MIRBuilder) { + const DetachInst &DetInst = cast(U); + + // Lowering of Tapir instructions should have happened already. At this + // stage, treat Detach like an unconditional branch to the detached successor. + const BasicBlock &DetTgt = *cast(DetInst.getDetached()); + MachineBasicBlock &TgtBB = getMBB(DetTgt); + MachineBasicBlock &CurBB = MIRBuilder.getMBB(); + + // If the detached successor is the layout successor, fallthrough. + if (!CurBB.isLayoutSuccessor(&TgtBB)) + MIRBuilder.buildBr(TgtBB); + + // Link detached successor. + CurBB.addSuccessor(&getMBB(*cast(DetInst.getDetached()))); + return true; +} + +bool IRTranslator::translateReattach(const User &U, + MachineIRBuilder &MIRBuilder) { + const ReattachInst &ReatInst = cast(U); + + // Lowering of Tapir instructions should have happened already. At this + // stage, treat Reattach like an unconditional branch to its successor. + const BasicBlock &ReatTgt = *cast(ReatInst.getSuccessor(0)); + MachineBasicBlock &TgtBB = getMBB(ReatTgt); + MachineBasicBlock &CurBB = MIRBuilder.getMBB(); + + // If the reattach successor is the layout successor, fallthrough. 
+ if (!CurBB.isLayoutSuccessor(&TgtBB)) + MIRBuilder.buildBr(TgtBB); + + // Link the Reattach instruction's successor. + CurBB.addSuccessor(&getMBB(*cast(ReatInst.getSuccessor(0)))); + return true; +} + +bool IRTranslator::translateSync(const User &U, MachineIRBuilder &MIRBuilder) { + const SyncInst &SInst = cast(U); + + // Lowering of Tapir instructions should have happened already. At this + // stage, treat Sync like an unconditional branch to its successor. + const BasicBlock &STgt = *cast(SInst.getSuccessor(0)); + MachineBasicBlock &TgtBB = getMBB(STgt); + MachineBasicBlock &CurBB = MIRBuilder.getMBB(); + + // If the sync successor is the layout successor, fallthrough. + if (!CurBB.isLayoutSuccessor(&TgtBB)) + MIRBuilder.buildBr(TgtBB); + + // Link the Sync instruction's successor. + CurBB.addSuccessor(&getMBB(*cast(SInst.getSuccessor(0)))); + return true; +} + static bool isSwiftError(const Value *V) { if (auto Arg = dyn_cast(V)) return Arg->hasSwiftErrorAttr(); @@ -2415,6 +2471,23 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, return true; } + case Intrinsic::syncregion_start: + // Lower the starting point of a Tapir sync region to a no-op. + case Intrinsic::taskframe_load_guard: + // Discard any taskframe.load.guards. + case Intrinsic::taskframe_create: + // Discard any taskframe.creates. + case Intrinsic::taskframe_use: + // Discard any taskframe.uses. + case Intrinsic::taskframe_end: + // Discard any taskframe.ends. + case Intrinsic::sync_unwind: + // Discard any sync.unwinds. + case Intrinsic::tapir_runtime_start: + // Discard any tapir.runtime.starts. + case Intrinsic::tapir_runtime_end: + // Discard any tapir.runtime.ends. 
+ return true; #define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \ case Intrinsic::INTRINSIC: #include "llvm/IR/ConstrainedOps.def" diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp index 57df9b67fd0263..2d0c1a3ec5032f 100644 --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -4033,6 +4033,18 @@ template <> class SSAUpdaterTraits { return Num; } + /// BlockReattaches - Always returns false, because machine basic blocks + /// should never contain Tapir instructions. + static bool BlockReattaches(LDVSSABlock *BB, LDVSSAUpdater *Updater) { + return false; + } + + /// BlockDetaches - Always returns false, because machine basic blocks + /// should never contain Tapir instructions. + static bool BlockDetaches(LDVSSABlock *BB, LDVSSAUpdater *Updater) { + return false; + } + /// CreateEmptyPHI - Create a (representation of a) PHI in the given block. /// SSAUpdater will populate it with information about incoming values. 
The /// value number of this PHI is whatever the machine value number problem diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp index b2e570c5e67ec7..7b1e069cbc700e 100644 --- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp +++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -471,6 +471,7 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF, MF.setAlignment(YamlMF.Alignment.valueOrOne()); MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice); + MF.setExposesOpaqueReturnsTwice(YamlMF.ExposesOpaqueReturnsTwice); MF.setHasWinCFI(YamlMF.HasWinCFI); MF.setCallsEHReturn(YamlMF.CallsEHReturn); diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index b91d9c4727fcd7..a8070220fee85a 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -196,6 +196,7 @@ void MIRPrinter::print(const MachineFunction &MF) { YamlMF.Name = MF.getName(); YamlMF.Alignment = MF.getAlignment(); YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice(); + YamlMF.ExposesOpaqueReturnsTwice = MF.exposesOpaqueReturnsTwice(); YamlMF.HasWinCFI = MF.hasWinCFI(); YamlMF.CallsEHReturn = MF.callsEHReturn(); diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 912e9ec993e3cc..82841d638edd3a 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -781,11 +781,12 @@ bool MachineBlockPlacement::shouldTailDuplicate(MachineBasicBlock *BB) { // Blocks with single successors don't create additional fallthrough // opportunities. Don't duplicate them. TODO: When conditional exits are // analyzable, allow them to be duplicated. 
- bool IsSimple = TailDup.isSimpleBB(BB); - if (BB->succ_size() == 1) return false; - return TailDup.shouldTailDuplicate(IsSimple, *BB); + + BlockDesc Desc = TailDup.getBlockDesc(BB); + + return TailDup.shouldTailDuplicate(Desc, *BB); } /// Compare 2 BlockFrequency's with a small penalty for \p A. @@ -3110,7 +3111,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock( function_ref(RemovalCallback); SmallVector DuplicatedPreds; - bool IsSimple = TailDup.isSimpleBB(BB); + BlockDesc Desc = TailDup.getBlockDesc(BB); SmallVector CandidatePreds; SmallVectorImpl *CandidatePtr = nullptr; if (F->getFunction().hasProfileData()) { @@ -3121,7 +3122,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock( if (CandidatePreds.size() < BB->pred_size()) CandidatePtr = &CandidatePreds; } - TailDup.tailDuplicateAndUpdate(IsSimple, BB, LPred, &DuplicatedPreds, + TailDup.tailDuplicateAndUpdate(Desc, BB, LPred, &DuplicatedPreds, &RemovalCallbackRef, CandidatePtr); // Update UnscheduledPredecessors to reflect tail-duplication. diff --git a/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/llvm/lib/CodeGen/MachineSSAUpdater.cpp index 48076663ddf538..740f3da466bd40 100644 --- a/llvm/lib/CodeGen/MachineSSAUpdater.cpp +++ b/llvm/lib/CodeGen/MachineSSAUpdater.cpp @@ -307,6 +307,19 @@ class SSAUpdaterTraits { return NewDef->getOperand(0).getReg(); } + /// BlockReattaches - Always returns false, because machine basic blocks + /// should never contain Tapir instructions. + static bool BlockReattaches(MachineBasicBlock *BB, + MachineSSAUpdater *Updater) { + return false; + } + + /// BlockDetaches - Always returns false, because machine basic blocks + /// should never contain Tapir instructions. + static bool BlockDetaches(MachineBasicBlock *BB, MachineSSAUpdater *Updater) { + return false; + } + /// CreateEmptyPHI - Create a PHI instruction that defines a new register. /// Add it into the specified block and return the register. 
static Register CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds, @@ -352,6 +365,12 @@ class SSAUpdaterTraits { static Register GetPHIValue(MachineInstr *PHI) { return PHI->getOperand(0).getReg(); } + + static void MarkDetachedDef(unsigned Val, MachineBasicBlock *BB, + MachineSSAUpdater *Updater) { + return; + } + }; } // end namespace llvm diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 8da97dc7e74240..8eb5deb3d273d5 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -202,6 +202,8 @@ namespace { bool hasStoreBetween(MachineBasicBlock *From, MachineBasicBlock *To, MachineInstr &MI); + bool possiblyHasSetjmpBetween(MachineBasicBlock *From, + MachineBasicBlock *To, MachineInstr &MI); /// Postpone the splitting of the given critical /// edge (\p From, \p To). @@ -306,6 +308,91 @@ static bool blockPrologueInterferes(const MachineBasicBlock *BB, return false; } +// Helper function to check if MBB contains a terminator that might correspond +// with EH_SjLj_Setup. +static bool blockMayContainSetjmpSetup(const MachineBasicBlock *MBB, + const MachineBasicBlock *Succ) { + for (const MachineInstr &MI : MBB->terminators()) + // It seems hard to check for EH_SjLj_Setup directly, since that instruction + // seems to be target-dependent. Instead we simply check if the terminator + // has unmodeled side effects. + if (MI.hasUnmodeledSideEffects() && + llvm::any_of(MI.operands(), [&](const MachineOperand &Op) { + return Op.isMBB() && Op.getMBB() == Succ; + })) + return true; + return false; +} + +// possiblyHasSetjmpBetween - Check for setjmps along the path from block From +// to block To. +bool MachineSinking::possiblyHasSetjmpBetween(MachineBasicBlock *From, + MachineBasicBlock *To, + MachineInstr &MI) { + // Copies and other transient instructions are safe to move past setjmps. 
+ if (MI.isCopyLike()) + return false; + + // If MI cannot store and it does not read any register operands (which might + // be spilled), then they are safe to move past setjmps. + if (!MI.mayStore() && + !llvm::any_of(MI.operands(), [&](const MachineOperand &Op) { + if (Op.isReg() && Op.getReg().isValid() && !Op.isDef()) { + LLVM_DEBUG(dbgs() + << "Reads valid register operand " << Op << "\n"); + return true; + } + return false; + })) + return false; + + // For now we examine just the predecessors of predecessors of To for possible + // setjmp-setup constructs. For example: + // + // Pred: + // ... + // EH_SjLj_Setup BB + // BB: + // = MOV 1 + // JMP To + // To: + // = PHI + // TEST + // CONDITIONAL_JMP + // + // Note that it is safe to move an instruction after the conditional jmp, but + // not into the body of To. At this time LLVM does not seem to generate more + // complex control-flow structures encoding setjmps. This code should be + // revisited if LLVM is able to generate more complex control-flow structures + // for setjmp. + for (MachineBasicBlock *BB : To->predecessors()) { + if (BB->hasAddressTaken() && PDT->dominates(To, BB)) { + // Since BB's address is taken, BB might be the desintation of a longjmp. + LLVM_DEBUG(dbgs() << "Checking predecessor " << *BB); + for (MachineBasicBlock *Pred : BB->predecessors()) { + if (PDT->dominates(To, Pred)) { + LLVM_DEBUG(dbgs() << "Checking predecessor of predecessor " << *Pred); + if (blockMayContainSetjmpSetup(Pred, BB)) { + // Pred might contain a setjmp with BB the destination of a + // corresponding longjmp. If BB contains an instruction that + // produces a definition, assume that definition is used to + // distinguish different returns from the setjmp, meaning its unsafe + // to sink the instruction past that definition. 
+ for (MachineInstr &I : *BB) { + if (I.mayStore() || I.getNumDefs() > 0) { + LLVM_DEBUG(dbgs() << "Found definition in pred-pred block: " + << I << "\n"); + return true; + } + } + } + } + } + } + } + return false; +} + bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI, MachineBasicBlock *MBB) { if (!MI.isCopy()) @@ -1424,6 +1511,13 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore, TryBreak = true; } + // Don't sink instructions into successors of setjmps that may execute + // multiple times. + if (!TryBreak && possiblyHasSetjmpBetween(ParentBlock, SuccToSinkTo, MI)) { + LLVM_DEBUG(dbgs() << " *** NOTE: Possible setjmp setup found\n"); + TryBreak = true; + } + // Otherwise we are OK with sinking along a critical edge. if (!TryBreak) LLVM_DEBUG(dbgs() << "Sinking along critical edge.\n"); diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp index e49885b6ad9677..34ebec0d2c8066 100644 --- a/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -231,6 +231,11 @@ namespace { void setUndefOnPrunedSubRegUses(LiveInterval &LI, Register Reg, LaneBitmask PrunedLanes); + /// Return true if the live interval from coalescing SrcLI and DstLI crosses + /// a basic-block edge that may be produced by a setjmp. + bool coalescedLiveIntervalMayCrossSetjmp(LiveInterval &SrcLI, + LiveInterval &DstLI); + /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the /// src/dst of the copy instruction CopyMI. This returns true if the copy /// was successfully coalesced away. If it is not currently possible to @@ -1899,6 +1904,45 @@ void RegisterCoalescer::setUndefOnPrunedSubRegUses(LiveInterval &LI, LIS->shrinkToUses(&LI); } +// Helper function to check if MBB contains a terminator that might correspond +// with EH_SjLj_Setup. 
+static bool blockMayContainSetjmpSetup(const MachineBasicBlock *MBB, + const MachineBasicBlock *Succ) { + for (const MachineInstr &MI : MBB->terminators()) + // It seems hard to check for EH_SjLj_Setup directly, since that instruction + // seems to be target-dependent. Instead we simply check if the terminator + // has unmodeled side effects. + if (MI.hasUnmodeledSideEffects() && + llvm::any_of(MI.operands(), [&](const MachineOperand &Op) { + return Op.isMBB() && Op.getMBB() == Succ; + })) + return true; + return false; +} + +bool RegisterCoalescer::coalescedLiveIntervalMayCrossSetjmp( + LiveInterval &SrcLI, LiveInterval &DstLI) { + for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { + MachineBasicBlock *MBB = &*I; + // If MBB's address is taken, then it might be the destination of a longjmp. + // Check if Src or Dst are live into the block. + if (MBB->hasAddressTaken() && + (LIS->isLiveInToMBB(SrcLI, MBB) || LIS->isLiveInToMBB(DstLI, MBB))) { + // Check the predecessors of MBB for a terminator that might be a + // EH_SjLj_Setup, and check if Src and Dest are live out of that + // predecessor. + for (MachineBasicBlock *Pred : MBB->predecessors()) + if (blockMayContainSetjmpSetup(Pred, MBB) && + (LIS->isLiveOutOfMBB(SrcLI, Pred) || + LIS->isLiveOutOfMBB(DstLI, Pred))) + // Guess that the coalesced liveness range would cross this edge from + // the setjmp. 
+ return true; + } + } + return false; +} + bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { Again = false; LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI); @@ -1918,6 +1962,13 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { std::swap(SrcIdx, DstIdx); std::swap(SrcRC, DstRC); } + if (MF->exposesReturnsTwice() && + coalescedLiveIntervalMayCrossSetjmp(LIS->getInterval(CP.getSrcReg()), + LIS->getInterval(CP.getDstReg()))) { + LLVM_DEBUG( + dbgs() << "\tNot coalescing: liveness ranges may cross setjmp.\n"); + return false; + } if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx, CP.getNewRC(), *LIS)) { LLVM_DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n"); @@ -4111,7 +4162,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { // // TODO: Could specifically disable coalescing registers live across setjmp // calls - if (fn.exposesReturnsTwice()) { + if (fn.exposesOpaqueReturnsTwice()) { LLVM_DEBUG( dbgs() << "* Skipped as it exposes functions that returns twice.\n"); return false; diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 1d0a03ccfcdc6a..f16389758d0e5f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -120,6 +120,16 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } } + // If the function might be stolen, then several optimizations involving SP + // and FP aren't generally allowed. For example, the Cilk runtime system + // might change the stack a function uses after it performs a spawn, meaning + // that SP can't be used to index stack variables or temporary storage. The + // semantics for the stack memory of such a function most closely resemble + // those of a function with dynamic allocas, so we simply set this flag in + // MachineFrameInfo. 
+ if (Fn->hasFnAttribute(Attribute::Stealable)) + MF->getFrameInfo().setHasVarSizedObjects(); + // Initialize the mapping of values to registers. This is only set up for // instruction values that are used outside of the block that defines // them. diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 0579c1664d5c9a..f5a79eb9b4e12e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -203,6 +203,8 @@ static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) { void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { SDValue Chain; unsigned NumOps = Node->getNumOperands(); + if (NumOps == 0) + return; if (Node->getOperand(NumOps-1).getValueType() == MVT::Other) Chain = Node->getOperand(NumOps-1); if (!Chain) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 20c37eb4cb11d5..b5c559fb0d9acf 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3056,6 +3056,20 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { DAG.setRoot(DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops)); break; } + case Intrinsic::detached_rethrow: + // Treat detached_rethrow intrinsics like resumes. + llvm_unreachable("SelectionDAGBuilder shouldn't visit detached_rethrow " + "instructions!"); + break; + case Intrinsic::taskframe_resume: + // Treat detached_rethrow intrinsics like resumes. + llvm_unreachable("SelectionDAGBuilder shouldn't visit taskframe_resume " + "instructions!"); + break; + case Intrinsic::sync_unwind: + // Treat sync_unwind intrinsics like donothing: ignore them and jump + // directly to the next BB. + break; } } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) { // Currently we do not lower any intrinsic calls with deopt operand bundles. 
@@ -3239,6 +3253,65 @@ void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); } +void SelectionDAGBuilder::visitDetach(const DetachInst &I) { + MachineBasicBlock *DetachMBB = FuncInfo.MBB; + + // Update machine-CFG edges. + MachineBasicBlock *Detached = FuncInfo.MBBMap[I.getSuccessor(0)]; + //MachineBasicBlock *Continue = FuncInfo.MBBMap[I.getSuccessor(1)]; + + // Update machine-CFG edges. + DetachMBB->addSuccessor(Detached); + + // If this is not a fall-through branch or optimizations are switched off, + // emit the branch. + if (Detached != NextBlock(DetachMBB) || TM.getOptLevel() == CodeGenOpt::None) + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Detached))); + + return; + +} + +void SelectionDAGBuilder::visitReattach(const ReattachInst &I) { + MachineBasicBlock *ReattachMBB = FuncInfo.MBB; + + // Update machine-CFG edges. + MachineBasicBlock *Continue = FuncInfo.MBBMap[I.getSuccessor(0)]; + + // Update machine-CFG edges. + ReattachMBB->addSuccessor(Continue); + + // If this is not a fall-through branch or optimizations are switched off, + // emit the branch. + if (Continue != NextBlock(ReattachMBB) || TM.getOptLevel() == CodeGenOpt::None) + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Continue))); + + return; +} + +void SelectionDAGBuilder::visitSync(const SyncInst &I) { + MachineBasicBlock *SyncMBB = FuncInfo.MBB; + + // Update machine-CFG edges. + MachineBasicBlock *Continue = FuncInfo.MBBMap[I.getSuccessor(0)]; + + // Update machine-CFG edges. + SyncMBB->addSuccessor(Continue); + + // If this is not a fall-through branch or optimizations are switched off, + // emit the branch. 
+ if (Continue != NextBlock(SyncMBB) || TM.getOptLevel() == CodeGenOpt::None) + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Continue))); + + return; +} + void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) { SDNodeFlags Flags; if (auto *FPOp = dyn_cast(&I)) @@ -7499,6 +7572,36 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::experimental_vector_deinterleave2: visitVectorDeinterleave(I); return; + // Tapir intrinsics + case Intrinsic::syncregion_start: + // Lower the starting point of a Tapir sync region to a no-op. + return; + case Intrinsic::taskframe_load_guard: + // Discard any taskframe.load.guards. + return; + case Intrinsic::taskframe_create: + // Discard any taskframe.creates. + return; + case Intrinsic::taskframe_use: + // Discard any taskframe.uses. + return; + case Intrinsic::taskframe_end: + // Discard any taskframe.ends. + return; + case Intrinsic::sync_unwind: + // Discard any sync.unwinds. + return; + case Intrinsic::tapir_runtime_start: + // Discard any tapir.runtime.starts. + return; + case Intrinsic::tapir_runtime_end: + // Discard any tapir.runtime.ends. 
+ return; + case Intrinsic::task_frameaddress: + setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, + TLI.getFrameIndexTy(DAG.getDataLayout()), + getValue(I.getArgOperand(0)))); + return; } } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index f2496f24973a7b..a9c35f04fe920d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -58,6 +58,7 @@ class Constant; class ConstrainedFPIntrinsic; class DbgValueInst; class DataLayout; +class DetachInst; class DIExpression; class DILocalVariable; class DILocation; @@ -74,6 +75,7 @@ class LLVMContext; class LoadInst; class MachineBasicBlock; class PHINode; +class ReattachInst; class ResumeInst; class ReturnInst; class SDDbgValue; @@ -81,6 +83,7 @@ class SelectionDAG; class StoreInst; class SwiftErrorValueTracking; class SwitchInst; +class SyncInst; class TargetLibraryInfo; class TargetMachine; class Type; @@ -516,6 +519,9 @@ class SelectionDAGBuilder { void visitCatchRet(const CatchReturnInst &I); void visitCatchPad(const CatchPadInst &I); void visitCleanupPad(const CleanupPadInst &CPI); + void visitDetach(const DetachInst& I); + void visitReattach(const ReattachInst& I); + void visitSync(const SyncInst& I); BranchProbability getEdgeProbability(const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 35abd990f96899..a7937adc7a70c5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -649,6 +649,19 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Determine if there is a call to setjmp in the machine function. 
MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice()); + // Determine if there is a call to a function that returns twice that is not a + // call to the eh.sjlj.setjmp intrinsic. + for (const Instruction &I : instructions(Fn)) + if (const auto *Call = dyn_cast(&I)) + if (Call->hasFnAttr(Attribute::ReturnsTwice)) { + if (const Function *Called = Call->getCalledFunction()) + if (Called->getIntrinsicID() == + Intrinsic::eh_sjlj_setjmp) + continue; + MF->setExposesOpaqueReturnsTwice(true); + break; + } + // Determine if floating point is used for msvc computeUsesMSVCFloatingPoint(TM.getTargetTriple(), Fn, MF->getMMI()); diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp index 4b1d3637a7462e..b9dee9125aa5f2 100644 --- a/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -984,7 +984,8 @@ bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) { !(MF.getFunction().hasFnAttribute(Attribute::SanitizeAddress) || MF.getFunction().hasFnAttribute(Attribute::SanitizeThread) || MF.getFunction().hasFnAttribute(Attribute::SanitizeMemory) || - MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress)); + MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress) || + MF.getFunction().hasFnAttribute(Attribute::SanitizeCilk)); // If EnableShrinkWrap is set, it takes precedence on whatever the // target sets. The rational is that we assume we want to test // something related to shrink-wrapping. 
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index 5ed67bd0a121ed..f541fccfaa3fae 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -68,6 +68,12 @@ static cl::opt TailDupIndirectBranchSize( "end with indirect branches."), cl::init(20), cl::Hidden); +// 0 = disable, 1 = enable CBZ optimization, 2 = increase block size threshold +static cl::opt TailDupCBZ( + "tail-dup-cbz", + cl::desc("More aggressive merging of blocks ending with cbz"), + cl::init(2), cl::Hidden); + static cl::opt TailDupVerify("tail-dup-verify", cl::desc("Verify sanity of PHI instructions during taildup"), @@ -153,7 +159,7 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { /// all Preds that received a copy of \p MBB. /// \p RemovalCallback - if non-null, called just before MBB is deleted. bool TailDuplicator::tailDuplicateAndUpdate( - bool IsSimple, MachineBasicBlock *MBB, + const BlockDesc &Desc, MachineBasicBlock *MBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl *DuplicatedPreds, function_ref *RemovalCallback, @@ -164,7 +170,7 @@ bool TailDuplicator::tailDuplicateAndUpdate( SmallVector TDBBs; SmallVector Copies; - if (!tailDuplicate(IsSimple, MBB, ForcedLayoutPred, + if (!tailDuplicate(Desc, MBB, ForcedLayoutPred, TDBBs, Copies, CandidatePtr)) return false; @@ -264,6 +270,14 @@ bool TailDuplicator::tailDuplicateAndUpdate( return true; } +BlockDesc TailDuplicator::getBlockDesc(MachineBasicBlock *MBB) { + BlockDesc Desc; + Desc.IsSimple = isSimpleBB(MBB); + if (TailDupCBZ > 0) + Desc.BRNZ = TII->isZeroTest(*MBB); + return Desc; +} + /// Look for small blocks that are unconditionally branched to and do not fall /// through. Tail-duplicate their instructions into their predecessors to /// eliminate (dynamic) branches. 
@@ -280,12 +294,12 @@ bool TailDuplicator::tailDuplicateBlocks() { if (NumTails == TailDupLimit) break; - bool IsSimple = isSimpleBB(&MBB); + BlockDesc Desc = getBlockDesc(&MBB); - if (!shouldTailDuplicate(IsSimple, MBB)) + if (!shouldTailDuplicate(Desc, MBB)) continue; - MadeChange |= tailDuplicateAndUpdate(IsSimple, &MBB, nullptr); + MadeChange |= tailDuplicateAndUpdate(Desc, &MBB, nullptr); } if (PreRegAlloc && TailDupVerify) @@ -553,12 +567,12 @@ void TailDuplicator::updateSuccessorsPHIs( } /// Determine if it is profitable to duplicate this block. -bool TailDuplicator::shouldTailDuplicate(bool IsSimple, +bool TailDuplicator::shouldTailDuplicate(const BlockDesc &Desc, MachineBasicBlock &TailBB) { // When doing tail-duplication during layout, the block ordering is in flux, // so canFallThrough returns a result based on incorrect information and // should just be ignored. - if (!LayoutMode && TailBB.canFallThrough()) + if (!LayoutMode && !Desc.BRNZ && TailBB.canFallThrough()) return false; // Don't try to tail-duplicate single-block loops. 
@@ -575,6 +589,8 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, MaxDuplicateCount = TailDuplicateSize; else MaxDuplicateCount = TailDupSize; + if (Desc.BRNZ) + MaxDuplicateCount += (TailDupCBZ > 1) + Desc.BRNZ.value().IsKill; if (OptForSize) MaxDuplicateCount = 1; @@ -673,7 +689,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, if (HasIndirectbr && PreRegAlloc) return true; - if (IsSimple) + if (Desc.IsSimple) return true; if (!PreRegAlloc) @@ -815,6 +831,33 @@ bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB, return true; } +static bool Contains(const SmallVectorImpl &Regs, Register Key) { + for (Register Reg : Regs) { + if (Key == Reg) + return true; + } + return false; +} + +static bool SafeToDelete(const MachineInstr &MI) { + if (MI.hasUnmodeledSideEffects() || MI.mayStore() || MI.isCall() || + MI.hasOrderedMemoryRef()) + return false; + if (MI.getNumDefs() <= 1) + return true; + bool SawFirst = false; + unsigned Ops = MI.getNumOperands(); + for (unsigned I = 0; I < Ops; ++I) { + const MachineOperand &MO = MI.getOperand(I); + if (MO.isDef() && !MO.isDead()) { + if (SawFirst) + return false; + SawFirst = true; + } + } + return true; +} + /// If it is profitable, duplicate TailBB's contents in each /// of its predecessors. /// \p IsSimple result of isSimpleBB @@ -825,7 +868,8 @@ bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB, /// into. /// \p Copies A vector of copy instructions inserted. Used later to /// walk all the inserted copies and remove redundant ones. 
-bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, +bool TailDuplicator::tailDuplicate(const BlockDesc &Desc, + MachineBasicBlock *TailBB, MachineBasicBlock *ForcedLayoutPred, SmallVectorImpl &TDBBs, SmallVectorImpl &Copies, @@ -838,7 +882,7 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, DenseSet UsedByPhi; getRegsUsedByPHIs(*TailBB, &UsedByPhi); - if (IsSimple) + if (Desc.IsSimple) return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi); // Iterate through all the unique predecessors and tail-duplicate this @@ -858,10 +902,42 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, if (!canTailDuplicate(TailBB, PredBB)) continue; + int64_t PredValue = 0; + MachineInstr *RegSet = nullptr; + // If Live is true, the value produced by RegSet is used other + // than by a conditional branch. + bool Live = false; // liveness of RegSet + const BlockBRNZ *BRNZ = Desc.BRNZ ? &Desc.BRNZ.value() : nullptr; + if (BRNZ) { + Live = !BRNZ->IsKill; + const TargetRegisterInfo *TRI = MF->getRegInfo().getTargetRegisterInfo(); + // Search backwards for an instruction that sets any of the + // registers in Desc.Regs + for (MachineBasicBlock::reverse_iterator MI = PredBB->instr_rbegin(); + MI != PredBB->instr_rend(); ++MI) { + Register Dest; + if (TII->isSetConstant(*MI, Dest, PredValue) + && Contains(BRNZ->Regs, Dest)) { + RegSet = &*MI; + Live = Live || !SafeToDelete(*MI); + break; + } + for (Register Reg : BRNZ->Regs) { + if (MI->modifiesRegister(Reg, TRI) || MI->killsRegister(Reg, TRI)) { + goto loop_exit; // double break + } + if (!Live && MI->readsRegister(Reg, TRI)) { + Live = true; + } + } + } + loop_exit:; + } + // Don't duplicate into a fall-through predecessor (at least for now). // If profile is available, findDuplicateCandidates can choose better // fall-through predecessor. 
- if (!(MF->getFunction().hasProfileData() && LayoutMode)) { + if (!RegSet && !(MF->getFunction().hasProfileData() && LayoutMode)) { bool IsLayoutSuccessor = false; if (ForcedLayoutPred) IsLayoutSuccessor = (ForcedLayoutPred == PredBB); @@ -879,6 +955,18 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, // Remove PredBB's unconditional branch. TII->removeBranch(*PredBB); + // If RegSet is true the tail block branch becomes unconditional. + MachineBasicBlock *Succ = nullptr; + if (RegSet) { + if (!Live) { + PredBB->erase(RegSet); + RegSet = nullptr; + } + Succ = PredValue ? BRNZ->Nonzero : BRNZ->Zero; + if (!Succ) + Succ = TailBB->getFallThrough(); + } + // Clone the contents of TailBB into PredBB. DenseMap LocalVRMap; SmallVector, 4> CopyInfos; @@ -901,8 +989,15 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, PredBB->removeSuccessor(PredBB->succ_begin()); assert(PredBB->succ_empty() && "TailDuplicate called on block with multiple successors!"); - for (MachineBasicBlock *Succ : TailBB->successors()) - PredBB->addSuccessor(Succ, MBPI->getEdgeProbability(TailBB, Succ)); + if (Succ) { + TII->removeBranchAndFlags(*PredBB); + TII->insertUnconditionalBranch(*PredBB, Succ, + TailBB->rbegin()->getDebugLoc()); + PredBB->addSuccessor(Succ, BranchProbability::getOne()); + } else { + for (MachineBasicBlock *Succ : TailBB->successors()) + PredBB->addSuccessor(Succ, MBPI->getEdgeProbability(TailBB, Succ)); + } // Update branches in pred to jump to tail's layout successor if needed. if (ShouldUpdateTerminators) diff --git a/llvm/lib/CodeGen/TapirCleanup.cpp b/llvm/lib/CodeGen/TapirCleanup.cpp new file mode 100644 index 00000000000000..bd1422ec29f308 --- /dev/null +++ b/llvm/lib/CodeGen/TapirCleanup.cpp @@ -0,0 +1,101 @@ +//===- TapirCleanup - Cleanup leftover Tapir tasks for code generation ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass serializes any remaining Tapir instructions before code generation. +// Typically this pass should have no effect, because Tapir instructions should +// have been lowered already to a particular parallel runtime. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/TapirTaskInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/Utils/TapirUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "tapircleanup" + +STATISTIC(NumTasksSerialized, "Number of Tapir tasks serialized"); +STATISTIC(NumTaskFramesErased, "Number of taskframes erased"); + +namespace { +class TapirCleanup : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid. 
+ + TapirCleanup() : FunctionPass(ID) {} + + bool runOnFunction(Function &Fn) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + StringRef getPassName() const override { + return "Tapir last-minute cleanup for CodeGen"; + } +}; +} // end anonymous namespace + +char TapirCleanup::ID = 0; + +INITIALIZE_PASS_BEGIN(TapirCleanup, DEBUG_TYPE, + "Cleanup Tapir", false, false) +INITIALIZE_PASS_DEPENDENCY(TaskInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) +INITIALIZE_PASS_END(TapirCleanup, DEBUG_TYPE, + "Cleanup Tapir", false, false) + +FunctionPass *llvm::createTapirCleanupPass() { return new TapirCleanup(); } + +void TapirCleanup::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); +} + +bool TapirCleanup::runOnFunction(Function &F) { + TaskInfo &TI = getAnalysis().getTaskInfo(); + auto &ORE = getAnalysis().getORE(); + + bool Changed = false; + + // If we haven't lowered the Tapir task to a particular parallel runtime by + // this point, simply serialize the task. + for (Task *T : post_order(TI.getRootTask())) { + if (T->isRootTask()) + continue; + ORE.emit(DiagnosticInfoOptimizationFailure(DEBUG_TYPE, "CleanedUpTapir", + T->getDetach()->getDebugLoc(), + T->getDetach()->getParent()) + << "CodeGen found Tapir instructions to serialize. Specify a " + "Tapir back-end to lower Tapir instructions to a parallel " + "runtime."); + + SerializeDetach(T->getDetach(), T); + NumTasksSerialized++; + Changed = true; + } + + // Get the set of taskframes to erase. 
+ SmallVector TaskFramesToErase; + for (BasicBlock &BB : F) + for (Instruction &I : BB) + if (isTapirIntrinsic(Intrinsic::taskframe_create, &I)) + TaskFramesToErase.push_back(&I); + + for (Instruction *TFCreate : TaskFramesToErase) { + eraseTaskFrame(TFCreate); + ++NumTaskFramesErased; + Changed = true; + } + + return Changed; +} diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 68a4616fe4b833..0ac80eea27a41f 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -1836,6 +1836,9 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { case CatchPad: return 0; case CatchSwitch: return 0; case CleanupPad: return 0; + case Detach: return 0; + case Reattach: return 0; + case Sync: return 0; case FNeg: return ISD::FNEG; case Add: return ISD::ADD; case FAdd: return ISD::FADD; diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 98ea2f21b3c807..06ef9245064113 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -883,6 +883,9 @@ void TargetPassConfig::addIRPasses() { // Make sure that no unreachable blocks are instruction selected. addPass(createUnreachableBlockEliminationPass()); + // Make sure there are no remaining Tapir instructions. + addPass(createTapirCleanupPass()); + // Prepare expensive constants for SelectionDAG. 
if (getOptLevel() != CodeGenOpt::None && !DisableConstantHoisting) addPass(createConstantHoistingPass()); diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp index be4a3ed79d88c4..9bea23ca1cd06b 100644 --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -4099,6 +4099,33 @@ void AssemblyWriter::printInstruction(const Instruction &I) { writeOperand(BI.getSuccessor(0), true); Out << ", "; writeOperand(BI.getSuccessor(1), true); + } else if (isa(I)) { + // Special case detach instruction to get formatting nice and correct + const DetachInst &DI(cast(I)); + Out << " within "; + writeOperand(DI.getSyncRegion(), /*PrintType=*/false); + Out << ", "; + writeOperand(DI.getDetached(), true); + Out << ", "; + writeOperand(DI.getContinue(), true); + if (DI.hasUnwindDest()) { + Out << " unwind "; + writeOperand(DI.getUnwindDest(), true); + } + } else if (isa(I)) { + // Special case reattach instruction to get formatting nice and correct + const ReattachInst &RI(cast(I)); + Out << " within "; + writeOperand(RI.getSyncRegion(), /*PrintType=*/false); + Out << ", "; + writeOperand(RI.getSuccessor(0), true); + } else if (isa(I)) { + // Special case sync instruction to get formatting nice and correct + const SyncInst &SI(cast(I)); + Out << " within "; + writeOperand(SI.getSyncRegion(), /*PrintType=*/false); + Out << ", "; + writeOperand(SI.getSuccessor(0), true); } else if (isa(I)) { const SwitchInst& SI(cast(I)); diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp index 14e1787c2b14b7..6620e6206216c5 100644 --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -250,6 +250,25 @@ BasicBlock::getFirstNonPHIOrDbgOrLifetime(bool SkipPseudoOp) const { return nullptr; } +const Instruction * +BasicBlock::getFirstNonPHIOrDbgOrSyncUnwind(bool SkipPseudoOp) const { + for (const Instruction &I : *this) { + if (isa(I) || isa(I)) + continue; + + if (SkipPseudoOp && isa(I)) + continue; + + if (auto *CB = dyn_cast_or_null(&I)) + if (const 
Function *Called = CB->getCalledFunction()) + if (Intrinsic::sync_unwind == Called->getIntrinsicID()) + continue; + + return &I; + } + return nullptr; +} + BasicBlock::const_iterator BasicBlock::getFirstInsertionPt() const { const Instruction *FirstNonPHI = getFirstNonPHI(); if (!FirstNonPHI) diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp index f7b6d54013de54..b9115e005d96c0 100644 --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -3863,6 +3863,34 @@ LLVMValueRef LLVMBuildFCmp(LLVMBuilderRef B, LLVMRealPredicate Op, unwrap(LHS), unwrap(RHS), Name)); } +/*--.. Parallel constructs .................................................--*/ + +LLVMValueRef LLVMBuildDetach(LLVMBuilderRef B, + LLVMBasicBlockRef DetachBB, + LLVMBasicBlockRef ContinueBB, + LLVMValueRef SyncRegion) +{ + return wrap(unwrap(B)->CreateDetach(unwrap(DetachBB), + unwrap(ContinueBB), + unwrap(SyncRegion))); +} + +LLVMValueRef LLVMBuildReattach(LLVMBuilderRef B, + LLVMBasicBlockRef ReattachBB, + LLVMValueRef SyncRegion) +{ + return wrap(unwrap(B)->CreateReattach(unwrap(ReattachBB), + unwrap(SyncRegion))); +} + +LLVMValueRef LLVMBuildSync(LLVMBuilderRef B, + LLVMBasicBlockRef ContinueBB, + LLVMValueRef SyncRegion) +{ + return wrap(unwrap(B)->CreateSync(unwrap(ContinueBB), + unwrap(SyncRegion))); +} + /*--.. 
Miscellaneous instructions ..........................................--*/ LLVMValueRef LLVMBuildPhi(LLVMBuilderRef B, LLVMTypeRef Ty, const char *Name) { diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index 48b5501c55ba47..e1f4d5b35e86c4 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -89,7 +89,7 @@ static void findDbgIntrinsics(SmallVectorImpl &Result, Value *V) { } }; - if (auto *L = LocalAsMetadata::getIfExists(V)) { + if (auto *L = ValueAsMetadata::getIfExists(V)) { AppendUsers(L); for (Metadata *AL : L->getAllArgListUsers()) AppendUsers(AL); diff --git a/llvm/lib/IR/EHPersonalities.cpp b/llvm/lib/IR/EHPersonalities.cpp index afbb2bb8275d6d..9ea7b6854593e0 100644 --- a/llvm/lib/IR/EHPersonalities.cpp +++ b/llvm/lib/IR/EHPersonalities.cpp @@ -42,6 +42,7 @@ EHPersonality llvm::classifyEHPersonality(const Value *Pers) { .Case("rust_eh_personality", EHPersonality::Rust) .Case("__gxx_wasm_personality_v0", EHPersonality::Wasm_CXX) .Case("__xlcxx_personality_v1", EHPersonality::XL_CXX) + .Case("__cilk_personality_v0", EHPersonality::Cilk_CXX) .Default(EHPersonality::Unknown); } @@ -73,6 +74,8 @@ StringRef llvm::getEHPersonalityName(EHPersonality Pers) { return "__gxx_wasm_personality_v0"; case EHPersonality::XL_CXX: return "__xlcxx_personality_v1"; + case EHPersonality::Cilk_CXX: + return "__cilk_personality_v0"; case EHPersonality::Unknown: llvm_unreachable("Unknown EHPersonality!"); } diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index 0dcf0ac6a78ab8..f4099a2bd7f04e 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -421,6 +421,9 @@ const char *Instruction::getOpcodeName(unsigned OpCode) { case CatchPad: return "catchpad"; case CatchSwitch: return "catchswitch"; case CallBr: return "callbr"; + case Detach: return "detach"; + case Reattach: return "reattach"; + case Sync: return "sync"; // Standard unary operators... 
case FNeg: return "fneg"; @@ -641,6 +644,7 @@ bool Instruction::mayReadFromMemory() const { case Instruction::VAArg: case Instruction::Load: case Instruction::Fence: // FIXME: refine definition of mayReadFromMemory + case Instruction::Sync: // Like Instruction::Fence case Instruction::AtomicCmpXchg: case Instruction::AtomicRMW: case Instruction::CatchPad: @@ -659,6 +663,7 @@ bool Instruction::mayWriteToMemory() const { switch (getOpcode()) { default: return false; case Instruction::Fence: // FIXME: refine definition of mayWriteToMemory + case Instruction::Sync: // Like Instruction::Fence case Instruction::Store: case Instruction::VAArg: case Instruction::AtomicCmpXchg: @@ -869,6 +874,15 @@ bool Instruction::isDebugOrPseudoInst() const { return isa(this) || isa(this); } +bool Instruction::isTaskFrameMarker() const { + auto II = dyn_cast(this); + if (!II) + return false; + Intrinsic::ID ID = II->getIntrinsicID(); + return ID == Intrinsic::taskframe_create || ID == Intrinsic::taskframe_use || + ID == Intrinsic::taskframe_end || ID == Intrinsic::taskframe_resume; +} + const Instruction * Instruction::getNextNonDebugInstruction(bool SkipPseudoOp) const { for (const Instruction *I = getNextNode(); I; I = I->getNextNode()) diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp index cb0ac0f8eae6ff..70d88a7c934091 100644 --- a/llvm/lib/IR/Instructions.cpp +++ b/llvm/lib/IR/Instructions.cpp @@ -1374,6 +1374,180 @@ UnreachableInst::UnreachableInst(LLVMContext &Context, BasicBlock *InsertAtEnd) : Instruction(Type::getVoidTy(Context), Instruction::Unreachable, nullptr, 0, InsertAtEnd) {} +//===----------------------------------------------------------------------===// +// DetachInst Implementation +//===----------------------------------------------------------------------===// + +void DetachInst::AssertOK() { + assert(getSyncRegion()->getType()->isTokenTy() && + "Sync region must be a token!"); +} + +void DetachInst::init(Value *SyncRegion, BasicBlock 
*Detached, + BasicBlock *Continue, BasicBlock *Unwind) { + Op<-1>() = SyncRegion; + Op<-2>() = Detached; + Op<-3>() = Continue; + if (Unwind) { + setSubclassData(true); + Op<-4>() = Unwind; + } +#ifndef NDEBUG + AssertOK(); +#endif +} + +DetachInst::DetachInst(BasicBlock *Detached, BasicBlock *Continue, + Value *SyncRegion, Instruction *InsertBefore) + : Instruction(Type::getVoidTy(Detached->getContext()), + Instruction::Detach, + OperandTraits::op_end(this) - 3, 3, + InsertBefore) { + init(SyncRegion, Detached, Continue); +} + +DetachInst::DetachInst(BasicBlock *Detached, BasicBlock *Continue, + Value *SyncRegion, BasicBlock *InsertAtEnd) + : Instruction(Type::getVoidTy(Detached->getContext()), + Instruction::Detach, + OperandTraits::op_end(this) - 3, 3, + InsertAtEnd) { + init(SyncRegion, Detached, Continue); +} + +DetachInst::DetachInst(BasicBlock *Detached, BasicBlock *Continue, + BasicBlock *Unwind, Value *SyncRegion, + Instruction *InsertBefore) + : Instruction(Type::getVoidTy(Detached->getContext()), + Instruction::Detach, + OperandTraits::op_end(this) - 4, 4, + InsertBefore) { + init(SyncRegion, Detached, Continue, Unwind); +} + +DetachInst::DetachInst(BasicBlock *Detached, BasicBlock *Continue, + BasicBlock *Unwind, Value *SyncRegion, + BasicBlock *InsertAtEnd) + : Instruction(Type::getVoidTy(Detached->getContext()), + Instruction::Detach, + OperandTraits::op_end(this) - 4, 4, + InsertAtEnd) { + init(SyncRegion, Detached, Continue, Unwind); +} + +DetachInst::DetachInst(const DetachInst &DI) + : Instruction(Type::getVoidTy(DI.getContext()), Instruction::Detach, + OperandTraits::op_end(this) - + DI.getNumOperands(), + DI.getNumOperands()) { + setSubclassData( + DI.getSubclassData()); + Op<-1>() = DI.Op<-1>(); + Op<-2>() = DI.Op<-2>(); + Op<-3>() = DI.Op<-3>(); + if (DI.hasUnwindDest()) { + Op<-4>() = DI.Op<-4>(); + assert(DI.getNumOperands() == 4 && "Detach must have 4 operands!"); + } else + assert(DI.getNumOperands() == 3 && "Detach must have 3 
operands!"); +} + +LandingPadInst *DetachInst::getLandingPadInst() const { + if (!hasUnwindDest()) + return nullptr; + return cast(getUnwindDest()->getFirstNonPHI()); +} + +//===----------------------------------------------------------------------===// +// ReattachInst Implementation +//===----------------------------------------------------------------------===// + +void ReattachInst::AssertOK() { + assert(getSyncRegion()->getType()->isTokenTy() && + "Sync region must be a token!"); +} + +ReattachInst::ReattachInst(BasicBlock *DetachContinue, Value *SyncRegion, + Instruction *InsertBefore) + : Instruction(Type::getVoidTy(DetachContinue->getContext()), + Instruction::Reattach, + OperandTraits::op_end(this) - 2, 2, + InsertBefore) { + Op<-1>() = SyncRegion; + Op<-2>() = DetachContinue; +#ifndef NDEBUG + AssertOK(); +#endif +} + +ReattachInst::ReattachInst(BasicBlock *DetachContinue, Value *SyncRegion, + BasicBlock *InsertAtEnd) + : Instruction(Type::getVoidTy(DetachContinue->getContext()), + Instruction::Reattach, + OperandTraits::op_end(this) - 2, 2, + InsertAtEnd) { + Op<-1>() = SyncRegion; + Op<-2>() = DetachContinue; +#ifndef NDEBUG + AssertOK(); +#endif +} + +ReattachInst::ReattachInst(const ReattachInst &RI) + : Instruction(Type::getVoidTy(RI.getContext()), Instruction::Reattach, + OperandTraits::op_end(this) - + RI.getNumOperands(), + RI.getNumOperands()) { + Op<-1>() = RI.Op<-1>(); + Op<-2>() = RI.Op<-2>(); + assert(RI.getNumOperands() == 2 && "Reattach must have 2 operands!"); + SubclassOptionalData = RI.SubclassOptionalData; +} + +//===----------------------------------------------------------------------===// +// SyncInst Implementation +//===----------------------------------------------------------------------===// + +void SyncInst::AssertOK() { + assert(getSyncRegion()->getType()->isTokenTy() && + "Sync region must be a token!"); +} + +SyncInst::SyncInst(BasicBlock *Continue, Value *SyncRegion, + Instruction *InsertBefore) + : 
Instruction(Type::getVoidTy(Continue->getContext()), Instruction::Sync, + OperandTraits::op_end(this) - 2, 2, + InsertBefore) { + Op<-1>() = SyncRegion; + Op<-2>() = Continue; +#ifndef NDEBUG + AssertOK(); +#endif +} + +SyncInst::SyncInst(BasicBlock *Continue, Value *SyncRegion, + BasicBlock *InsertAtEnd) + : Instruction(Type::getVoidTy(Continue->getContext()), Instruction::Sync, + OperandTraits::op_end(this) - 2, 2, + InsertAtEnd) { + Op<-1>() = SyncRegion; + Op<-2>() = Continue; +#ifndef NDEBUG + AssertOK(); +#endif +} + + +SyncInst::SyncInst(const SyncInst &SI) + : Instruction(Type::getVoidTy(SI.getContext()), Instruction::Sync, + OperandTraits::op_end(this) - SI.getNumOperands(), + SI.getNumOperands()) { + Op<-1>() = SI.Op<-1>(); + Op<-2>() = SI.Op<-2>(); + assert(SI.getNumOperands() == 2 && "Sync must have 2 operands!"); + SubclassOptionalData = SI.SubclassOptionalData; +} + //===----------------------------------------------------------------------===// // BranchInst Implementation //===----------------------------------------------------------------------===// @@ -5075,3 +5249,15 @@ UnreachableInst *UnreachableInst::cloneImpl() const { FreezeInst *FreezeInst::cloneImpl() const { return new FreezeInst(getOperand(0)); } + +DetachInst *DetachInst::cloneImpl() const { + return new(getNumOperands()) DetachInst(*this); +} + +ReattachInst *ReattachInst::cloneImpl() const { + return new(getNumOperands()) ReattachInst(*this); +} + +SyncInst *SyncInst::cloneImpl() const { + return new(getNumOperands()) SyncInst(*this); +} diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index 36d56699c64e93..ef14d1754bd750 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -60,6 +60,10 @@ bool IntrinsicInst::mayLowerToFunctionCall(Intrinsic::ID IID) { case Intrinsic::objc_retain_autorelease: case Intrinsic::objc_sync_enter: case Intrinsic::objc_sync_exit: + case Intrinsic::hyper_lookup: + case Intrinsic::reducer_register: + case 
Intrinsic::reducer_unregister: + case Intrinsic::tapir_loop_grainsize: return true; default: return false; diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp index ba4d0f5dc18db5..3d1d43862bcf9d 100644 --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -497,6 +497,13 @@ bool StructType::containsHomogeneousScalableVectorTypes() const { return true; } +StructType *StructType::lookupOrCreate(LLVMContext &Context, StringRef Name) { + StructType *Ty = Context.pImpl->NamedStructTypes.lookup(Name); + if (!Ty) + Ty = StructType::create(Context, Name); + return Ty; +} + void StructType::setBody(ArrayRef Elements, bool isPacked) { assert(isOpaque() && "Struct body already set!"); diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 1408ce293ca654..8af133abc164f2 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -369,6 +369,10 @@ class Verifier : public InstVisitor, VerifierSupport { // Keeps track of duplicate function argument debug info. SmallVector DebugFnArgs; + // Keeps track of detach instructions whose task structures have been + // verified. + SmallPtrSet DetachesVisited; + TBAAVerifier TBAAVerifyHelper; SmallVector NoAliasScopeDecls; @@ -581,6 +585,10 @@ class Verifier : public InstVisitor, VerifierSupport { void visitCatchSwitchInst(CatchSwitchInst &CatchSwitch); void visitCleanupReturnInst(CleanupReturnInst &CRI); + void verifyTask(const DetachInst *DI); + void visitDetachInst(DetachInst &DI); + void visitReattachInst(ReattachInst &RI); + void verifySwiftErrorCall(CallBase &Call, const Value *SwiftErrorVal); void verifySwiftErrorValue(const Value *SwiftErrorVal); void verifyTailCCMustTailAttrs(const AttrBuilder &Attrs, StringRef Context); @@ -3080,6 +3088,121 @@ void Verifier::visitCallBrInst(CallBrInst &CBI) { visitTerminator(CBI); } +// Check if the given instruction is an intrinsic with the specified ID. 
If a +// value \p V is specified, then additionally checks that the first argument of +// the intrinsic matches \p V. +static bool isTapirIntrinsic(Intrinsic::ID ID, const Instruction *I, + const Value *V) { + if (const CallBase *CB = dyn_cast(I)) + if (const Function *Called = CB->getCalledFunction()) + if (ID == Called->getIntrinsicID()) + if (!V || (V == CB->getArgOperand(0))) + return true; + return false; +} + +/// Returns true if the given instruction performs a detached.rethrow, false +/// otherwise. If \p SyncRegion is specified, then additionally checks that the +/// detached.rethrow uses \p SyncRegion. +static bool isDetachedRethrow(const Instruction *I, + const Value *SyncRegion = nullptr) { + return isa(I) && + isTapirIntrinsic(Intrinsic::detached_rethrow, I, SyncRegion); +} + +void Verifier::verifyTask(const DetachInst *DI) { + SmallVector Worklist; + SmallPtrSet Visited; + Worklist.push_back(DI->getDetached()); + do { + const BasicBlock *BB = Worklist.pop_back_val(); + if (!Visited.insert(BB).second) + continue; + + if (const DetachInst *SDI = dyn_cast(BB->getTerminator())) { + Check(DI != SDI, "Detached task reaches its own detach", DI); + if (DetachesVisited.insert(SDI).second) + // Recursively verify the detached task. + verifyTask(SDI); + + // Add the continuation and unwind destination to the worklist. + Worklist.push_back(SDI->getContinue()); + if (SDI->hasUnwindDest()) + Worklist.push_back(SDI->getUnwindDest()); + continue; + } + + if (const ReattachInst *RI = dyn_cast(BB->getTerminator())) { + Check(DI->getSyncRegion() == RI->getSyncRegion(), + "Mismatched sync regions between detach and reattach", DI, RI); + Check(RI->getDetachContinue() == DI->getContinue(), + "Mismatched continuations between detach and reattach", DI, RI); + // Don't add the successor of the reattach, since that's outside of the + // task. 
+ continue; + } + + if (const InvokeInst *II = dyn_cast(BB->getTerminator())) { + if (isDetachedRethrow(II)) { + Check(DI->getSyncRegion() == II->getArgOperand(0), + "Mismatched sync regions between detach and detached.rethrow", DI, + II); + Check(isa(II->getNormalDest()->getTerminator()), + "detached.rethrow intrinsic has an " + "unexpected normal destination.", + DI, II); + Check(DI->hasUnwindDest(), + "Task contains a detached.rethrow terminator, but detach has no " + "unwind destination", + DI, II); + Check(DI->getUnwindDest() == II->getUnwindDest(), + "Mismatched unwind destinations between detach and " + "detached.rethrow", + DI, II); + // Don't add the successors of the detached.rethrow, since they're + // outside of the task. + continue; + } + } + + // Check that do not encounter a return or resume in the middle of the + // task. + Check(!isa(BB->getTerminator()) && + !isa(BB->getTerminator()), + "Unexpected return or resume in task", BB->getTerminator()); + + // Add the successors of this basic block. + for (const BasicBlock *Successor : successors(BB)) + Worklist.push_back(Successor); + + } while (!Worklist.empty()); +} + +void Verifier::visitReattachInst(ReattachInst &RI) { + if (DT.isReachableFromEntry(RI.getParent())) { + // Check that the continuation of the reattach has a detach predecessor. 
+ const BasicBlock *Continue = RI.getDetachContinue(); + bool FoundDetachPred = false; + for (const BasicBlock *Pred : predecessors(Continue)) { + if (isa(Pred->getTerminator()) && + DT.dominates(Pred, RI.getParent())) { + FoundDetachPred = true; + break; + } + } + Check(FoundDetachPred, + "No detach predecessor found for successor of reattach.", &RI); + } + visitTerminator(RI); +} + +void Verifier::visitDetachInst(DetachInst &DI) { + if (DetachesVisited.insert(&DI).second) + verifyTask(&DI); + + visitTerminator(DI); +} + void Verifier::visitSelectInst(SelectInst &SI) { Check(!SelectInst::areInvalidOperands(SI.getOperand(0), SI.getOperand(1), SI.getOperand(2)), @@ -4300,6 +4423,14 @@ void Verifier::visitEHPadPredecessors(Instruction &I) { // landing pad block may be branched to only by the unwind edge of an // invoke. for (BasicBlock *PredBB : predecessors(BB)) { + if (const auto *DI = dyn_cast(PredBB->getTerminator())) { + Check(DI && DI->getUnwindDest() == BB && DI->getDetached() != BB && + DI->getContinue() != BB, + "A detach can only jump to a block containing a LandingPadInst " + "as the unwind destination.", + LPI); + continue; + } const auto *II = dyn_cast(PredBB->getTerminator()); Check(II && II->getUnwindDest() == BB && II->getNormalDest() != BB, "Block containing LandingPadInst must be jumped to " @@ -4996,9 +5127,13 @@ void Verifier::visitInstruction(Instruction &I) { F->getIntrinsicID() == Intrinsic::experimental_patchpoint_i64 || F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint || F->getIntrinsicID() == Intrinsic::wasm_rethrow || + F->getIntrinsicID() == Intrinsic::detached_rethrow || + F->getIntrinsicID() == Intrinsic::taskframe_resume || + F->getIntrinsicID() == Intrinsic::sync_unwind || IsAttachedCallOperand(F, CBI, i), "Cannot invoke an intrinsic other than donothing, patchpoint, " - "statepoint, coro_resume, coro_destroy or clang.arc.attachedcall", + "statepoint, coro_resume, coro_destroy, detached_rethrow, " + "taskframe_resume, 
sync_unwind or clang.arc.attachedcall", &I); Check(F->getParent() == &M, "Referencing function in another module!", &I, &M, F, F->getParent()); @@ -6074,6 +6209,20 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { Check(Call.getParent()->getFirstNonPHI() == &Call, "Loop intrinsic must occur at the start of the basic block.", &Call); break; + case Intrinsic::syncregion_start: { + SmallVector DetachUsers; + for (const User *U : Call.users()) + if (const DetachInst *DI = dyn_cast(U)) + if (DT.isReachableFromEntry(DI->getParent())) + DetachUsers.push_back(DI); + + for (const DetachInst *DI1 : DetachUsers) + for (const DetachInst *DI2 : DetachUsers) + if (DI1 != DI2) + Check(!DT.dominates(DI1->getDetached(), DI2->getParent()), + "One detach user of a sync region dominates another", DI1, DI2); + break; + } }; // Verify that there aren't any unmediated control transfers between funclets. diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index bc8abb751221ce..2b794bb2be5cf3 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -146,6 +146,7 @@ void llvm::computeLTOCacheKey( AddUnsigned(Conf.CGFileType); AddUnsigned(Conf.OptLevel); AddUnsigned(Conf.Freestanding); + AddUnsigned(static_cast(Conf.TapirTarget)); AddString(Conf.OptPipeline); AddString(Conf.AAPipeline); AddString(Conf.OverrideTriple); diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp index 29e28876760816..24d7ff76d22280 100644 --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -229,6 +229,11 @@ createTargetMachine(const Config &Conf, const Target *TheTarget, Module &M) { return TM; } +static bool hasTapirTarget(const Config &Conf) { + return (Conf.TapirTarget != TapirTargetID::Last_TapirTargetID) && + (Conf.TapirTarget != TapirTargetID::None); +} + static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, unsigned OptLevel, bool IsThinLTO, ModuleSummaryIndex *ExportSummary, @@ -269,6 +274,10 @@ static void 
runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, std::unique_ptr TLII( new TargetLibraryInfoImpl(Triple(TM->getTargetTriple()))); + TLII->setTapirTarget(Conf.TapirTarget); + TLII->setTapirTargetOptions( + std::make_unique(Conf.OpenCilkABIBitcodeFile)); + TLII->addTapirTargetLibraryFunctions(); if (Conf.Freestanding) TLII->disableAllFunctions(); FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); }); @@ -324,9 +333,11 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM, } else if (Conf.UseDefaultPipeline) { MPM.addPass(PB.buildPerModuleDefaultPipeline(OL)); } else if (IsThinLTO) { - MPM.addPass(PB.buildThinLTODefaultPipeline(OL, ImportSummary)); + MPM.addPass(PB.buildThinLTODefaultPipeline(OL, ImportSummary, + hasTapirTarget(Conf))); } else { - MPM.addPass(PB.buildLTODefaultPipeline(OL, ExportSummary)); + MPM.addPass(PB.buildLTODefaultPipeline(OL, ExportSummary, + hasTapirTarget(Conf))); } if (!Conf.DisableVerify) @@ -405,6 +416,10 @@ static void codegen(const Config &Conf, TargetMachine *TM, legacy::PassManager CodeGenPasses; TargetLibraryInfoImpl TLII(Triple(Mod.getTargetTriple())); + TLII.setTapirTarget(Conf.TapirTarget); + TLII.setTapirTargetOptions( + std::make_unique(Conf.OpenCilkABIBitcodeFile)); + TLII.addTapirTargetLibraryFunctions(); CodeGenPasses.add(new TargetLibraryInfoWrapperPass(TLII)); CodeGenPasses.add( createImmutableModuleSummaryIndexWrapperPass(&CombinedIndex)); diff --git a/llvm/lib/Passes/CMakeLists.txt b/llvm/lib/Passes/CMakeLists.txt index 576d0f3ff44298..a52902b1146d44 100644 --- a/llvm/lib/Passes/CMakeLists.txt +++ b/llvm/lib/Passes/CMakeLists.txt @@ -25,6 +25,7 @@ add_llvm_component_library(LLVMPasses ObjCARC Scalar Support + TapirOpts Target TransformUtils Vectorize diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index d0cbbcc0e310b9..14e26119588216 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -29,6 +29,7 @@ 
#include "llvm/Analysis/CallPrinter.h" #include "llvm/Analysis/CostModel.h" #include "llvm/Analysis/CycleAnalysis.h" +#include "llvm/Analysis/DataRaceFreeAliasAnalysis.h" #include "llvm/Analysis/DDG.h" #include "llvm/Analysis/DDGPrinter.h" #include "llvm/Analysis/Delinearization.h" @@ -67,6 +68,8 @@ #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/StackLifetime.h" #include "llvm/Analysis/StackSafetyAnalysis.h" +#include "llvm/Analysis/TapirRaceDetect.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" @@ -134,6 +137,8 @@ #include "llvm/Transforms/Instrumentation/AddressSanitizer.h" #include "llvm/Transforms/Instrumentation/BoundsChecking.h" #include "llvm/Transforms/Instrumentation/CGProfile.h" +#include "llvm/Transforms/Instrumentation/CilkSanitizer.h" +#include "llvm/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.h" #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" #include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h" #include "llvm/Transforms/Instrumentation/GCOVProfiler.h" @@ -225,6 +230,11 @@ #include "llvm/Transforms/Scalar/TLSVariableHoist.h" #include "llvm/Transforms/Scalar/TailRecursionElimination.h" #include "llvm/Transforms/Scalar/WarnMissedTransforms.h" +#include "llvm/Transforms/Tapir/LoopSpawningTI.h" +#include "llvm/Transforms/Tapir/LoopStripMinePass.h" +#include "llvm/Transforms/Tapir/SerializeSmallTasks.h" +#include "llvm/Transforms/Tapir/TapirToTarget.h" +#include "llvm/Transforms/Tapir/DRFScopedNoAliasAA.h" #include "llvm/Transforms/Utils/AddDiscriminators.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/BreakCriticalEdges.h" @@ -257,6 +267,8 @@ #include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" #include "llvm/Transforms/Utils/UnifyLoopExits.h" #include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h" 
+#include "llvm/Transforms/Utils/TaskCanonicalize.h" +#include "llvm/Transforms/Utils/TaskSimplify.h" #include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "llvm/Transforms/Vectorize/SLPVectorizer.h" #include "llvm/Transforms/Vectorize/VectorCombine.h" @@ -264,8 +276,9 @@ using namespace llvm; -static const Regex DefaultAliasRegex( - "^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$"); +static const Regex + DefaultAliasRegex("^(default|thinlto-pre-link|thinlto|lto-pre-link|lto|" + "tapir-lowering|tapir-lowering-loops)<(O[0123sz])>$"); namespace llvm { cl::opt PrintPipelinePasses( @@ -1094,7 +1107,7 @@ Expected parseMemProfUsePassOptions(StringRef Params) { /// alias. static bool startsWithDefaultPipelineAliasPrefix(StringRef Name) { return Name.startswith("default") || Name.startswith("thinlto") || - Name.startswith("lto"); + Name.startswith("lto") || Name.startswith("tapir-lowering"); } /// Tests whether registered callbacks will accept a given pass name. @@ -1408,6 +1421,10 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM, MPM.addPass(buildThinLTOPreLinkDefaultPipeline(L)); else MPM.addPass(buildLTOPreLinkDefaultPipeline(L)); + } else if (Matches[1] == "tapir-lowering-loops") { + MPM.addPass(buildTapirLoopLoweringPipeline(L, ThinOrFullLTOPhase::None)); + } else if (Matches[1] == "tapir-lowering") { + MPM.addPass(buildTapirLoweringPipeline(L, ThinOrFullLTOPhase::None)); } else { assert(Matches[1] == "lto" && "Not one of the matched options!"); MPM.addPass(buildLTODefaultPipeline(L, nullptr)); diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 660cb2e974d781..e23b96d4580212 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -18,12 +18,14 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/DataRaceFreeAliasAnalysis.h" 
#include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/Verifier.h" #include "llvm/Passes/OptimizationLevel.h" #include "llvm/Passes/PassBuilder.h" #include "llvm/Support/CommandLine.h" @@ -118,6 +120,11 @@ #include "llvm/Transforms/Scalar/SpeculativeExecution.h" #include "llvm/Transforms/Scalar/TailRecursionElimination.h" #include "llvm/Transforms/Scalar/WarnMissedTransforms.h" +#include "llvm/Transforms/Tapir/LoopSpawningTI.h" +#include "llvm/Transforms/Tapir/LoopStripMinePass.h" +#include "llvm/Transforms/Tapir/SerializeSmallTasks.h" +#include "llvm/Transforms/Tapir/TapirToTarget.h" +#include "llvm/Transforms/Tapir/DRFScopedNoAliasAA.h" #include "llvm/Transforms/Utils/AddDiscriminators.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/CanonicalizeAliases.h" @@ -129,6 +136,8 @@ #include "llvm/Transforms/Utils/NameAnonGlobals.h" #include "llvm/Transforms/Utils/RelLookupTableConverter.h" #include "llvm/Transforms/Utils/SimplifyCFGOptions.h" +#include "llvm/Transforms/Utils/TaskCanonicalize.h" +#include "llvm/Transforms/Utils/TaskSimplify.h" #include "llvm/Transforms/Vectorize/LoopVectorize.h" #include "llvm/Transforms/Vectorize/SLPVectorizer.h" #include "llvm/Transforms/Vectorize/VectorCombine.h" @@ -274,10 +283,16 @@ cl::opt EnableMemProfContextDisambiguation( "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden, cl::ZeroOrMore, cl::desc("Enable MemProf context disambiguation")); +static cl::opt + VerifyTapirLowering("verify-tapir-lowering-npm", cl::init(false), + cl::Hidden, + cl::desc("Verify IR after Tapir lowering steps")); + PipelineTuningOptions::PipelineTuningOptions() { LoopInterleaving = true; LoopVectorization = true; SLPVectorization = false; + LoopStripmine = true; LoopUnrolling = 
true; ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll; LicmMssaOptCap = SetLicmMssaOptCap; @@ -354,6 +369,16 @@ void PassBuilder::invokePipelineEarlySimplificationEPCallbacks( for (auto &C : PipelineEarlySimplificationEPCallbacks) C(MPM, Level); } +void PassBuilder::invokeTapirLateEPCallbacks(ModulePassManager &MPM, + OptimizationLevel Level) { + for (auto &C : TapirLateEPCallbacks) + C(MPM, Level); +} +void PassBuilder::invokeTapirLoopEndEPCallbacks(ModulePassManager &MPM, + OptimizationLevel Level) { + for (auto &C : TapirLoopEndEPCallbacks) + C(MPM, Level); +} // Helper to add AnnotationRemarksPass. static void addAnnotationRemarksPass(ModulePassManager &MPM) { @@ -386,6 +411,7 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, // Hoisting of scalars and load expressions. FPM.addPass( SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); + FPM.addPass(TaskSimplifyPass()); FPM.addPass(InstCombinePass()); FPM.addPass(LibCallsShrinkWrapPass()); @@ -394,6 +420,7 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass( SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); + FPM.addPass(TaskSimplifyPass()); // Form canonically associated expression trees, and simplify the trees using // basic mathematical properties. For example, this will form (nearly) @@ -463,6 +490,7 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, /*UseBlockFrequencyInfo=*/true)); FPM.addPass( SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); + FPM.addPass(TaskSimplifyPass()); FPM.addPass(InstCombinePass()); // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA. // *All* loop passes must preserve it, in order to be able to use it. 
@@ -501,6 +529,7 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(ADCEPass()); FPM.addPass( SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); + FPM.addPass(TaskSimplifyPass()); FPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(FPM, Level); @@ -551,6 +580,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass( SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); + FPM.addPass(TaskSimplifyPass()); FPM.addPass(InstCombinePass()); FPM.addPass(AggressiveInstCombinePass()); @@ -571,6 +601,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(TailCallElimPass()); FPM.addPass( SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); + FPM.addPass(TaskSimplifyPass()); // Form canonically associated expression trees, and simplify the trees using // basic mathematical properties. For example, this will form (nearly) @@ -642,6 +673,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, /*UseBlockFrequencyInfo=*/true)); FPM.addPass( SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); + FPM.addPass(TaskSimplifyPass()); FPM.addPass(InstCombinePass()); // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass, // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA. 
@@ -711,6 +743,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, .convertSwitchRangeToICmp(true) .hoistCommonInsts(true) .sinkCommonInsts(true))); + FPM.addPass(TaskSimplifyPass()); FPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(FPM, Level); @@ -1085,6 +1118,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, invokePeepholeEPCallbacks(GlobalCleanupPM, Level); GlobalCleanupPM.addPass( SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); + GlobalCleanupPM.addPass(TaskSimplifyPass()); MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM), PTO.EagerlyInvalidateAnalyses)); @@ -1188,6 +1222,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level, /*UseBlockFrequencyInfo=*/true)); ExtraPasses.addPass( SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); + // Cleanup tasks after the loop optimization passes. + ExtraPasses.addPass(TaskSimplifyPass()); ExtraPasses.addPass(InstCombinePass()); FPM.addPass(std::move(ExtraPasses)); } @@ -1225,6 +1261,9 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level, // Enhance/cleanup vector code. FPM.addPass(VectorCombinePass()); + // Rerun EarlyCSE for further cleanup. + FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); + if (!IsFullLTO) { FPM.addPass(InstCombinePass()); // Unroll small loops to hide loop backedge latency and saturate any @@ -1347,6 +1386,33 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, // rather than on each loop in an inside-out manner, and so they are actually // function passes. + // Stripmine Tapir loops, if pass is enabled. 
+ if (PTO.LoopStripmine && Level != OptimizationLevel::O1 && + !Level.isOptimizingForSize()) { + LoopPassManager LPM1, LPM2; + LPM1.addPass(TapirIndVarSimplifyPass()); + OptimizePM.addPass( + createFunctionToLoopPassAdaptor(std::move(LPM1), + /*UseMemorySSA=*/true, + /*UseBlockFrequencyInfo=*/true)); + OptimizePM.addPass(LoopStripMinePass()); + // Cleanup tasks after stripmining loops. + OptimizePM.addPass(TaskSimplifyPass()); + // Cleanup after stripmining loops. + LPM2.addPass(LoopSimplifyCFGPass()); + LPM2.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); + OptimizePM.addPass( + createFunctionToLoopPassAdaptor(std::move(LPM2), + /*UseMemorySSA=*/true, + /*UseBlockFrequencyInfo=*/true)); + // Don't run IndVarSimplify at this point, as it can actually inhibit + // vectorization in some cases. + OptimizePM.addPass(JumpThreadingPass()); + OptimizePM.addPass(CorrelatedValuePropagationPass()); + OptimizePM.addPass(InstCombinePass()); + } + invokeVectorizerStartEPCallbacks(OptimizePM, Level); LoopPassManager LPM; @@ -1395,6 +1461,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, OptimizePM.addPass( SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); + // Cleanup tasks as well. + OptimizePM.addPass(TaskSimplifyPass()); + // Add the core optimizing pipeline. MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM), PTO.EagerlyInvalidateAnalyses)); @@ -1437,11 +1506,184 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, return MPM; } +ModulePassManager +PassBuilder::buildTapirLoopLoweringPipeline(OptimizationLevel Level, + ThinOrFullLTOPhase Phase) { + ModulePassManager MPM; + + LoopPassManager LPM1, LPM2; + + if (Level == OptimizationLevel::O0) + // Form SSA out of local memory accesses. 
+ MPM.addPass( + createModuleToFunctionPassAdaptor(SROAPass(SROAOptions::ModifyCFG))); + + // Rotate Loop - disable header duplication at -Oz + LPM1.addPass(LoopRotatePass(Level != OptimizationLevel::Oz)); + LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); + LPM2.addPass(IndVarSimplifyPass()); + + FunctionPassManager FPM; + // The loop pass in LPM2 (IndVarSimplifyPass) does not preserve MemorySSA. + // *All* loop passes must preserve it, in order to be able to use it. + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), + /*UseMemorySSA=*/true, + /*UseBlockFrequencyInfo=*/true)); + FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp( + true))); // Merge & remove basic blocks. + FPM.addPass(InstCombinePass()); + FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), + /*UseMemorySSA=*/false, + /*UseBlockFrequencyInfo=*/false)); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + + // Outline Tapir loops as needed. + MPM.addPass(LoopSpawningPass()); + if (VerifyTapirLowering) + MPM.addPass(VerifierPass()); + + // The LoopSpawning pass may leave cruft around. Clean it up using the + // function simplification pipeline. + if (Level != OptimizationLevel::O0) + MPM.addPass( + createModuleToFunctionPassAdaptor( + buildFunctionSimplificationPipeline(Level, Phase))); + + return MPM; +} + +ModulePassManager +PassBuilder::buildTapirLoweringPipeline(OptimizationLevel Level, + ThinOrFullLTOPhase Phase) { + ModulePassManager MPM; + + if (Level == OptimizationLevel::O0) { + // At -O0, simply translate the Tapir constructs and run always-inline. In + // particular, don't run loop-spawning. + + // Add passes to run just after Tapir loops are (or would be) processed. + for (auto &C : TapirLoopEndEPCallbacks) + C(MPM, Level); + + // Lower Tapir constructs to target runtime calls. 
+ MPM.addPass(TapirToTargetPass()); + if (VerifyTapirLowering) + MPM.addPass(VerifierPass()); + + MPM.addPass(AlwaysInlinerPass( + /*InsertLifetimeIntrinsics=*/false)); + + return MPM; + } + + // Lower Tapir loops + MPM.addPass(buildTapirLoopLoweringPipeline(Level, Phase)); + + // Add passes to run just after Tapir loops are processed. + invokeTapirLoopEndEPCallbacks(MPM, Level); + + // Canonicalize the representation of tasks. + MPM.addPass(createModuleToFunctionPassAdaptor(TaskCanonicalizePass())); + + // Lower Tapir to target runtime calls. + MPM.addPass(TapirToTargetPass()); + if (VerifyTapirLowering) + MPM.addPass(VerifierPass()); + + // The TapirToTarget pass may leave cruft around. Clean it up using the + // function simplification pipeline. + MPM.addPass( + createModuleToFunctionPassAdaptor( + buildFunctionSimplificationPipeline(Level, Phase))); + + // Interprocedural constant propagation now that basic cleanup has occurred + // and prior to optimizing globals. + // FIXME: This position in the pipeline hasn't been carefully considered in + // years, it should be re-analyzed. + MPM.addPass(IPSCCPPass()); + + // Attach metadata to indirect call sites indicating the set of functions + // they may target at run-time. This should follow IPSCCP. + MPM.addPass(CalledValuePropagationPass()); + + // Optimize globals to try and fold them into constants. + MPM.addPass(GlobalOptPass()); + + // Promote any localized globals to SSA registers. + // FIXME: Should this instead by a run of SROA? + // FIXME: We should probably run instcombine and simplify-cfg afterward to + // delete control flows that are dead once globals have been folded to + // constants. + MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass())); + + // Remove any dead arguments exposed by cleanups and constant folding + // globals. + MPM.addPass(DeadArgumentEliminationPass()); + + // Create a small function pass pipeline to cleanup after all the global + // optimizations. 
+ FunctionPassManager GlobalCleanupPM; + GlobalCleanupPM.addPass(InstCombinePass()); + GlobalCleanupPM.addPass( + SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true))); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM))); + + // Synthesize function entry counts for non-PGO compilation. + if (EnableSyntheticCounts) + MPM.addPass(SyntheticCountsPropagation()); + + MPM.addPass(AlwaysInlinerPass( + /*InsertLifetimeIntrinsics=*/false)); + + // Require the GlobalsAA analysis for the module so we can query it within + // the CGSCC pipeline. + MPM.addPass(RequireAnalysisPass()); + + // Begin the postoder CGSCC pipeline. + CGSCCPassManager PostLowerCGPipeline; + + // Now deduce any function attributes based in the current code. + PostLowerCGPipeline.addPass(PostOrderFunctionAttrsPass()); + + // When at O3 add argument promotion to the pass pipeline. + // FIXME: It isn't at all clear why this should be limited to O3. + if (Level == OptimizationLevel::O3) + PostLowerCGPipeline.addPass(ArgumentPromotionPass()); + + // Lastly, add the core function simplification pipeline nested inside the + // CGSCC walk. + PostLowerCGPipeline.addPass(createCGSCCToFunctionPassAdaptor( + buildFunctionSimplificationPipeline(Level, Phase))); + + // We wrap the CGSCC pipeline in a devirtualization repeater. This will try + // to detect when we devirtualize indirect calls and iterate the SCC passes + // in that case to try and catch knock-on inlining or function attrs + // opportunities. Then we add it to the module pipeline by walking the SCCs + // in postorder (or bottom-up). + MPM.addPass( + createModuleToPostOrderCGSCCPassAdaptor(createDevirtSCCRepeatedPass( + std::move(PostLowerCGPipeline), MaxDevirtIterations))); + + // Drop bodies of available eternally objects to improve GlobalDCE. + MPM.addPass(EliminateAvailableExternallyPass()); + + // Do RPO function attribute inference across the module to forward-propagate + // attributes where applicable. 
+ // FIXME: Is this really an optimization rather than a canonicalization? + MPM.addPass(ReversePostOrderFunctionAttrsPass()); + + // Now that we have optimized the program, discard unreachable functions. + MPM.addPass(GlobalDCEPass()); + + return MPM; +} + ModulePassManager PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, - bool LTOPreLink) { + bool LTOPreLink, bool LowerTapir) { if (Level == OptimizationLevel::O0) - return buildO0DefaultPipeline(Level, LTOPreLink); + return buildO0DefaultPipeline(Level, LTOPreLink, LowerTapir); ModulePassManager MPM; @@ -1475,6 +1717,18 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, if (LTOPreLink) addRequiredLTOPreLinkPasses(MPM); + + // Add passes to run just before Tapir lowering. + invokeTapirLateEPCallbacks(MPM, Level); + + // Lower Tapir if necessary + if (LowerTapir) + MPM.addPass(buildTapirLoweringPipeline( + Level, LTOPreLink ? ThinOrFullLTOPhase::FullLTOPreLink + : ThinOrFullLTOPhase::None)); + else + invokeTapirLoopEndEPCallbacks(MPM, Level); + return MPM; } @@ -1544,7 +1798,8 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) { } ModulePassManager PassBuilder::buildThinLTODefaultPipeline( - OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) { + OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary, + bool LowerTapir) { ModulePassManager MPM; if (ImportSummary) { @@ -1592,6 +1847,16 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline( MPM.addPass(buildModuleOptimizationPipeline( Level, ThinOrFullLTOPhase::ThinLTOPostLink)); + // Add passes to run just before Tapir lowering. + invokeTapirLateEPCallbacks(MPM, Level); + + // Lower Tapir if necessary + if (LowerTapir) + MPM.addPass( + buildTapirLoweringPipeline(Level, ThinOrFullLTOPhase::ThinLTOPostLink)); + else + invokeTapirLoopEndEPCallbacks(MPM, Level); + // Emit annotation remarks. 
addAnnotationRemarksPass(MPM); @@ -1607,7 +1872,8 @@ PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) { ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, - ModuleSummaryIndex *ExportSummary) { + ModuleSummaryIndex *ExportSummary, + bool LowerTapir) { ModulePassManager MPM; invokeFullLinkTimeOptimizationEarlyEPCallbacks(MPM, Level); @@ -1915,6 +2181,16 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level); + // Add passes to run just before Tapir lowering. + invokeTapirLateEPCallbacks(MPM, Level); + + // Lower Tapir if necessary + if (LowerTapir) + MPM.addPass( + buildTapirLoweringPipeline(Level, ThinOrFullLTOPhase::FullLTOPostLink)); + else + invokeTapirLoopEndEPCallbacks(MPM, Level); + // Emit annotation remarks. addAnnotationRemarksPass(MPM); @@ -1922,7 +2198,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, } ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, - bool LTOPreLink) { + bool LTOPreLink, + bool LowerTapir) { assert(Level == OptimizationLevel::O0 && "buildO0DefaultPipeline should only be used with O0"); @@ -2011,6 +2288,16 @@ ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, CoroPM.addPass(GlobalDCEPass()); MPM.addPass(CoroConditionalWrapper(std::move(CoroPM))); + // Add passes to run just before Tapir lowering. + invokeTapirLateEPCallbacks(MPM, Level); + + if (LowerTapir) + MPM.addPass(buildTapirLoweringPipeline( + Level, LTOPreLink ? 
ThinOrFullLTOPhase::FullLTOPreLink + : ThinOrFullLTOPhase::None)); + else + invokeTapirLoopEndEPCallbacks(MPM, Level); + invokeOptimizerLastEPCallbacks(MPM, Level); if (LTOPreLink) @@ -2021,6 +2308,84 @@ ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level, return MPM; } +ModulePassManager +PassBuilder::buildPostCilkInstrumentationPipeline(OptimizationLevel Level) { + ModulePassManager MPM; + if (Level != OptimizationLevel::O0) { + FunctionPassManager FPM; + FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); + FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); + FPM.addPass(JumpThreadingPass()); + FPM.addPass(CorrelatedValuePropagationPass()); + FPM.addPass(SimplifyCFGPass()); + FPM.addPass(ReassociatePass()); + LoopPassManager LPM; + // Simplify the loop body. We do this initially to clean up after + // other loop passes run, either when iterating on a loop or on + // inner loops with implications on the outer loop. + LPM.addPass(LoopInstSimplifyPass()); + LPM.addPass(LoopSimplifyCFGPass()); + LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); + LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level == + OptimizationLevel::O3)); + FPM.addPass( + RequireAnalysisPass()); + FPM.addPass( + createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true, + /*UseBlockFrequencyInfo=*/true)); + FPM.addPass(SimplifyCFGPass()); + FPM.addPass(InstCombinePass()); + FPM.addPass(SCCPPass()); + FPM.addPass(BDCEPass()); + FPM.addPass(InstCombinePass()); + FPM.addPass(DSEPass()); + FPM.addPass(SimplifyCFGPass()); + FPM.addPass(InstCombinePass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3) { + MPM.addPass(ModuleInlinerWrapperPass( + getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel()))); + // Optimize globals. 
+ MPM.addPass(GlobalOptPass()); + MPM.addPass(GlobalDCEPass()); + FunctionPassManager FPM; + FPM.addPass(SROAPass(SROAOptions::ModifyCFG)); + FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); + FPM.addPass(JumpThreadingPass()); + FPM.addPass(CorrelatedValuePropagationPass()); + FPM.addPass(SimplifyCFGPass()); + FPM.addPass(ReassociatePass()); + LoopPassManager LPM; + // Simplify the loop body. We do this initially to clean up + // after other loop passes run, either when iterating on a loop + // or on inner loops with implications on the outer loop. + LPM.addPass(LoopInstSimplifyPass()); + LPM.addPass(LoopSimplifyCFGPass()); + LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap, + /*AllowSpeculation=*/true)); + FPM.addPass( + RequireAnalysisPass()); + FPM.addPass( + createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true, + /*UseBlockFrequencyInfo=*/true)); + FPM.addPass(SimplifyCFGPass()); + FPM.addPass(InstCombinePass()); + FPM.addPass(SCCPPass()); + FPM.addPass(BDCEPass()); + FPM.addPass(InstCombinePass()); + FPM.addPass(DSEPass()); + FPM.addPass(SimplifyCFGPass()); + FPM.addPass(InstCombinePass()); + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM))); + } + } + MPM.addPass(EliminateAvailableExternallyPass()); + MPM.addPass(GlobalDCEPass()); + + return MPM; +} + AAManager PassBuilder::buildDefaultAAPipeline() { AAManager AA; @@ -2044,6 +2409,11 @@ AAManager PassBuilder::buildDefaultAAPipeline() { if (EnableGlobalAnalyses) AA.registerModuleAnalysis(); + if (EnableDRFAA) + // Add support for using Tapir parallel control flow to inform alias + // analysis based on the data-race-free assumption. + AA.registerFunctionAnalysis(); + // Add target-specific alias analyses. 
if (TM) TM->registerDefaultAliasAnalyses(AA); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index e10dc995c49305..e80012a3ab3e12 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -49,10 +49,13 @@ MODULE_PASS("called-value-propagation", CalledValuePropagationPass()) MODULE_PASS("canonicalize-aliases", CanonicalizeAliasesPass()) MODULE_PASS("cg-profile", CGProfilePass()) MODULE_PASS("check-debugify", NewPMCheckDebugifyPass()) +MODULE_PASS("cilksan", CilkSanitizerPass()) MODULE_PASS("constmerge", ConstantMergePass()) MODULE_PASS("coro-early", CoroEarlyPass()) MODULE_PASS("coro-cleanup", CoroCleanupPass()) MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass()) +MODULE_PASS("csi", ComprehensiveStaticInstrumentationPass()) +MODULE_PASS("csi-setup", CSISetupPass()) MODULE_PASS("deadargelim", DeadArgumentEliminationPass()) MODULE_PASS("debugify", NewPMDebugifyPass()) MODULE_PASS("dot-callgraph", CallGraphDOTPrinterPass()) @@ -77,6 +80,7 @@ MODULE_PASS("internalize", InternalizePass()) MODULE_PASS("invalidate", InvalidateAllAnalysesPass()) MODULE_PASS("iroutliner", IROutlinerPass()) MODULE_PASS("print-ir-similarity", IRSimilarityAnalysisPrinterPass(dbgs())) +MODULE_PASS("loop-spawning", LoopSpawningPass()) MODULE_PASS("lower-global-dtors", LowerGlobalDtorsPass()) MODULE_PASS("lower-ifunc", LowerIFuncPass()) MODULE_PASS("lowertypetests", LowerTypeTestsPass()) @@ -115,6 +119,7 @@ MODULE_PASS("strip-debug-declare", StripDebugDeclarePass()) MODULE_PASS("strip-nondebug", StripNonDebugSymbolsPass()) MODULE_PASS("strip-nonlinetable-debuginfo", StripNonLineTableDebugInfoPass()) MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation()) +MODULE_PASS("tapir2target", TapirToTargetPass()) MODULE_PASS("trigger-crash", TriggerCrashPass()) MODULE_PASS("verify", VerifierPass()) MODULE_PASS("view-callgraph", CallGraphViewerPass()) @@ -260,6 +265,7 @@ FUNCTION_ANALYSIS("phi-values", PhiValuesAnalysis()) 
FUNCTION_ANALYSIS("regions", RegionInfoAnalysis()) FUNCTION_ANALYSIS("no-op-function", NoOpFunctionAnalysis()) FUNCTION_ANALYSIS("opt-remark-emit", OptimizationRemarkEmitterAnalysis()) +FUNCTION_ANALYSIS("race-detect", TapirRaceDetect()) FUNCTION_ANALYSIS("scalar-evolution", ScalarEvolutionAnalysis()) FUNCTION_ANALYSIS("should-not-run-function-passes", ShouldNotRunFunctionPassesAnalysis()) FUNCTION_ANALYSIS("should-run-extra-vector-passes", ShouldRunExtraVectorPasses()) @@ -267,6 +273,7 @@ FUNCTION_ANALYSIS("stack-safety-local", StackSafetyAnalysis()) FUNCTION_ANALYSIS("targetlibinfo", TargetLibraryAnalysis()) FUNCTION_ANALYSIS("targetir", TM ? TM->getTargetIRAnalysis() : TargetIRAnalysis()) +FUNCTION_ANALYSIS("tasks", TaskAnalysis()) FUNCTION_ANALYSIS("verify", VerifierAnalysis()) FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) FUNCTION_ANALYSIS("uniformity", UniformityInfoAnalysis()) @@ -280,6 +287,7 @@ FUNCTION_ALIAS_ANALYSIS("objc-arc-aa", objcarc::ObjCARCAA()) FUNCTION_ALIAS_ANALYSIS("scev-aa", SCEVAA()) FUNCTION_ALIAS_ANALYSIS("scoped-noalias-aa", ScopedNoAliasAA()) FUNCTION_ALIAS_ANALYSIS("tbaa", TypeBasedAA()) +FUNCTION_ALIAS_ANALYSIS("drf-aa", DRFAA()) #undef FUNCTION_ALIAS_ANALYSIS #undef FUNCTION_ANALYSIS @@ -314,6 +322,7 @@ FUNCTION_PASS("dot-dom", DomPrinter()) FUNCTION_PASS("dot-dom-only", DomOnlyPrinter()) FUNCTION_PASS("dot-post-dom", PostDomPrinter()) FUNCTION_PASS("dot-post-dom-only", PostDomOnlyPrinter()) +FUNCTION_PASS("drf-scoped-noalias", DRFScopedNoAliasPass()) FUNCTION_PASS("view-dom", DomViewer()) FUNCTION_PASS("view-dom-only", DomOnlyViewer()) FUNCTION_PASS("view-post-dom", PostDomViewer()) @@ -363,6 +372,7 @@ FUNCTION_PASS("loop-load-elim", LoopLoadEliminationPass()) FUNCTION_PASS("loop-fusion", LoopFusePass()) FUNCTION_PASS("loop-distribute", LoopDistributePass()) FUNCTION_PASS("loop-versioning", LoopVersioningPass()) +FUNCTION_PASS("loop-stripmine", LoopStripMinePass()) FUNCTION_PASS("objc-arc", 
ObjCARCOptPass()) FUNCTION_PASS("objc-arc-contract", ObjCARCContractPass()) FUNCTION_PASS("objc-arc-expand", ObjCARCExpandPass()) @@ -388,10 +398,12 @@ FUNCTION_PASS("print", FUNCTION_PASS("print", LoopPrinterPass(dbgs())) FUNCTION_PASS("print", MemorySSAWalkerPrinterPass(dbgs())) FUNCTION_PASS("print", PhiValuesPrinterPass(dbgs())) +FUNCTION_PASS("print", TapirRaceDetectPrinterPass(dbgs())) FUNCTION_PASS("print", RegionInfoPrinterPass(dbgs())) FUNCTION_PASS("print", ScalarEvolutionPrinterPass(dbgs())) FUNCTION_PASS("print", StackSafetyPrinterPass(dbgs())) FUNCTION_PASS("print", LoopAccessInfoPrinterPass(dbgs())) +FUNCTION_PASS("print", TaskPrinterPass(dbgs())) // TODO: rename to print after NPM switch FUNCTION_PASS("print-alias-sets", AliasSetsPrinterPass(dbgs())) FUNCTION_PASS("print-cfg-sccs", CFGSCCPrinterPass(dbgs())) @@ -405,6 +417,7 @@ FUNCTION_PASS("reg2mem", RegToMemPass()) FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass()) FUNCTION_PASS("scalarizer", ScalarizerPass()) FUNCTION_PASS("separate-const-offset-from-gep", SeparateConstOffsetFromGEPPass()) +FUNCTION_PASS("serialize-small-tasks", SerializeSmallTasksPass()) FUNCTION_PASS("sccp", SCCPPass()) FUNCTION_PASS("sink", SinkingPass()) FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass()) @@ -413,6 +426,8 @@ FUNCTION_PASS("speculative-execution", SpeculativeExecutionPass()) FUNCTION_PASS("strip-gc-relocates", StripGCRelocates()) FUNCTION_PASS("structurizecfg", StructurizeCFGPass()) FUNCTION_PASS("tailcallelim", TailCallElimPass()) +FUNCTION_PASS("task-canonicalize", TaskCanonicalizePass()) +FUNCTION_PASS("task-simplify", TaskSimplifyPass()) FUNCTION_PASS("typepromotion", TypePromotionPass(TM)) FUNCTION_PASS("unify-loop-exits", UnifyLoopExitsPass()) FUNCTION_PASS("vector-combine", VectorCombinePass()) @@ -614,6 +629,7 @@ LOOP_PASS("guard-widening", GuardWideningPass()) LOOP_PASS("loop-bound-split", LoopBoundSplitPass()) LOOP_PASS("loop-reroll", LoopRerollPass()) 
LOOP_PASS("loop-versioning-licm", LoopVersioningLICMPass()) +LOOP_PASS("tapir-indvars", TapirIndVarSimplifyPass()) #undef LOOP_PASS #ifndef LOOP_PASS_WITH_PARAMS diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 6e721b9378468d..bfd4b25865190b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -934,6 +934,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::BITCAST, MVT::f16, Custom); setOperationAction(ISD::BITCAST, MVT::bf16, Custom); + setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); + setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); + // Indexed loads and stores are supported. for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { @@ -2573,6 +2576,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::CALL_BTI) MAKE_CASE(AArch64ISD::MRRS) MAKE_CASE(AArch64ISD::MSRR) + MAKE_CASE(AArch64ISD::EH_SJLJ_SETJMP) + MAKE_CASE(AArch64ISD::EH_SJLJ_LONGJMP) } #undef MAKE_CASE return nullptr; @@ -2798,6 +2803,10 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( return EmitFill(MI, BB); case AArch64::ZERO_M_PSEUDO: return EmitZero(MI, BB); + case AArch64::AArch64_setjmp_instr: + return EmitSetjmp(MI, BB); + case AArch64::AArch64_longjmp_instr: + return EmitLongjmp(MI, BB); } } @@ -6112,6 +6121,10 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op, return Result; } + case ISD::EH_SJLJ_SETJMP: + return LowerSetjmp(Op, DAG); + case ISD::EH_SJLJ_LONGJMP: + return LowerLongjmp(Op, DAG); } } @@ -26051,3 +26064,187 @@ bool AArch64TargetLowering::preferScalarizeSplat(SDNode *N) const { } return true; } + +SDValue AArch64TargetLowering::LowerSetjmp(SDValue Op, + SelectionDAG &DAG) const { + return DAG.getNode(AArch64ISD::EH_SJLJ_SETJMP, SDLoc(Op), + 
DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), + Op.getOperand(1)); +} + +SDValue AArch64TargetLowering::LowerLongjmp(SDValue Op, + SelectionDAG &DAG) const { + return DAG.getNode(AArch64ISD::EH_SJLJ_LONGJMP, SDLoc(Op), MVT::Other, + Op.getOperand(0), Op.getOperand(1)); +} + +MachineBasicBlock * +AArch64TargetLowering::EmitSetjmp(MachineInstr &MI, + MachineBasicBlock *MBB) const { + MachineFunction *MF = MBB->getParent(); + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + const AArch64RegisterInfo *TRI = + &Subtarget->getInstrInfo()->getRegisterInfo(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + DebugLoc DL = MI.getDebugLoc(); + + const BasicBlock *BB = MBB->getBasicBlock(); + MachineFunction::iterator I = ++MBB->getIterator(); + + // Memory Reference + SmallVector MMOs(MI.memoperands_begin(), + MI.memoperands_end()); + + Register DstReg = MI.getOperand(0).getReg(); + const TargetRegisterClass *RC = MRI.getRegClass(DstReg); + assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); + Register mainDstReg = MRI.createVirtualRegister(RC); + Register restoreDstReg = MRI.createVirtualRegister(RC); + Register AddrReg = MI.getOperand(1).getReg(); + + MVT PVT = getPointerTy(MF->getDataLayout()); + assert(PVT == MVT::i64 && "Invalid Pointer Size!"); + + // For v = setjmp(buf), we generate + // + // thisMBB: + // buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB + // SjLjSetup restoreMBB + // + // mainMBB: + // v_main = 0 + // + // sinkMBB: + // v = phi(main, restore) + // + // restoreMBB: + // v_restore = 1 + + MachineBasicBlock *thisMBB = MBB; + MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); + MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB); + MF->insert(I, mainMBB); + MF->insert(I, sinkMBB); + MF->push_back(restoreMBB); + restoreMBB->setIsEHPad(true); + + MachineInstrBuilder MIB; + + // Transfer the remainder of BB and its 
successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), MBB, + std::next(MachineBasicBlock::iterator(MI)), MBB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); + + // thisMBB: + unsigned LabelReg = 0; + + // TODO: The four stores generated by setjmp should be merged into two + // pairs. They are generated out of order by two separate blocks of + // code (0+2 by machine independent code and 1+3 here). + + // Calculate resume address. ADR has +/- 1 MB range. + LabelReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + MIB = BuildMI(*thisMBB, MI, DL, TII->get(AArch64::ADR), LabelReg) + .addMBB(restoreMBB); + MIB = BuildMI(*thisMBB, MI, DL, TII->get(AArch64::STRXui)); + MIB.addReg(LabelReg); + MIB.addReg(AddrReg); + MIB.addImm(1); // scaled by word size + MIB.setMemRefs(MMOs); + + MIB = BuildMI(*thisMBB, MI, DL, TII->get(AArch64::STRXui)); + MIB.addReg(TRI->hasBasePointer(*MF) ? TRI->getBaseRegister() : AArch64::XZR); + MIB.addReg(AddrReg); + MIB.addImm(3); // scaled by word size + + // x86 has cf-protection-return check here + + // Add a special terminator instruction to make the resume block reachable. + MIB = BuildMI(*thisMBB, MI, DL, TII->get(AArch64::EH_SjLj_Setup)) + .addMBB(restoreMBB); + // TODO: This unnecessarily flushes registers on the fallthrough + // path even though only restoreMBB loses register state. The data + // loss needs to be added to the edge. Putting the register mask in + // the destination block is too late because the compiler will put + // spills of already-invalid registers before the invalidation note. + MIB.addRegMask(MRI.getTargetRegisterInfo()->getNoPreservedMask()); + // For now these successors should not have branch probabilities. + // Although mainMBB is much more likely, adding probabilities causes + // poor code generation later, in part by suppressing tail duplication. 
+ thisMBB->addSuccessor(mainMBB); + thisMBB->addSuccessor(restoreMBB); + + // mainMBB: dst = 0 << 0 + BuildMI(mainMBB, DL, TII->get(AArch64::MOVZWi), mainDstReg) + .addImm(0) + .addImm(0); + BuildMI(mainMBB, DL, TII->get(AArch64::B)).addMBB(sinkMBB); + mainMBB->addSuccessor(sinkMBB); + + // sinkMBB: + BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(AArch64::PHI), DstReg) + .addReg(mainDstReg) + .addMBB(mainMBB) + .addReg(restoreDstReg) + .addMBB(restoreMBB); + + // restoreMBB: dst = 1 << 0 + BuildMI(restoreMBB, DL, TII->get(AArch64::MOVZWi), restoreDstReg) + .addImm(1) + .addImm(0); + BuildMI(restoreMBB, DL, TII->get(AArch64::B)).addMBB(sinkMBB); + restoreMBB->addSuccessor(sinkMBB); + + MI.eraseFromParent(); + return sinkMBB; +} + +MachineBasicBlock * +AArch64TargetLowering::EmitLongjmp(MachineInstr &MI, + MachineBasicBlock *MBB) const { + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + const AArch64RegisterInfo *TRI = static_cast( + MF->getSubtarget().getRegisterInfo()); + DebugLoc DL = MI.getDebugLoc(); + Register PC = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + Register AddrReg = MI.getOperand(0).getReg(); + Register StackReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + MachineInstrBuilder MIB; + + // The frame pointer is overwritten by the first load so + // copy it to a temporary register if necessary. 
+ if (AddrReg == AArch64::FP || AddrReg == AArch64::SP) { + Register AddrTmp = MRI.createVirtualRegister(&AArch64::GPR64RegClass); + MIB = BuildMI(*MBB, MI, DL, TII->get(AArch64::ORRXri), AddrTmp); + MIB.addReg(AddrReg); + MIB.addImm(0); + AddrReg = AddrTmp; + MI.getOperand(0).ChangeToRegister(AddrTmp, false, false, true); + } + + // FP, PC + MIB = BuildMI(*MBB, MI, DL, TII->get(AArch64::LDPXi)); + MIB.addReg(AArch64::FP, RegState::Define); + MIB.addReg(PC, RegState::Define); + MIB.addReg(AddrReg); + MIB.addImm(0); // scaled by word size + // SP (indirectly) and X19 + // X19 may be used as the base pointer for an over-aligned stack frame. + // If not, the setjmp restore block does not expect values in X19 to be live. + MIB = BuildMI(*MBB, MI, DL, TII->get(AArch64::LDPXi)); + MIB.addReg(StackReg, RegState::Define); + MIB.addReg(TRI->getBaseRegister(), RegState::Define); + MIB.addReg(AddrReg); + MIB.addImm(2); // scaled by word size + MIB = BuildMI(*MBB, MI, DL, TII->get(AArch64::ADDXri), AArch64::SP); + MIB.addReg(StackReg); + MIB.addImm(0); // immediate + MIB.addImm(0); // shift count + MIB = BuildMI(*MBB, MI, DL, TII->get(AArch64::BR)); + MIB.addReg(PC); + MI.eraseFromParent(); + return MBB; +} diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index aca45f113e7366..ac1eeb407425a1 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -440,6 +440,10 @@ enum NodeType : unsigned { // chain = MSRR(chain, sysregname, lo64, hi64) MSRR, + // Builtin setjmp and longjmp + EH_SJLJ_SETJMP, + EH_SJLJ_LONGJMP, + // Strict (exception-raising) floating point comparison STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE, STRICT_FCMPE, @@ -1108,6 +1112,8 @@ class AArch64TargetLowering : public TargetLowering { SDValue &Size, SelectionDAG &DAG) const; SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const; + SDValue LowerSetjmp(SDValue Op, SelectionDAG 
&DAG) const; + SDValue LowerLongjmp(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op, SelectionDAG &DAG) const; @@ -1247,6 +1253,9 @@ class AArch64TargetLowering : public TargetLowering { LLT Ty2) const override; bool preferScalarizeSplat(SDNode *N) const override; + MachineBasicBlock *EmitSetjmp(MachineInstr &MI, MachineBasicBlock *MBB) const; + MachineBasicBlock *EmitLongjmp(MachineInstr &MI, + MachineBasicBlock *MBB) const; }; namespace AArch64 { diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 0691e07a639bee..bf3881b9553dbb 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -8428,6 +8428,84 @@ unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) { return AArch64::BLR; } +std::optional +AArch64InstrInfo::isZeroTest(MachineBasicBlock &MBB) const { + const AArch64RegisterInfo *TRI = &getRegisterInfo(); + MachineBasicBlock *U = nullptr, *Zero = nullptr, *Nonzero = nullptr; + + MachineBasicBlock::const_reverse_instr_iterator MI = MBB.instr_rbegin(); + while (MI != MBB.instr_rend() && MI->isUnconditionalBranch()) { + U = getBranchDestBlock(*MI); + ++MI; + } + + if (MI == MBB.instr_rend()) + return std::optional(); + + switch (MI->getOpcode()) { + case AArch64::CBNZW: + case AArch64::CBNZX: + Zero = U; + Nonzero = MI->getOperand(1).getMBB(); + break; + case AArch64::CBZW: + case AArch64::CBZX: + Nonzero = U; + Zero = MI->getOperand(1).getMBB(); + break; + default: + return std::optional(); + } + + BlockBRNZ Desc; + Desc.IsKill = MI->getOperand(0).isKill(); + Desc.Regs.push_back(MI->getOperand(0).getReg()); + Desc.Zero = Zero; + Desc.Nonzero = Nonzero; + + const Register &Reg0 = Desc.Regs[0]; + + while (++MI != MBB.instr_rend()) { + if (MI->isPHI()) { + if (MI->getOperand(0).getReg() == Reg0) { + unsigned NumOperands = MI->getNumOperands(); + for (unsigned I = 1; I < NumOperands; I += 2) { + 
Desc.Regs.push_back(MI->getOperand(I).getReg()); + } + } + // There should be only one PHI setting the register. + return Desc; + } + if (MI->modifiesRegister(Reg0, TRI)) + return std::optional(); + if (MI->readsRegister(Reg0, TRI)) + Desc.IsKill = false; + } + return Desc; +} + +bool +AArch64InstrInfo::isSetConstant(const MachineInstr &MI, Register &Reg, + int64_t &Value) const { + if (MI.getNumOperands() < 3 || !MI.getOperand(0).isReg()) + return false; + // describeLoadedValue, but ParamLoadedValue is complicated... + switch (MI.getOpcode()) { + case AArch64::MOVZWi: + case AArch64::MOVZXi: { + if (!MI.getOperand(1).isImm()) + return false; + Reg = MI.getOperand(0).getReg(); + int64_t Immediate = MI.getOperand(1).getImm(); + int Shift = MI.getOperand(2).getImm(); + Value = Immediate << Shift; + // range check is easier than worrying about extension and truncation + return (Value & 0x7fffffff) == Value; + } + } + return false; +} + #define GET_INSTRINFO_HELPERS #define GET_INSTRMAP_INFO #include "AArch64GenInstrInfo.inc" diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 20210a96d67ad2..e74fe6b3ac74b0 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -254,6 +254,10 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { const MachineRegisterInfo *MRI) const override; bool optimizeCondBranch(MachineInstr &MI) const override; + std::optional isZeroTest(MachineBasicBlock &MBB) const override; + bool isSetConstant(const MachineInstr &MI, Register &Reg, + int64_t &Value) const override; + /// Return true when a code sequence can improve throughput. It /// should be called only for instructions in loops. 
/// \param Pattern - combiner pattern diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 9e72d37880c58f..b2f3afa9017786 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -1607,8 +1607,10 @@ def : Pat<(AArch64mrs imm:$id), // The thread pointer (on Linux, at least, where this has been implemented) is // TPIDR_EL0. +let mayLoad = 1 in { def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins), [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>; +} // This gets lowered into a 24-byte instruction sequence let Defs = [ X9, X16, X17, NZCV ], Size = 24 in { @@ -9144,6 +9146,30 @@ let Predicates = [HasD128] in { } } +def AArch64eh_sjlj_setjmp : SDNode<"AArch64ISD::EH_SJLJ_SETJMP", + SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>, + [SDNPHasChain, SDNPSideEffect]>; +def AArch64eh_sjlj_longjmp : SDNode<"AArch64ISD::EH_SJLJ_LONGJMP", + SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>, + [SDNPHasChain, SDNPSideEffect]>; + +let isCodeGenOnly = 1, usesCustomInserter = 1, hasNoSchedulingInfo = 1 in { + def AArch64_setjmp_instr : Pseudo<(outs GPR32:$dst), (ins GPR64:$buf), + [(set GPR32:$dst, (AArch64eh_sjlj_setjmp GPR64:$buf))]>; + def AArch64_longjmp_instr : Pseudo<(outs), (ins GPR64:$buf), + [(AArch64eh_sjlj_longjmp GPR64:$buf)]>; +} + +// This instruction is needed to make the longjmp target block reachable. 
+def EH_SjLj_Setup : AArch64Inst { + let isTerminator = 1; + let isCodeGenOnly = 1; + let hasNoSchedulingInfo = 1; + let AsmString = "#EH_SjLj_Setup\t$dst"; + dag OutOperandList = (outs); + dag InOperandList = (ins am_brcond:$dst); +} + include "AArch64InstrAtomics.td" include "AArch64SVEInstrInfo.td" diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index d1ddf6d7697547..b21aff9cf78ee0 100644 --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -512,6 +512,12 @@ unsigned AArch64RegisterInfo::getBaseRegister() const { return AArch64::X19; } bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); + // For stealable functions, where the stack pointer can change dramatically + // during execution, the base pointer is the only reliable way to reference + // local variables. + if (MF.getFunction().hasFnAttribute(Attribute::Stealable)) + return true; + // In the presence of variable sized objects or funclets, if the fixed stack // size is large enough that referencing from the FP won't result in things // being in range relatively often, we can use a base pointer to allow access diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index 2dbbab13e8f3b9..142325b0648934 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -679,7 +679,8 @@ void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI, return; } - if (MI.getOpcode() == AArch64::SPACE) { + if (MI.getOpcode() == AArch64::SPACE || + MI.getOpcode() == AArch64::EH_SjLj_Setup) { // SPACE just increases basic block size, in both cases no actual code. 
return; } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0f1cb5f1e23665..b374232073292a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -38225,6 +38225,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, MachineFunction *MF = MBB->getParent(); const TargetInstrInfo *TII = Subtarget.getInstrInfo(); MachineRegisterInfo &MRI = MF->getRegInfo(); + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); // Memory Reference SmallVector MMOs(MI.memoperands_begin(), @@ -38257,6 +38258,18 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, thisMBB = emitLongJmpShadowStackFix(MI, thisMBB); } + // Copy stack addresses to a temporary register. + if (MI.getOperand(0).isFI() || MI.readsRegister(FP, TRI) || + MI.readsRegister(SP, TRI)) { + Register AddrTmp = MRI.createVirtualRegister(RC); + unsigned LEA = (PVT == MVT::i64) ? X86::LEA64r : X86::LEA32r; + MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(LEA), AddrTmp); + for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { + MIB.add(MI.getOperand(i)); + } + MI.getOperand(0).ChangeToRegister(AddrTmp, false, false, true); + } + // Reload FP MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(PtrLoadOpc), FP); for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 10a0ccdcb02329..d39c9cd3715194 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3345,22 +3345,68 @@ bool X86InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB, unsigned X86InstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const { + return removeBranchImpl(MBB, BytesRemoved, false); +} + +unsigned X86InstrInfo::removeBranchAndFlags(MachineBasicBlock &MBB, + int *BytesRemoved) const { + return removeBranchImpl(MBB, BytesRemoved, true); +} + +unsigned X86InstrInfo::removeBranchImpl(MachineBasicBlock &MBB, 
+ int *BytesRemoved, + bool DeleteFlags) const { assert(!BytesRemoved && "code size not handled"); + const X86RegisterInfo *TRI = &getRegisterInfo(); MachineBasicBlock::iterator I = MBB.end(); unsigned Count = 0; + bool FlagsDead = false; while (I != MBB.begin()) { --I; if (I->isDebugInstr()) continue; - if (I->getOpcode() != X86::JMP_1 && - X86::getCondFromBranch(*I) == X86::COND_INVALID) - break; - // Remove the branch. - I->eraseFromParent(); - I = MBB.end(); - ++Count; + if (I->getOpcode() == X86::JMP_1) { + // Remove the branch. + I->eraseFromParent(); + I = MBB.end(); + ++Count; + continue; + } + if (X86::getCondFromBranch(*I) != X86::COND_INVALID) { + if (DeleteFlags && I->killsRegister(X86::EFLAGS, TRI)) { + FlagsDead = true; + } + // Remove the branch. + I->eraseFromParent(); + I = MBB.end(); + ++Count; + continue; + } + if (!FlagsDead) + continue; + if (I->hasUnmodeledSideEffects() || I->readsRegister(X86::EFLAGS, TRI)) { + FlagsDead = false; + continue; + } + if (I->modifiesRegister(X86::EFLAGS, TRI)) { + /* This is like allDefsAreDead but ignores EFLAGS. 
*/ + for (const MachineOperand &MO : I->operands()) { + if (MO.isReg() && MO.getReg().id() != X86::EFLAGS && !MO.isUse() && + !MO.isDead()) { + FlagsDead = false; + break; + } + } + if (FlagsDead) { + FlagsDead = false; + I->eraseFromParent(); + I = MBB.end(); + ++Count; + continue; + } + } } return Count; @@ -4722,6 +4768,115 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, return true; } +std::optional +X86InstrInfo::isZeroTest(MachineBasicBlock &MBB) const { + const X86RegisterInfo *TRI = &getRegisterInfo(); + SmallVector Cond; + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + MachineBasicBlock *Zero = nullptr, *Nonzero = nullptr; + + if (analyzeBranch(MBB, TBB, FBB, Cond, false) || Cond.size() != 1) + return std::optional(); + + switch (Cond[0].getImm()) { + case X86::COND_E: + Nonzero = FBB; + Zero = TBB; + break; + case X86::COND_NE: + Nonzero = TBB; + Zero = FBB; + break; + default: + return std::optional(); + } + MachineBasicBlock::const_reverse_instr_iterator MI = MBB.instr_rbegin(); + while (MI != MBB.instr_rend() && MI->isUnconditionalBranch()) + ++MI; + + if (MI == MBB.instr_rend() || !MI->isConditionalBranch()) + return std::optional(); + + BlockBRNZ Desc; + Desc.Zero = Zero; + Desc.Nonzero = Nonzero; + + // Only handle conditional branches that kill EFLAGS, because + // that is the common case. + // if (!MI->killsRegister(X86::EFLAGS)) + // return false; + + while (++MI != MBB.instr_rend()) { + // TEST32rr is the usual instruction to compare against zero. + if (MI->getOpcode() == X86::TEST32rr) { + const MachineOperand &op = MI->getOperand(0); + if (op.getReg() != MI->getOperand(1).getReg()) + return std::optional(); + Desc.IsKill = op.isKill(); + Desc.Regs.push_back(op.getReg()); + break; + } + // If EFLAGS is set other than by TEST32rr, fail. + // TODO: Possibly also CMP32ri8? 
+ if (MI->modifiesRegister(X86::EFLAGS, TRI)) + return std::optional(); + } + if (Desc.Regs.size() != 1) { + return std::optional(); + } + const Register &Reg0 = Desc.Regs[0]; + + while (++MI != MBB.instr_rend()) { + if (MI->isPHI()) { + if (MI->getOperand(0).getReg() == Reg0) { + unsigned NumOperands = MI->getNumOperands(); + for (unsigned I = 1; I < NumOperands; I += 2) { + Desc.Regs.push_back(MI->getOperand(I).getReg()); + } + } + // There should be only one PHI setting the register. + return Desc; + } + if (MI->modifiesRegister(Reg0, TRI)) + return std::optional(); + if (MI->readsRegister(Reg0, TRI)) + Desc.IsKill = false; + } + return Desc; +} + +bool X86InstrInfo::isSetConstant(const MachineInstr &MI, Register &Reg, + int64_t &Value) const { + if (MI.getNumOperands() < 1) + return false; + const MachineOperand &Op0 = MI.getOperand(0); + if (!Op0.isReg()) + return false; + Reg = Op0.getReg(); + switch (MI.getOpcode()) { + case X86::MOV32r0: + Value = 0; + return true; + case X86::MOV32r1: + Value = 1; + return true; + case X86::XOR32rr: + if (MI.getOperand(1).getReg() != Reg) + return false; + Value = 0; + return true; + case X86::MOV32ri: { + const MachineOperand &Src = MI.getOperand(1); + if (!Src.isImm()) + return false; + Value = Src.getImm(); + return true; + } + default: + return false; + } +} + /// Try to remove the load by folding it to a register /// operand at the use. 
We fold the load instructions if load defines a virtual /// register, the virtual register is used once in the same BB, and the diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index 82554032ebd661..d3af073a4d6b47 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -351,6 +351,8 @@ class X86InstrInfo final : public X86GenInstrInfo { unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved = nullptr) const override; + unsigned removeBranchAndFlags(MachineBasicBlock &MBB, + int *BytesRemoved = nullptr) const override; unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef Cond, const DebugLoc &DL, @@ -535,6 +537,10 @@ class X86InstrInfo final : public X86GenInstrInfo { Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override; + std::optional isZeroTest(MachineBasicBlock &MBB) const override; + bool isSetConstant(const MachineInstr &MI, Register &Reg, + int64_t &Value) const override; + /// optimizeLoadInstr - Try to remove the load by folding it to a register /// operand at the use. We fold the load instructions if and only if the /// def and use are in the same BB. We only look at one load and see @@ -633,6 +639,9 @@ class X86InstrInfo final : public X86GenInstrInfo { } private: + unsigned removeBranchImpl(MachineBasicBlock &MBB, int *BytesRemoved, + bool DeleteFlags) const; + /// This is a helper for convertToThreeAddress for 8 and 16-bit instructions. /// We use 32-bit LEA to form 3-address code by promoting to a 32-bit /// super-register and then truncating back down to a 8/16-bit sub-register. 
diff --git a/llvm/lib/Transforms/CMakeLists.txt b/llvm/lib/Transforms/CMakeLists.txt index dda5f6de11e326..6d7d8199056927 100644 --- a/llvm/lib/Transforms/CMakeLists.txt +++ b/llvm/lib/Transforms/CMakeLists.txt @@ -9,3 +9,4 @@ add_subdirectory(Hello) add_subdirectory(ObjCARC) add_subdirectory(Coroutines) add_subdirectory(CFGuard) +add_subdirectory(Tapir) diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt index 034f1587ae8df4..a3db3092019607 100644 --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -68,6 +68,7 @@ add_llvm_component_library(LLVMipo ProfileData Scalar Support + TapirOpts TargetParser TransformUtils Vectorize diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp index 34299f9dbb2325..9577a596bdbc67 100644 --- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp @@ -43,6 +43,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Metadata.h" #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/PassManager.h" @@ -200,6 +201,11 @@ static MemoryEffects checkFunctionMemoryAccess(Function &F, bool ThisBody, } } continue; + } else if (isa(I) || isa(I) || isa(I)) { + // Tapir instructions only access memory accessed by other instructions in + // the function. Hence we let the other instructions determine the + // attribute of this function. + continue; } ModRefInfo MR = ModRefInfo::NoModRef; @@ -1380,6 +1386,13 @@ static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) { return false; if (const auto *CI = dyn_cast(&I)) { if (Function *Callee = CI->getCalledFunction()) { + // Ignore sync.unwind, detached.rethrow, and taskframe.resume when + // checking if a function can throw, since they are simply placeholders. 
+ if (Intrinsic::sync_unwind == Callee->getIntrinsicID() || + Intrinsic::detached_rethrow == Callee->getIntrinsicID() || + Intrinsic::taskframe_resume == Callee->getIntrinsicID()) + return false; + // I is a may-throw call to a function inside our SCC. This doesn't // invalidate our current working assumption that the SCC is no-throw; we // just have to scan that other function. @@ -1584,9 +1597,48 @@ static void addNoRecurseAttrs(const SCCNodeSet &SCCNodes, for (auto &I : BB.instructionsWithoutDebug()) if (auto *CB = dyn_cast(&I)) { Function *Callee = CB->getCalledFunction(); - if (!Callee || Callee == F || !Callee->doesNotRecurse()) - // Function calls a potentially recursive function. - return; + if (!Callee || Callee == F || !Callee->doesNotRecurse()) { + if (Callee && Callee != F) + // Ignore certain intrinsics when inferring norecurse. + switch (Callee->getIntrinsicID()) { + default: return; + case Intrinsic::annotation: + case Intrinsic::assume: + case Intrinsic::sideeffect: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: + case Intrinsic::is_constant: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + case Intrinsic::experimental_gc_result: + case Intrinsic::experimental_gc_relocate: + case Intrinsic::coro_alloc: + case Intrinsic::coro_begin: + case Intrinsic::coro_free: + case Intrinsic::coro_end: + case Intrinsic::coro_frame: + case Intrinsic::coro_size: + case Intrinsic::coro_suspend: + case Intrinsic::coro_subfn_addr: + case Intrinsic::syncregion_start: + case Intrinsic::detached_rethrow: + case Intrinsic::taskframe_create: + case Intrinsic::taskframe_use: + case Intrinsic::taskframe_end: + case Intrinsic::taskframe_resume: + case Intrinsic::taskframe_load_guard: + case Intrinsic::sync_unwind: + continue; + } + else + // Function calls a 
potentially recursive function. + return; + } } // Every call was to a non-recursive function other than this function, and diff --git a/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp b/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp index 18d5911d10f121..879219bfcf3720 100644 --- a/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp +++ b/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp @@ -29,6 +29,7 @@ static bool inferAllPrototypeAttributes( if (F.isDeclaration() && !F.hasOptNone()) { if (!F.hasFnAttribute(Attribute::NoBuiltin)) Changed |= inferNonMandatoryLibFuncAttrs(F, GetTLI(F)); + Changed |= inferTapirTargetLibFuncAttributes(F, GetTLI(F)); Changed |= inferAttributesFromOthers(F); } diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp index b88ba2dec24bae..08f9b83d0f34c1 100644 --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -826,6 +826,9 @@ PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB, if (I.isLifetimeStartOrEnd()) continue; + if (I.isTaskFrameMarker()) + continue; + if (auto *II = dyn_cast(&I)) { Intrinsic::ID IID = II->getIntrinsicID(); SmallVector Tys; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index d3ec6a7aa667be..38447dadcf8910 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -68,6 +68,7 @@ #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" +#include "llvm/Transforms/Utils/TapirUtils.h" #include #include #include @@ -2659,6 +2660,127 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { })) return nullptr; break; + case Intrinsic::sync_unwind: { + // If the function does not throw, we don't need the sync.unwind. 
+ if (II->getFunction()->doesNotThrow()) + return eraseInstFromFunction(CI); + + if (II != II->getParent()->getFirstNonPHIOrDbgOrLifetime()) { + // Check if the instruction at the start of II's block is a redundant + // sync.unwind. + const Value *SyncReg = CI.getArgOperand(0); + if (isSyncUnwind(II->getParent()->getFirstNonPHIOrDbgOrLifetime(), + SyncReg)) + return eraseInstFromFunction(CI); + } + // Check for any syncs that might use this sync.unwind. + int NumUsers = 0; + for (BasicBlock *Pred : predecessors(CI.getParent())) + if (isa(Pred->getTerminator())) { + ++NumUsers; + break; + } + // If we didn't find any syncs that use this sync.unwind, remove it. + if (!NumUsers) + return eraseInstFromFunction(CI); + break; + } + case Intrinsic::syncregion_start: { + // Check for any users of this syncregion. + int NumUsers = 0; + for (User *U : II->users()) { + // Check for any Tapir instructions using this syncregion. + if (isa(U) || isa(U) || isa(U)) { + ++NumUsers; + break; + } + // Check for any Tapir intrinsics using this syncregion. + if (CallBase *CB = dyn_cast(U)) + if (isSyncUnwind(CB) || isDetachedRethrow(CB)) { + ++NumUsers; + break; + } + } + // If we have no users, it's safe to delete this syncregion. + if (!NumUsers) + return eraseInstFromFunction(CI); + break; + } + case Intrinsic::detached_rethrow: { + assert(isa(II)); + return eraseInstFromFunction(CI); + } + case Intrinsic::taskframe_use: { + // Remove a taskframe.use if it is not in a detached block. + BasicBlock *Parent = II->getParent(); + if (!Parent->getSinglePredecessor()) + return eraseInstFromFunction(CI); + + BasicBlock *Pred = Parent->getSinglePredecessor(); + if (!isa(Pred->getTerminator())) + return eraseInstFromFunction(CI); + + DetachInst *DI = cast(Pred->getTerminator()); + if (DI->getDetached() != Parent) + return eraseInstFromFunction(CI); + break; + } + case Intrinsic::taskframe_create: { + // Remove a taskframe.create if it has no uses. 
+ int NumUsers = 0; + for (User *U : II->users()) { + if (Instruction *I = dyn_cast(U)) + if (isTapirIntrinsic(Intrinsic::taskframe_use, I) || + isTapirIntrinsic(Intrinsic::taskframe_end, I) || + isTaskFrameResume(I)) { + ++NumUsers; + break; + } + } + if (!NumUsers) + return eraseInstFromFunction(CI); + break; + } + case Intrinsic::taskframe_resume: { + assert(isa(II)); + return eraseInstFromFunction(CI); + } + case Intrinsic::tapir_runtime_end: { + Value *PrevRTStart = CI.getArgOperand(0); + // If there's a tapir.runtime.start in the same block after this + // tapir.runtime.end with no interesting instructions in between, eliminate + // both. + BasicBlock::iterator Iter(CI); + while (++Iter != CI.getParent()->end()) { + if (isTapirIntrinsic(Intrinsic::tapir_runtime_start, &*Iter)) { + // Replace the uses of the tapir.runtime.start with the argument to the + // tapir.runtime.end. + replaceInstUsesWith(*Iter, PrevRTStart); + eraseInstFromFunction(*Iter); + return eraseInstFromFunction(CI); + } + if (isa(&*Iter) && !isa(&*Iter)) + // We found a nontrivial call. Give up. + break; + } + break; + } + case Intrinsic::tapir_runtime_start: { + // If there's a tapir.runtime.end in the same block after this + // tapir.runtime.start with no interesting instructions in between, + // eliminate both. + BasicBlock::iterator Iter(CI); + while (++Iter != CI.getParent()->end()) { + if (isTapirIntrinsic(Intrinsic::tapir_runtime_end, &*Iter, &CI)) { + eraseInstFromFunction(*Iter); + return eraseInstFromFunction(CI); + } + if (isa(&*Iter) && !isa(&*Iter)) + // We found a nontrivial call. Give up. 
+ break; + } + break; + } case Intrinsic::assume: { Value *IIOperand = II->getArgOperand(0); SmallVector OpBundles; diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 767b7c7defbb6e..5f8a3caac1dd9d 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3122,6 +3122,7 @@ static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) { case EHPersonality::CoreCLR: case EHPersonality::Wasm_CXX: case EHPersonality::XL_CXX: + case EHPersonality::Cilk_CXX: return TypeInfo->isNullValue(); } llvm_unreachable("invalid enum"); @@ -3779,6 +3780,11 @@ bool InstCombinerImpl::tryToSinkInstruction(Instruction *I, // successor block. if (DestBlock->getUniquePredecessor() != I->getParent()) return false; + // We can't generally move an instruction that reads from memory past a + // detach or reattach. + if (isa(I->getParent()->getTerminator()) || + isa(I->getParent()->getTerminator())) + return false; for (BasicBlock::iterator Scan = std::next(I->getIterator()), E = I->getParent()->end(); Scan != E; ++Scan) @@ -3931,6 +3937,10 @@ bool InstCombinerImpl::run() { // Make sure these checks are done only once, naturally we do the checks // the first time we get the userparent, this will save compile time. if (NumUsers == 0) { + // Don't sink if the successor follows through a sync instruction. + if (isa(BB->getTerminator())) + return std::nullopt; + // Try sinking to another block. If that block is unreachable, then do // not bother. SimplifyCFG should handle it. 
if (UserParent == BB || !DT.isReachableFromEntry(UserParent)) diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index f4bf6db569f247..71c4f7350421ad 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/StackSafetyAnalysis.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/BinaryFormat/MachO.h" @@ -643,7 +644,7 @@ namespace { /// AddressSanitizer: instrument the code in module to find memory bugs. struct AddressSanitizer { - AddressSanitizer(Module &M, const StackSafetyGlobalInfo *SSGI, + AddressSanitizer(Module &M, const StackSafetyGlobalInfo *SSGI, TaskInfo *TI, bool CompileKernel = false, bool Recover = false, bool UseAfterScope = false, AsanDetectStackUseAfterReturnMode UseAfterReturn = @@ -654,7 +655,7 @@ struct AddressSanitizer { UseAfterScope(UseAfterScope || ClUseAfterScope), UseAfterReturn(ClUseAfterReturn.getNumOccurrences() ? ClUseAfterReturn : UseAfterReturn), - SSGI(SSGI) { + TI(TI), SSGI(SSGI) { C = &(M.getContext()); DL = &M.getDataLayout(); LongSize = M.getDataLayout().getPointerSizeInBits(); @@ -715,6 +716,7 @@ struct AddressSanitizer { bool maybeInsertAsanInitAtFunctionEntry(Function &F); bool maybeInsertDynamicShadowAtFunctionEntry(Function &F); void markEscapedLocalAllocas(Function &F); + void recordInterestingParallelAllocas(const Function &F); private: friend struct FunctionStackPoisoner; @@ -758,6 +760,9 @@ struct AddressSanitizer { FunctionCallee AsanPtrCmpFunction, AsanPtrSubFunction; Constant *AsanShadowGlobal; + // Analyses + TaskInfo *TI; + // These arrays is indexed by AccessIsWrite, Experiment and log2(AccessSize). 
FunctionCallee AsanErrorCallback[2][2][kNumberOfAccessSizes]; FunctionCallee AsanMemoryAccessCallback[2][2][kNumberOfAccessSizes]; @@ -770,6 +775,7 @@ struct AddressSanitizer { Value *LocalDynamicShadow = nullptr; const StackSafetyGlobalInfo *SSGI; DenseMap ProcessedAllocas; + SmallPtrSet InterestingParallelAllocas; FunctionCallee AMDGPUAddressShared; FunctionCallee AMDGPUAddressPrivate; @@ -1161,7 +1167,10 @@ PreservedAnalyses AddressSanitizerPass::run(Module &M, const StackSafetyGlobalInfo *const SSGI = ClUseStackSafety ? &MAM.getResult(M) : nullptr; for (Function &F : M) { - AddressSanitizer FunctionSanitizer(M, SSGI, Options.CompileKernel, + TaskInfo *TI = nullptr; + if (!F.empty()) + TI = &FAM.getResult(F); + AddressSanitizer FunctionSanitizer(M, SSGI, TI, Options.CompileKernel, Options.Recover, Options.UseAfterScope, Options.UseAfterReturn); const TargetLibraryInfo &TLI = FAM.getResult(F); @@ -1261,6 +1270,8 @@ bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) { // We are only interested in allocas not promotable to registers. // Promotable allocas are common under -O0. (!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)) && + (!ClSkipPromotableAllocas || + (TI->isSerial() || InterestingParallelAllocas.contains(&AI))) && // inalloca allocas are not treated as static, and we don't want // dynamic alloca instrumentation for them as well. !AI.isUsedWithInAlloca() && @@ -2788,6 +2799,21 @@ void AddressSanitizer::markEscapedLocalAllocas(Function &F) { } } +void AddressSanitizer::recordInterestingParallelAllocas(const Function &F) { + if (!ClSkipPromotableAllocas || TI->isSerial()) + return; + + for (const BasicBlock &BB : F) + for (const Instruction &I : BB) + if (const AllocaInst *AI = dyn_cast(&I)) + if (AI->getAllocatedType()->isSized() && + ((!AI->isStaticAlloca()) || getAllocaSizeInBytes(*AI) > 0) && + // We are only interested in allocas not promotable to registers. + // Promotable allocas are common under -O0. 
+ !isAllocaPromotable(AI) && !TI->isAllocaParallelPromotable(AI)) + InterestingParallelAllocas.insert(AI); +} + bool AddressSanitizer::suppressInstrumentationSiteForDebug(int &Instrumented) { bool ShouldInstrument = ClDebugMin < 0 || ClDebugMax < 0 || @@ -2830,6 +2856,10 @@ bool AddressSanitizer::instrumentFunction(Function &F, // can be passed to that intrinsic. markEscapedLocalAllocas(F); + // Record all interesting parallel allocas, using TaskInfo analysis before + // instrumentation may disrupt the validity of the analysis. + recordInterestingParallelAllocas(F); + // We want to instrument every address only once per basic block (unless there // are calls between uses). SmallPtrSet TempsToInstrument; diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt index 424f1d43360677..54aea8b5be55e9 100644 --- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_component_library(LLVMInstrumentation BoundsChecking.cpp CGProfile.cpp ControlHeightReduction.cpp + CilkSanitizer.cpp DataFlowSanitizer.cpp GCOVProfiling.cpp BlockCoverageInference.cpp @@ -21,6 +22,8 @@ add_llvm_component_library(LLVMInstrumentation ValueProfileCollector.cpp ThreadSanitizer.cpp HWAddressSanitizer.cpp + ComprehensiveStaticInstrumentation.cpp + SurgicalInstrumentationConfig.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms @@ -32,6 +35,8 @@ add_llvm_component_library(LLVMInstrumentation Analysis Core Demangle + IRReader + Linker MC Support TargetParser diff --git a/llvm/lib/Transforms/Instrumentation/CilkSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/CilkSanitizer.cpp new file mode 100644 index 00000000000000..55489ac59695a5 --- /dev/null +++ b/llvm/lib/Transforms/Instrumentation/CilkSanitizer.cpp @@ -0,0 +1,4829 @@ +//===- CilkSanitizer.cpp - Nondeterminism detector for Cilk/Tapir ---------===// +// +// Part of the LLVM Project, 
under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of CilkSan, a determinacy-race detector for Cilk +// programs. +// +// This instrumentation pass inserts calls to the runtime library before +// appropriate memory accesses. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Instrumentation/CilkSanitizer.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/MustExecute.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TapirRaceDetect.h" +#include "llvm/Analysis/TapirTaskInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/ModRef.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Instrumentation/CSI.h" +#include "llvm/Transforms/IPO/FunctionAttrs.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/EscapeEnumerator.h" +#include 
"llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" +#include "llvm/Transforms/Utils/TapirUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "cilksan" + +STATISTIC(NumInstrumentedReads, "Number of instrumented reads"); +STATISTIC(NumInstrumentedWrites, "Number of instrumented writes"); +STATISTIC(NumAccessesWithBadSize, "Number of accesses with bad size"); +STATISTIC(NumOmittedReadsBeforeWrite, + "Number of reads ignored due to following writes"); +STATISTIC(NumOmittedReadsFromConstants, "Number of reads from constant data"); +STATISTIC(NumOmittedNonCaptured, "Number of accesses ignored due to capturing"); +STATISTIC(NumInstrumentedMemIntrinsicReads, + "Number of instrumented reads from memory intrinsics"); +STATISTIC(NumInstrumentedMemIntrinsicWrites, + "Number of instrumented writes from memory intrinsics"); +STATISTIC(NumInstrumentedDetaches, "Number of instrumented detaches"); +STATISTIC(NumInstrumentedDetachExits, "Number of instrumented detach exits"); +STATISTIC(NumInstrumentedSyncs, "Number of instrumented syncs"); +STATISTIC(NumInstrumentedAllocas, "Number of instrumented allocas"); +STATISTIC(NumInstrumentedAllocFns, + "Number of instrumented allocation functions"); +STATISTIC(NumInstrumentedFrees, "Number of instrumented free calls"); +STATISTIC( + NumHoistedInstrumentedReads, + "Number of reads whose instrumentation has been coalesced and hoisted"); +STATISTIC( + NumHoistedInstrumentedWrites, + "Number of writes whose instrumentation has been coalesced and hoisted"); +STATISTIC(NumSunkInstrumentedReads, + "Number of reads whose instrumentation has been coalesced and sunk"); +STATISTIC(NumSunkInstrumentedWrites, + "Number of writes whose instrumentation has been coalesced and sunk"); + +static cl::opt + EnableStaticRaceDetection( + "enable-static-race-detection", 
cl::init(true), cl::Hidden, + cl::desc("Enable static detection of determinacy races.")); + +static cl::opt + AssumeRaceFreeLibraryFunctions( + "assume-race-free-lib", cl::init(false), cl::Hidden, + cl::desc("Assume library functions are race free.")); + +static cl::opt + IgnoreInaccessibleMemory( + "ignore-inaccessible-memory", cl::init(false), cl::Hidden, + cl::desc("Ignore inaccessible memory when checking for races.")); + +static cl::opt + AssumeNoExceptions( + "cilksan-assume-no-exceptions", cl::init(false), cl::Hidden, + cl::desc("Assume that ordinary calls cannot throw exceptions.")); + +static cl::opt + MaxUsesToExploreCapture( + "cilksan-max-uses-to-explore-capture", cl::init(unsigned(-1)), + cl::Hidden, + cl::desc("Maximum number of uses to explore for a capture query.")); + +static cl::opt MAAPChecks("cilksan-maap-checks", cl::init(true), + cl::Hidden, + cl::desc("Enable or disable MAAP checks.")); + +static cl::opt LoopHoisting( + "cilksan-loop-hoisting", cl::init(true), cl::Hidden, + cl::desc("Enable or disable hoisting instrumentation out of loops.")); + +static cl::opt + IgnoreSanitizeCilkAttr( + "ignore-sanitize-cilk-attr", cl::init(false), cl::Hidden, + cl::desc("Ignore the 'sanitize_cilk' attribute when choosing what to " + "instrument.")); + +static cl::opt ClCilksanBCPath( + "cilksan-bc-path", cl::init(""), cl::Hidden, + cl::desc("Path to the bitcode file for the Cilksan library.")); + +static const unsigned SERIESPARALLEL = 0x1; +static const unsigned SHADOWMEMORY = 0x2; +static cl::opt InstrumentationSet( + "cilksan-instrumentation-set", cl::init(SERIESPARALLEL | SHADOWMEMORY), + cl::Hidden, + cl::desc("Specify the set of instrumentation hooks to insert.")); + +static const char *const CsanRtUnitInitName = "__csanrt_unit_init"; +static const char *const CsiUnitObjTableName = "__csi_unit_obj_table"; +static const char *const CsiUnitObjTableArrayName = "__csi_unit_obj_tables"; + +/// Maintains a mapping from CSI ID of a load or store to the 
source information +/// of the object accessed by that load or store. +class ObjectTable : public ForensicTable { +public: + ObjectTable() : ForensicTable() {} + ObjectTable(Module &M, StringRef BaseIdName) : ForensicTable(M, BaseIdName) {} + + /// The number of entries in this table + uint64_t size() const { return LocalIdToSourceLocationMap.size(); } + + /// Add the given instruction to this table. + /// \returns The local ID of the Instruction. + uint64_t add(Instruction &I, Value *Obj); + + /// Get the Type for a pointer to a table entry. + /// + /// A table entry is just a source location. + static PointerType *getPointerType(LLVMContext &C); + + /// Insert this table into the given Module. + /// + /// The table is constructed as a ConstantArray indexed by local IDs. The + /// runtime is responsible for performing the mapping that allows the table to + /// be indexed by global ID. + Constant *insertIntoModule(Module &M) const; + +private: + struct SourceLocation { + StringRef Name; + int32_t Line; + StringRef Filename; + StringRef Directory; + }; + + /// Map of local ID to SourceLocation. + DenseMap LocalIdToSourceLocationMap; + + /// Create a struct type to match the "struct SourceLocation" type. + /// (and the source_loc_t type in csi.h). + static StructType *getSourceLocStructType(LLVMContext &C); + + /// Append the line and file information to the table. + void add(uint64_t ID, int32_t Line = -1, + StringRef Filename = "", StringRef Directory = "", + StringRef Name = ""); +}; + +namespace { +struct CilkSanitizerImpl : public CSIImpl { + // Class to manage inserting instrumentation without static race detection. 
+ class SimpleInstrumentor { + public: + SimpleInstrumentor(CilkSanitizerImpl &CilkSanImpl, TaskInfo &TI, + LoopInfo &LI, DominatorTree &DT, + const TargetLibraryInfo *TLI) + : CilkSanImpl(CilkSanImpl), TI(TI), LI(LI), DT(DT), + DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy), TLI(TLI) {} + + bool InstrumentSimpleInstructions( + SmallVectorImpl &Instructions); + bool InstrumentAnyMemIntrinsics( + SmallVectorImpl &MemIntrinsics); + bool InstrumentCalls(SmallVectorImpl &Calls); + bool InstrumentAncillaryInstructions( + SmallPtrSetImpl &Allocas, + SmallPtrSetImpl &AllocationFnCalls, + SmallPtrSetImpl &FreeCalls, + DenseMap &SyncRegNums, + DenseMap &SRCounters, const DataLayout &DL); + + private: + void getDetachesForInstruction(Instruction *I); + + CilkSanitizerImpl &CilkSanImpl; + TaskInfo &TI; + LoopInfo &LI; + DominatorTree &DT; + DomTreeUpdater DTU; + const TargetLibraryInfo *TLI; + + SmallPtrSet Detaches; + }; + + // Class to manage inserting instrumentation with static race detection. + class Instrumentor { + public: + Instrumentor(CilkSanitizerImpl &CilkSanImpl, RaceInfo &RI, TaskInfo &TI, + LoopInfo &LI, DominatorTree &DT, const TargetLibraryInfo *TLI) + : CilkSanImpl(CilkSanImpl), RI(RI), TI(TI), LI(LI), DT(DT), + DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy), TLI(TLI) {} + + void InsertArgMAAPs(Function &F, Value *FuncId); + bool InstrumentSimpleInstructions( + SmallVectorImpl &Instructions); + bool InstrumentAnyMemIntrinsics( + SmallVectorImpl &MemIntrinsics); + bool InstrumentCalls(SmallVectorImpl &Calls); + void GetDetachesForCoalescedInstrumentation( + SmallPtrSetImpl &LoopInstToHoist, + SmallPtrSetImpl &LoopInstToSink); + bool InstrumentAncillaryInstructions( + SmallPtrSetImpl &Allocas, + SmallPtrSetImpl &AllocationFnCalls, + SmallPtrSetImpl &FreeCalls, + DenseMap &SyncRegNums, + DenseMap &SRCounters, const DataLayout &DL); + bool InstrumentLoops(SmallPtrSetImpl &LoopInstToHoist, + SmallPtrSetImpl &LoopInstToSink, + SmallPtrSetImpl &TapirLoops, + 
ScalarEvolution *); + bool PerformDelayedInstrumentation(); + + private: + void getDetachesForInstruction(Instruction *I); + // A MAAP (May Access Alias in Parallel) encodes static information about + // memory access that may result in a race, in order to propagate that + // information dynamically at runtime. In particular, a MAAP for a pointer + // argument to a called function communicates to the callee whether the + // caller or some ancestor may read or write the referenced memory in + // parallel and whether the caller can provide any noalias guarantee on that + // memory location. + enum class MAAPValue : uint8_t + { + NoAccess = 0, + Mod = 1, + Ref = 2, + ModRef = Mod | Ref, + NoAlias = 4, + }; + static unsigned RaceTypeToFlagVal(RaceInfo::RaceType RT); + // Get the MAAP value for specific instruction and operand. + Value *getMAAPValue(Instruction *I, IRBuilder<> &IRB, + unsigned OperandNum = static_cast(-1), + MAAPValue DefaultMV = MAAPValue::ModRef, + bool CheckArgs = true); + // Helper method to determine noalias MAAP bit. + Value *getNoAliasMAAPValue(Instruction *I, IRBuilder<> &IRB, + unsigned OperandNum, MemoryLocation Loc, + const RaceInfo::RaceData &RD, + const Value *Obj, Value *MAAPVal); + // Synthesize a check of the MAAP to determine whether the MAAP means we can + // skip executing instrumentation for the given instruction. + Value *getMAAPCheck(Instruction *I, IRBuilder<> &IRB, + unsigned OperandNum = static_cast(-1)); + // Helper method to read a MAAP value. 
+ Value *readMAAPVal(Value *V, IRBuilder<> &IRB); + + CilkSanitizerImpl &CilkSanImpl; + RaceInfo &RI; + TaskInfo &TI; + LoopInfo &LI; + DominatorTree &DT; + DomTreeUpdater DTU; + const TargetLibraryInfo *TLI; + + SmallPtrSet Detaches; + + DenseMap LocalMAAPs; + SmallPtrSet ArgMAAPs; + + SmallVector DelayedSimpleInsts; + SmallVector, 8> DelayedMemIntrinsics; + SmallVector DelayedCalls; + }; + + // TODO: With recent changes in LLVM's JIT technology, the JitMode option + // no longer seems to be necessary. + CilkSanitizerImpl(Module &M, CallGraph *CG, + function_ref GetDomTree, + function_ref GetTaskInfo, + function_ref GetLoopInfo, + function_ref GetRaceInfo, + function_ref GetTLI, + function_ref GetSE, + // function_ref GetTTI, + bool CallsMayThrow = !AssumeNoExceptions, + bool JitMode = false) + : CSIImpl(M, CG, GetDomTree, GetLoopInfo, GetTaskInfo, GetTLI, GetSE, + nullptr), + GetRaceInfo(GetRaceInfo) { + // Even though we're doing our own instrumentation, we want the CSI setup + // for the instrumentation of function entry/exit, memory accesses (i.e., + // loads and stores), atomics, memory intrinsics. We also want call sites, + // for extracting debug information. + Options.InstrumentBasicBlocks = false; + Options.InstrumentLoops = true; + // Cilksan defines its own hooks for instrumenting memory accesses, memory + // intrinsics, and Tapir instructions, so we disable the default CSI + // instrumentation hooks for these IR objects. 
+ Options.InstrumentMemoryAccesses = false; + Options.InstrumentMemIntrinsics = false; + Options.InstrumentTapir = false; + Options.InstrumentCalls = false; + Options.jitMode = JitMode; + Options.CallsMayThrow = CallsMayThrow; + } + bool setup(bool NeedToSetupCalls); + bool run(); + + static StructType *getUnitObjTableType(LLVMContext &C, + PointerType *EntryPointerType); + static Constant *objTableToUnitObjTable(Module &M, + StructType *UnitObjTableType, + ObjectTable &ObjTable); + static bool isLibCall(const Instruction &I, const TargetLibraryInfo *TLI); + static bool simpleCallCannotRace(const Instruction &I); + static bool shouldIgnoreCall(const Instruction &I); + + static DebugLoc searchForDebugLoc(Instruction *I) { + if (DebugLoc Loc = I->getDebugLoc()) + return Loc; + + // Try to find debug information later in this block. + BasicBlock::iterator BI = I->getIterator(); + BasicBlock::const_iterator BE(I->getParent()->end()); + while (BI != BE) { + if (DebugLoc Loc = BI->getDebugLoc()) { + return Loc; + } + ++BI; + } + + // Try to find debug information earlier in this block. + BI = I->getIterator(); + BasicBlock::const_iterator BB(I->getParent()->begin()); + while (BI != BB) { + --BI; + if (DebugLoc Loc = BI->getDebugLoc()) { + return Loc; + } + } + + return I->getDebugLoc(); + } + + void setupBlocks(Function &F, DominatorTree *DT = nullptr, + LoopInfo *LI = nullptr); + bool setupFunction(Function &F, bool NeedToSetupCalls); + + FunctionCallee getHookFunction(StringRef Name, FunctionType *FnTy, + AttributeList AL) { + FunctionCallee Callee = M.getOrInsertFunction(Name, FnTy, AL); + if (Function *Fn = dyn_cast(Callee.getCallee())) { + Fn->setOnlyAccessesInaccessibleMemOrArgMem(); + Fn->setDoesNotThrow(); + } + return Callee; + } + template + FunctionCallee getHookFunction(StringRef Name, AttributeList AL, Type *RetTy, + ArgsTy... 
Args) { + FunctionCallee Callee = M.getOrInsertFunction(Name, AL, RetTy, Args...); + if (Function *Fn = dyn_cast(Callee.getCallee())) { + MemoryEffects ME = MemoryEffects::argMemOnly(ModRefInfo::Ref) | + MemoryEffects::inaccessibleMemOnly(ModRefInfo::ModRef); + Fn->setMemoryEffects(ME); + Fn->setDoesNotThrow(); + } + return Callee; + } + template + FunctionCallee getHookFunction(StringRef Name, Type *RetTy, + ArgsTy... Args) { + return getHookFunction(Name, AttributeList{}, RetTy, Args...); + } + + // Methods for handling FED tables + void initializeFEDTables() {} + void collectUnitFEDTables() {} + + // Methods for handling object tables + void initializeCsanObjectTables(); + void collectUnitObjectTables(); + + // Create a call to the runtime unit initialization routine in a global + // constructor. + CallInst *createRTUnitInitCall(IRBuilder<> &IRB) override; + + // Initialize custom hooks for CilkSanitizer + void initializeCsanHooks(); + + Value *GetCalleeFuncID(const Function *Callee, IRBuilder<> &IRB); + + // Helper function for prepareToInstrumentFunction that chooses loads and + // stores in a basic block to instrument. + void chooseInstructionsToInstrument(SmallVectorImpl &Local, + SmallVectorImpl &All, + const TaskInfo &TI, LoopInfo &LI, + const TargetLibraryInfo *TLI); + + // Helper methods for instrumenting different IR objects. 
+ bool instrumentLoadOrStore(Instruction *I, IRBuilder<> &IRB); + bool instrumentLoadOrStore(Instruction *I) { + IRBuilder<> IRB(I); + if (!IRB.getCurrentDebugLocation()) + IRB.SetCurrentDebugLocation(searchForDebugLoc(I)); + return instrumentLoadOrStore(I, IRB); + } + bool instrumentAtomic(Instruction *I, IRBuilder<> &IRB); + bool instrumentAtomic(Instruction *I) { + IRBuilder<> IRB(I); + if (!IRB.getCurrentDebugLocation()) + IRB.SetCurrentDebugLocation(searchForDebugLoc(I)); + return instrumentAtomic(I, IRB); + } + bool instrumentIntrinsicCall(Instruction *I, + SmallVectorImpl *MAAPVals = nullptr); + bool instrumentLibCall(Instruction *I, + SmallVectorImpl *MAAPVals = nullptr); + bool instrumentCallsite(Instruction *I, + SmallVectorImpl *MAAPVals = nullptr); + bool suppressCallsite(Instruction *I); + bool instrumentAllocFnLibCall(Instruction *I, const TargetLibraryInfo *TLI); + bool instrumentAllocationFn(Instruction *I, DominatorTree &DT, + const TargetLibraryInfo *TLI); + bool instrumentFree(Instruction *I, const TargetLibraryInfo *TLI); + bool instrumentDetach(DetachInst *DI, unsigned SyncRegNum, + unsigned NumSyncRegs, DominatorTree &DT, TaskInfo &TI, + LoopInfo &LI); + bool instrumentSync(SyncInst *SI, unsigned SyncRegNum); + void instrumentTapirLoop(Loop &L, TaskInfo &TI, + DenseMap &SyncRegNums, + ScalarEvolution *SE = nullptr); + bool instrumentAlloca(Instruction *I, TaskInfo &TI); + + bool instrumentFunctionUsingRI(Function &F); + // Helper method for RI-based race detection for instrumenting an access by a + // memory intrinsic. 
+ bool instrumentAnyMemIntrinAcc(Instruction *I, unsigned OperandNum, + IRBuilder<> &IRB); + bool instrumentAnyMemIntrinAcc(Instruction *I, unsigned OperandNum) { + IRBuilder<> IRB(I); + if (!IRB.getCurrentDebugLocation()) + IRB.SetCurrentDebugLocation(searchForDebugLoc(I)); + return instrumentAnyMemIntrinAcc(I, OperandNum, IRB); + } + + bool instrumentLoadOrStoreHoisted(Instruction *I, + Value *Addr, + Value *RangeVal, + IRBuilder<> &IRB, + uint64_t LocalId); + +private: + // Analysis results + function_ref GetRaceInfo; + + // Instrumentation hooks + FunctionCallee CsanFuncEntry = nullptr; + FunctionCallee CsanFuncExit = nullptr; + FunctionCallee CsanRead = nullptr; + FunctionCallee CsanWrite = nullptr; + FunctionCallee CsanLargeRead = nullptr; + FunctionCallee CsanLargeWrite = nullptr; + FunctionCallee CsanBeforeCallsite = nullptr; + FunctionCallee CsanAfterCallsite = nullptr; + FunctionCallee CsanDetach = nullptr; + FunctionCallee CsanDetachContinue = nullptr; + FunctionCallee CsanTaskEntry = nullptr; + FunctionCallee CsanTaskExit = nullptr; + FunctionCallee CsanSync = nullptr; + FunctionCallee CsanBeforeLoop = nullptr; + FunctionCallee CsanAfterLoop = nullptr; + FunctionCallee CsanAfterAllocFn = nullptr; + FunctionCallee CsanAfterFree = nullptr; + + // Hooks for suppressing instrumentation, e.g., around callsites that cannot + // expose a race. + FunctionCallee CsanDisableChecking = nullptr; + FunctionCallee CsanEnableChecking = nullptr; + + FunctionCallee GetMAAP = nullptr; + FunctionCallee SetMAAP = nullptr; + + // CilkSanitizer custom forensic tables + ObjectTable LoadObj, StoreObj, AllocaObj, AllocFnObj; + + SmallVector UnitObjTables; + + SmallVector AllocationFnCalls; + SmallVector FreeCalls; + SmallVector Allocas; + SmallPtrSet ToInstrument; + + // Map of functions to updated race type, for interprocedural analysis of + // races. 
+ DenseMap FunctionRaceType; + DenseMap ObjectMRForRace; + + DenseMap> DetachToSync; + + bool LocalBaseObj(const Value *Addr, LoopInfo *LI, + const TargetLibraryInfo *TLI) const; + bool PossibleRaceByCapture(const Value *Addr, const TaskInfo &TI, + LoopInfo *LI) const; + bool unknownObjectUses(const Value *Addr, LoopInfo *LI, + const TargetLibraryInfo *TLI) const; + + // Cached results of calls to getUnderlyingObjects. + using BaseObjMapTy = + DenseMap>; + mutable BaseObjMapTy BaseObjects; + SmallVectorImpl &lookupBaseObjects(const Value *Addr, + LoopInfo *LI) const { + if (!BaseObjects.count(Addr)) { + if (isa(Addr)) + BaseObjects.lookup(Addr); + else + getUnderlyingObjects(Addr, BaseObjects[Addr], LI, 0); + } + return BaseObjects[Addr]; + } + + bool MightHaveDetachedUse(const Value *Addr, const TaskInfo &TI) const; + // // Cached results of calls to MightHaveDetachedUse. + // using DetachedUseMapTy = DenseMap; + // mutable DetachedUseMapTy DetachedUseCache; + bool lookupMightHaveDetachedUse(const Value *Addr, const TaskInfo &TI) const { + return MightHaveDetachedUse(Addr, TI); + // if (!DetachedUseCache.count(Addr)) + // DetachedUseCache[Addr] = MightHaveDetachedUse(Addr, TI); + // return DetachedUseCache[Addr]; + } + + // Cached results of calls to PointerMayBeCaptured. + using MayBeCapturedMapTy = DenseMap; + mutable MayBeCapturedMapTy MayBeCapturedCache; + bool lookupPointerMayBeCaptured(const Value *Ptr) const { + if (!Ptr->getType()->isPointerTy()) + return false; + + if (!MayBeCapturedCache.count(Ptr)) { + if (isa(Ptr)) + MayBeCapturedCache.lookup(Ptr); + else + MayBeCapturedCache[Ptr] = PointerMayBeCaptured(Ptr, true, false, + MaxUsesToExploreCapture); + } + return MayBeCapturedCache[Ptr]; + } + + FunctionCallee getOrInsertSynthesizedHook(StringRef Name, FunctionType *T, + AttributeList AL = AttributeList()); +}; + +/// CilkSanitizer: instrument the code in module to find races. 
+struct CilkSanitizerLegacyPass : public ModulePass { + static char ID; // Pass identification, replacement for typeid. + CilkSanitizerLegacyPass(bool CallsMayThrow = !AssumeNoExceptions, + bool JitMode = false) + : ModulePass(ID), JitMode(JitMode), CallsMayThrow(CallsMayThrow) { + initializeCilkSanitizerLegacyPassPass(*PassRegistry::getPassRegistry()); + } + StringRef getPassName() const override { return "CilkSanitizer"; } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnModule(Module &M) override; + + bool JitMode = false; + bool CallsMayThrow = true; +}; +} // end anonymous namespace + +char CilkSanitizerLegacyPass::ID = 0; + +INITIALIZE_PASS_BEGIN( + CilkSanitizerLegacyPass, "csan", + "CilkSanitizer: detects determinacy races in Cilk programs.", + false, false) +INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TapirRaceDetectWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TaskInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_END( + CilkSanitizerLegacyPass, "csan", + "CilkSanitizer: detects determinacy races in Cilk programs.", + false, false) + +void CilkSanitizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addRequired(); +} + +uint64_t ObjectTable::add(Instruction &I, Value *Obj) { + uint64_t ID = getId(&I); + if (isa(Obj)) { + add(ID, -1, "", "", "(undef)"); + return ID; + } + + // First, if the underlying object is a global variable, get that variable's + // debug information. 
+ if (GlobalVariable *GV = dyn_cast(Obj)) { + SmallVector DbgGVExprs; + GV->getDebugInfo(DbgGVExprs); + for (auto *GVE : DbgGVExprs) { + auto *DGV = GVE->getVariable(); + if (DGV->getName() != "") { + add(ID, DGV->getLine(), DGV->getFilename(), DGV->getDirectory(), + DGV->getName()); + return ID; + } + } + add(ID, -1, "", "", Obj->getName()); + return ID; + } + + // Otherwise, if the underlying object is a function, get that function's + // debug information. + if (Function *F = dyn_cast(Obj)) { + if (DISubprogram *SP = F->getSubprogram()) { + add(ID, SP->getLine(), SP->getFilename(), SP->getDirectory(), + SP->getName()); + return ID; + } + add(ID, -1, "", "", Obj->getName()); + return ID; + } + + // Next, if this is an alloca instruction, look for a llvm.dbg.declare + // intrinsic. + if (AllocaInst *AI = dyn_cast(Obj)) { + TinyPtrVector DbgDeclares = FindDbgDeclareUses(AI); + if (!DbgDeclares.empty()) { + auto *LV = DbgDeclares.front()->getVariable(); + add(ID, LV->getLine(), LV->getFilename(), LV->getDirectory(), + LV->getName()); + return ID; + } + } + + // Otherwise just examine the llvm.dbg.value intrinsics for this object. 
+ SmallVector DbgValues; + findDbgValues(DbgValues, Obj); + for (auto *DVI : DbgValues) { + auto *LV = DVI->getVariable(); + if (LV->getName() != "") { + add(ID, LV->getLine(), LV->getFilename(), LV->getDirectory(), + LV->getName()); + return ID; + } + } + + add(ID, -1, "", "", Obj->getName()); + return ID; +} + +PointerType *ObjectTable::getPointerType(LLVMContext &C) { + return PointerType::get(getSourceLocStructType(C), 0); +} + +StructType *ObjectTable::getSourceLocStructType(LLVMContext &C) { + return StructType::get( + /* Name */ PointerType::get(IntegerType::get(C, 8), 0), + /* Line */ IntegerType::get(C, 32), + /* File */ PointerType::get(IntegerType::get(C, 8), 0)); +} + +void ObjectTable::add(uint64_t ID, int32_t Line, + StringRef Filename, StringRef Directory, + StringRef Name) { + assert(LocalIdToSourceLocationMap.find(ID) == + LocalIdToSourceLocationMap.end() && + "Id already exists in FED table."); + LocalIdToSourceLocationMap[ID] = {Name, Line, Filename, Directory}; +} + +// The order of arguments to ConstantStruct::get() must match the +// obj_source_loc_t type in csan.h. +static void addObjTableEntries(SmallVectorImpl &TableEntries, + StructType *TableType, Constant *Name, + Constant *Line, Constant *File) { + TableEntries.push_back(ConstantStruct::get(TableType, Name, Line, File)); +} + +Constant *ObjectTable::insertIntoModule(Module &M) const { + LLVMContext &C = M.getContext(); + StructType *TableType = getSourceLocStructType(C); + IntegerType *Int32Ty = IntegerType::get(C, 32); + Constant *Zero = ConstantInt::get(Int32Ty, 0); + Value *GepArgs[] = {Zero, Zero}; + SmallVector TableEntries; + + // Get the object-table entries for each ID. 
+ for (uint64_t LocalID = 0; LocalID < IdCounter; ++LocalID) { + const SourceLocation &E = LocalIdToSourceLocationMap.find(LocalID)->second; + // Source line + Constant *Line = ConstantInt::get(Int32Ty, E.Line); + // Source file + Constant *File; + { + std::string Filename = E.Filename.str(); + if (!E.Directory.empty()) + Filename = E.Directory.str() + "/" + Filename; + File = getObjectStrGV(M, Filename, "__csi_unit_filename_"); + } + // Variable name + Constant *Name = getObjectStrGV(M, E.Name, "__csi_unit_object_name_"); + + // Add entry to the table + addObjTableEntries(TableEntries, TableType, Name, Line, File); + } + + ArrayType *TableArrayType = ArrayType::get(TableType, TableEntries.size()); + Constant *Table = ConstantArray::get(TableArrayType, TableEntries); + GlobalVariable *GV = + new GlobalVariable(M, TableArrayType, false, GlobalValue::InternalLinkage, + Table, CsiUnitObjTableName); + return ConstantExpr::getGetElementPtr(GV->getValueType(), GV, GepArgs); +} + +namespace { + +using SCCNodeSet = SmallSetVector; + +} // end anonymous namespace + +bool CilkSanitizerImpl::setup(bool NeedToSetupCalls) { + // Setup functions for instrumentation. + for (scc_iterator I = scc_begin(CG); !I.isAtEnd(); ++I) { + const std::vector &SCC = *I; + for (CallGraphNode *N : SCC) + if (Function *F = N->getFunction()) + setupFunction(*F, NeedToSetupCalls); + } + return true; +} + +bool CilkSanitizerImpl::run() { + // Link the tool bitcode once initially, to get type definitions. + linkInToolFromBitcode(ClCilksanBCPath); + // Initialize components of the CSI and Cilksan system. + initializeCsi(); + initializeFEDTables(); + initializeCsanObjectTables(); + initializeCsanHooks(); + + // Evaluate the SCC's in the callgraph in post order to support + // interprocedural analysis of potential races in the module. + SmallVector InstrumentedFunctions; + + // Instrument functions. 
+ for (scc_iterator I = scc_begin(CG); !I.isAtEnd(); ++I) { + const std::vector &SCC = *I; + for (CallGraphNode *N : SCC) { + if (Function *F = N->getFunction()) + if (instrumentFunctionUsingRI(*F)) + InstrumentedFunctions.push_back(F); + } + } + // After all functions have been analyzed and instrumented, update their + // attributes. + for (Function *F : InstrumentedFunctions) { + updateInstrumentedFnAttrs(*F); + F->removeFnAttr(Attribute::SanitizeCilk); + } + + CSIImpl::collectUnitFEDTables(); + collectUnitFEDTables(); + collectUnitObjectTables(); + finalizeCsi(); + + // Link the tool bitcode a second time, for definitions of used functions. + linkInToolFromBitcode(ClCilksanBCPath); + return true; +} + +void CilkSanitizerImpl::initializeCsanObjectTables() { + LoadObj = ObjectTable(M, CsiLoadBaseIdName); + StoreObj = ObjectTable(M, CsiStoreBaseIdName); + AllocaObj = ObjectTable(M, CsiAllocaBaseIdName); + AllocFnObj = ObjectTable(M, CsiAllocFnBaseIdName); +} + +// Create a struct type to match the unit_obj_entry_t type in csanrt.c. 
+StructType *CilkSanitizerImpl::getUnitObjTableType( + LLVMContext &C, PointerType *EntryPointerType) { + return StructType::get(IntegerType::get(C, 64), EntryPointerType); +} + +Constant *CilkSanitizerImpl::objTableToUnitObjTable( + Module &M, StructType *UnitObjTableType, ObjectTable &ObjTable) { + Constant *NumEntries = + ConstantInt::get(IntegerType::get(M.getContext(), 64), ObjTable.size()); + // Constant *BaseIdPtr = + // ConstantExpr::getPointerCast(FedTable.baseId(), + // Type::getInt8PtrTy(M.getContext(), 0)); + Constant *InsertedTable = ObjTable.insertIntoModule(M); + return ConstantStruct::get(UnitObjTableType, NumEntries, + InsertedTable); +} + +void CilkSanitizerImpl::collectUnitObjectTables() { + LLVMContext &C = M.getContext(); + StructType *UnitObjTableType = + getUnitObjTableType(C, ObjectTable::getPointerType(C)); + + UnitObjTables.push_back( + objTableToUnitObjTable(M, UnitObjTableType, LoadObj)); + UnitObjTables.push_back( + objTableToUnitObjTable(M, UnitObjTableType, StoreObj)); + UnitObjTables.push_back( + objTableToUnitObjTable(M, UnitObjTableType, AllocaObj)); + UnitObjTables.push_back( + objTableToUnitObjTable(M, UnitObjTableType, AllocFnObj)); +} + +CallInst *CilkSanitizerImpl::createRTUnitInitCall(IRBuilder<> &IRB) { + LLVMContext &C = M.getContext(); + + StructType *UnitFedTableType = + getUnitFedTableType(C, FrontEndDataTable::getPointerType(C)); + StructType *UnitObjTableType = + getUnitObjTableType(C, ObjectTable::getPointerType(C)); + + // Lookup __csanrt_unit_init + SmallVector InitArgTypes({IRB.getInt8PtrTy(), + PointerType::get(UnitFedTableType, 0), + PointerType::get(UnitObjTableType, 0), + InitCallsiteToFunction->getType()}); + FunctionType *InitFunctionTy = + FunctionType::get(IRB.getVoidTy(), InitArgTypes, false); + RTUnitInit = M.getOrInsertFunction(CsanRtUnitInitName, InitFunctionTy); + assert(isa(RTUnitInit.getCallee()) && + "Failed to get or insert __csanrt_unit_init function"); + + ArrayType *UnitFedTableArrayType = + 
ArrayType::get(UnitFedTableType, UnitFedTables.size()); + Constant *FEDTable = ConstantArray::get(UnitFedTableArrayType, UnitFedTables); + GlobalVariable *FEDGV = new GlobalVariable(M, UnitFedTableArrayType, false, + GlobalValue::InternalLinkage, FEDTable, + CsiUnitFedTableArrayName); + + ArrayType *UnitObjTableArrayType = + ArrayType::get(UnitObjTableType, UnitObjTables.size()); + Constant *ObjTable = ConstantArray::get(UnitObjTableArrayType, UnitObjTables); + GlobalVariable *ObjGV = new GlobalVariable(M, UnitObjTableArrayType, false, + GlobalValue::InternalLinkage, ObjTable, + CsiUnitObjTableArrayName); + + Constant *Zero = ConstantInt::get(IRB.getInt32Ty(), 0); + Value *GepArgs[] = {Zero, Zero}; + + // Insert call to __csanrt_unit_init + return IRB.CreateCall( + RTUnitInit, + {IRB.CreateGlobalStringPtr(M.getName()), + ConstantExpr::getGetElementPtr(FEDGV->getValueType(), FEDGV, GepArgs), + ConstantExpr::getGetElementPtr(ObjGV->getValueType(), ObjGV, GepArgs), + InitCallsiteToFunction}); +} + +// Initialize all instrumentation hooks that are specific to CilkSanitizer. 
+void CilkSanitizerImpl::initializeCsanHooks() { + LLVMContext &C = M.getContext(); + IRBuilder<> IRB(C); + Type *FuncPropertyTy = CsiFuncProperty::getType(C); + Type *FuncExitPropertyTy = CsiFuncExitProperty::getType(C); + Type *TaskPropertyTy = CsiTaskProperty::getType(C); + Type *TaskExitPropertyTy = CsiTaskExitProperty::getType(C); + Type *LoadPropertyTy = CsiLoadStoreProperty::getType(C); + Type *StorePropertyTy = CsiLoadStoreProperty::getType(C); + Type *CallPropertyTy = CsiCallProperty::getType(C); + Type *LoopPropertyTy = CsiLoopProperty::getType(C); + Type *AllocFnPropertyTy = CsiAllocFnProperty::getType(C); + Type *FreePropertyTy = CsiFreeProperty::getType(C); + Type *DetachPropertyTy = CsiDetachProperty::getType(C); + Type *DetContPropertyTy = CsiDetachContinueProperty::getType(C); + Type *RetType = IRB.getVoidTy(); + Type *AddrType = IRB.getInt8PtrTy(); + Type *NumBytesType = IRB.getInt32Ty(); + Type *LargeNumBytesType = IntptrTy; + Type *IDType = IRB.getInt64Ty(); + Type *SyncRegType = IRB.getInt32Ty(); + + { + AttributeList FnAttrs; + FnAttrs = FnAttrs.addParamAttribute(C, 1, Attribute::NoCapture); + FnAttrs = FnAttrs.addParamAttribute(C, 1, Attribute::ReadNone); + FnAttrs = FnAttrs.addParamAttribute(C, 2, Attribute::NoCapture); + FnAttrs = FnAttrs.addParamAttribute(C, 2, Attribute::ReadNone); + CsanFuncEntry = getHookFunction("__csan_func_entry", FnAttrs, RetType, + /* func_id */ IDType, + /* frame_ptr */ AddrType, + /* stack_ptr */ AddrType, FuncPropertyTy); + } + { + CsanFuncExit = getHookFunction("__csan_func_exit", RetType, + /* func_exit_id */ IDType, + /* func_id */ IDType, FuncExitPropertyTy); + } + + { + AttributeList FnAttrs; + FnAttrs = FnAttrs.addParamAttribute(C, 1, Attribute::NoCapture); + FnAttrs = FnAttrs.addParamAttribute(C, 1, Attribute::ReadNone); + CsanRead = getHookFunction("__csan_load", FnAttrs, RetType, IDType, + AddrType, NumBytesType, LoadPropertyTy); + } + { + AttributeList FnAttrs; + FnAttrs = FnAttrs.addParamAttribute(C, 
1, Attribute::NoCapture); + FnAttrs = FnAttrs.addParamAttribute(C, 1, Attribute::ReadNone); + CsanWrite = getHookFunction("__csan_store", FnAttrs, RetType, IDType, + AddrType, NumBytesType, StorePropertyTy); + } + { + AttributeList FnAttrs; + FnAttrs = FnAttrs.addParamAttribute(C, 1, Attribute::NoCapture); + FnAttrs = FnAttrs.addParamAttribute(C, 1, Attribute::ReadNone); + CsanLargeRead = + getHookFunction("__csan_large_load", FnAttrs, RetType, IDType, AddrType, + LargeNumBytesType, LoadPropertyTy); + } + { + AttributeList FnAttrs; + FnAttrs = FnAttrs.addParamAttribute(C, 1, Attribute::NoCapture); + FnAttrs = FnAttrs.addParamAttribute(C, 1, Attribute::ReadNone); + CsanLargeWrite = + getHookFunction("__csan_large_store", FnAttrs, RetType, IDType, + AddrType, LargeNumBytesType, StorePropertyTy); + } + + { + CsanBeforeCallsite = getHookFunction( + "__csan_before_call", IRB.getVoidTy(), IDType, + /*callee func_id*/ IDType, IRB.getInt8Ty(), CallPropertyTy); + } + { + CsanAfterCallsite = + getHookFunction("__csan_after_call", IRB.getVoidTy(), IDType, IDType, + IRB.getInt8Ty(), CallPropertyTy); + } + + { + CsanDetach = getHookFunction("__csan_detach", RetType, + /* detach_id */ IDType, + /* sync_reg */ SyncRegType, DetachPropertyTy); + } + { + AttributeList FnAttrs; + FnAttrs = FnAttrs.addParamAttribute(C, 2, Attribute::NoCapture); + FnAttrs = FnAttrs.addParamAttribute(C, 2, Attribute::ReadNone); + FnAttrs = FnAttrs.addParamAttribute(C, 3, Attribute::NoCapture); + FnAttrs = FnAttrs.addParamAttribute(C, 3, Attribute::ReadNone); + CsanTaskEntry = getHookFunction("__csan_task", FnAttrs, RetType, + /* task_id */ IDType, + /* detach_id */ IDType, + /* frame_ptr */ AddrType, + /* stack_ptr */ AddrType, TaskPropertyTy); + } + { + CsanTaskExit = + getHookFunction("__csan_task_exit", RetType, + /* task_exit_id */ IDType, + /* task_id */ IDType, + /* detach_id */ IDType, + /* sync_reg */ SyncRegType, TaskExitPropertyTy); + } + { + CsanDetachContinue = + 
getHookFunction("__csan_detach_continue", RetType, + /* detach_continue_id */ IDType, + /* detach_id */ IDType, + /* sync_reg */ SyncRegType, DetContPropertyTy); + } + { + CsanSync = getHookFunction("__csan_sync", RetType, IDType, + /* sync_reg */ SyncRegType); + } + + { + AttributeList FnAttrs; + FnAttrs = FnAttrs.addParamAttribute(C, 1, Attribute::NoCapture); + FnAttrs = FnAttrs.addParamAttribute(C, 1, Attribute::ReadNone); + FnAttrs = FnAttrs.addParamAttribute(C, 5, Attribute::NoCapture); + FnAttrs = FnAttrs.addParamAttribute(C, 5, Attribute::ReadNone); + CsanAfterAllocFn = getHookFunction( + "__csan_after_allocfn", FnAttrs, RetType, IDType, + /* new ptr */ AddrType, /* size */ LargeNumBytesType, + /* num elements */ LargeNumBytesType, /* alignment */ LargeNumBytesType, + /* old ptr */ AddrType, /* property */ AllocFnPropertyTy); + } + { + AttributeList FnAttrs; + FnAttrs = FnAttrs.addParamAttribute(C, 1, Attribute::NoCapture); + FnAttrs = FnAttrs.addParamAttribute(C, 1, Attribute::ReadNone); + CsanAfterFree = + getHookFunction("__csan_after_free", FnAttrs, RetType, IDType, AddrType, + /* property */ FreePropertyTy); + } + + { + CsanDisableChecking = + getHookFunction("__cilksan_disable_checking", RetType); + } + { + CsanEnableChecking = getHookFunction("__cilksan_enable_checking", + RetType); + } + + Type *MAAPTy = IRB.getInt8Ty(); + { + AttributeList FnAttrs; + FnAttrs = FnAttrs.addParamAttribute(C, 0, Attribute::NoCapture); + GetMAAP = + getHookFunction("__csan_get_MAAP", FnAttrs, RetType, + PointerType::get(MAAPTy, 0), IDType, IRB.getInt8Ty()); + // Unlike other hooks, GetMAAP writes to its pointer argument. Make sure + // the MemoryEffects on the hook reflect this fact. 
+ Function *HookFn = cast(GetMAAP.getCallee()); + HookFn->setMemoryEffects(HookFn->getMemoryEffects() | + MemoryEffects::argMemOnly(ModRefInfo::ModRef)); + } + { + SetMAAP = getHookFunction("__csan_set_MAAP", RetType, MAAPTy, IDType); + } + + { + CsanBeforeLoop = getHookFunction("__csan_before_loop", IRB.getVoidTy(), + IDType, IRB.getInt64Ty(), LoopPropertyTy); + } + { + CsanAfterLoop = getHookFunction("__csan_after_loop", IRB.getVoidTy(), + IDType, IRB.getInt8Ty(), LoopPropertyTy); + } + + // Cilksan-specific attributes on CSI hooks + Function *CsiAfterAllocaFn = cast(CsiAfterAlloca.getCallee()); + CsiAfterAllocaFn->addParamAttr(1, Attribute::NoCapture); + CsiAfterAllocaFn->addParamAttr(1, Attribute::ReadNone); + CsiAfterAllocaFn->setOnlyAccessesInaccessibleMemOrArgMem(); + CsiAfterAllocaFn->setDoesNotThrow(); +} + +static BasicBlock *SplitOffPreds( + BasicBlock *BB, SmallVectorImpl &Preds, DominatorTree *DT, + LoopInfo *LI) { + if (BB->isLandingPad()) { + SmallVector NewBBs; + SplitLandingPadPredecessors(BB, Preds, ".csi-split-lp", ".csi-split", + NewBBs, DT, LI); + return NewBBs[1]; + } + + BasicBlock *NewBB = SplitBlockPredecessors(BB, Preds, ".csi-split", DT, LI); + if (isa(BB->getFirstNonPHIOrDbg())) { + // If the block being split is simply contains an unreachable, then replace + // the terminator of the new block with an unreachable. This helps preserve + // invariants on the CFG structure for Tapir placeholder blocks following + // detached.rethrow and taskframe.resume terminators. + ReplaceInstWithInst(NewBB->getTerminator(), + new UnreachableInst(BB->getContext())); + if (DT) { + DT->deleteEdge(NewBB, BB); + } + } + return BB; +} + +// Setup each block such that all of its predecessors belong to the same CSI ID +// space. 
+static void setupBlock(BasicBlock *BB, DominatorTree *DT, LoopInfo *LI, + const TargetLibraryInfo *TLI) { + if (BB->isLandingPad()) { + LandingPadInst *LPad = BB->getLandingPadInst(); + if (!LPad->isCleanup()) + LPad->setCleanup(true); + } + + if (BB->getUniquePredecessor()) + return; + + SmallVector DetachPreds; + SmallVector TFResumePreds; + SmallVector SyncPreds; + SmallVector SyncUnwindPreds; + SmallVector AllocFnPreds; + SmallVector FreeFnPreds; + DenseMap> LibCallPreds; + SmallVector InvokePreds; + bool HasOtherPredTypes = false; + unsigned NumPredTypes = 0; + + // Partition the predecessors of the landing pad. + for (BasicBlock *Pred : predecessors(BB)) { + if (isa(Pred->getTerminator()) || + isa(Pred->getTerminator()) || + isDetachedRethrow(Pred->getTerminator())) + DetachPreds.push_back(Pred); + else if (isTaskFrameResume(Pred->getTerminator())) + TFResumePreds.push_back(Pred); + else if (isa(Pred->getTerminator())) + SyncPreds.push_back(Pred); + else if (isSyncUnwind(Pred->getTerminator())) + SyncUnwindPreds.push_back(Pred); + else if (CilkSanitizerImpl::isAllocFn(Pred->getTerminator(), TLI)) + AllocFnPreds.push_back(Pred); + else if (CilkSanitizerImpl::isFreeFn(Pred->getTerminator(), TLI)) + FreeFnPreds.push_back(Pred); + else if (CilkSanitizerImpl::isLibCall(*Pred->getTerminator(), TLI)) { + const Function *Called = + dyn_cast(Pred->getTerminator())->getCalledFunction(); + LibCallPreds[Called].push_back(Pred); + } else if (isa(Pred->getTerminator())) + InvokePreds.push_back(Pred); + else + HasOtherPredTypes = true; + } + + NumPredTypes = static_cast(!DetachPreds.empty()) + + static_cast(!TFResumePreds.empty()) + + static_cast(!SyncPreds.empty()) + + static_cast(!SyncUnwindPreds.empty()) + + static_cast(!AllocFnPreds.empty()) + + static_cast(!FreeFnPreds.empty()) + + static_cast(LibCallPreds.size()) + + static_cast(!InvokePreds.empty()) + + static_cast(HasOtherPredTypes); + + // Splitting predecessors works differently for landingpads versus normal + // 
basic blocks. If the block is not a landingpad, split off every type of + // predecessor. + unsigned NumPredTypesRequired = static_cast(BB->isLandingPad()); + if (NumPredTypes <= NumPredTypesRequired) + return; + + BasicBlock *BBToSplit = BB; + // Split off the predecessors of each type. + if (!SyncPreds.empty() && NumPredTypes > NumPredTypesRequired) { + BBToSplit = SplitOffPreds(BBToSplit, SyncPreds, DT, LI); + NumPredTypes--; + } + if (!SyncUnwindPreds.empty() && NumPredTypes > NumPredTypesRequired) { + BBToSplit = SplitOffPreds(BBToSplit, SyncUnwindPreds, DT, LI); + NumPredTypes--; + } + if (!AllocFnPreds.empty() && NumPredTypes > NumPredTypesRequired) { + BBToSplit = SplitOffPreds(BBToSplit, AllocFnPreds, DT, LI); + NumPredTypes--; + } + if (!FreeFnPreds.empty() && NumPredTypes > NumPredTypesRequired) { + BBToSplit = SplitOffPreds(BBToSplit, FreeFnPreds, DT, LI); + NumPredTypes--; + } + if (!LibCallPreds.empty() && NumPredTypes > NumPredTypesRequired) { + for (auto KeyVal : LibCallPreds) { + if (NumPredTypes > NumPredTypesRequired) { + BBToSplit = SplitOffPreds(BBToSplit, KeyVal.second, DT, LI); + NumPredTypes--; + } + } + } + if (!InvokePreds.empty() && NumPredTypes > NumPredTypesRequired) { + BBToSplit = SplitOffPreds(BBToSplit, InvokePreds, DT, LI); + NumPredTypes--; + } + if (!TFResumePreds.empty() && NumPredTypes > NumPredTypesRequired) { + BBToSplit = SplitOffPreds(BBToSplit, TFResumePreds, DT, LI); + NumPredTypes--; + } + // We handle detach and detached.rethrow predecessors at the end to preserve + // invariants on the CFG structure about the deadness of basic blocks after + // detached-rethrows. + if (!DetachPreds.empty() && NumPredTypes > NumPredTypesRequired) { + BBToSplit = SplitOffPreds(BBToSplit, DetachPreds, DT, LI); + NumPredTypes--; + } +} + +// Setup all basic blocks such that each block's predecessors belong entirely to +// one CSI ID space. 
+void CilkSanitizerImpl::setupBlocks(Function &F, DominatorTree *DT, + LoopInfo *LI) { + SmallPtrSet BlocksToSetup; + for (BasicBlock &BB : F) { + if (BB.isLandingPad()) + BlocksToSetup.insert(&BB); + + if (InvokeInst *II = dyn_cast(BB.getTerminator())) { + if (!isTapirPlaceholderSuccessor(II->getNormalDest())) + BlocksToSetup.insert(II->getNormalDest()); + } else if (SyncInst *SI = dyn_cast(BB.getTerminator())) + BlocksToSetup.insert(SI->getSuccessor(0)); + } + + for (BasicBlock *BB : BlocksToSetup) + setupBlock(BB, DT, LI, &GetTLI(F)); +} + +// Do not instrument known races/"benign races" that come from compiler +// instrumentation. The user has no way of suppressing them. +static bool shouldInstrumentReadWriteFromAddress(const Module *M, Value *Addr) { + // Peel off GEPs and BitCasts. + Addr = Addr->stripInBoundsOffsets(); + + if (GlobalVariable *GV = dyn_cast(Addr)) { + if (GV->hasSection()) { + StringRef SectionName = GV->getSection(); + // Check if the global is in the PGO counters section. + auto OF = Triple(M->getTargetTriple()).getObjectFormat(); + if (SectionName.endswith( + getInstrProfSectionName(IPSK_cnts, OF, + /*AddSegmentInfo*/ false))) + return false; + } + + // Check if the global is private gcov data. + if (GV->getName().startswith("__llvm_gcov") || + GV->getName().startswith("__llvm_gcda")) + return false; + } + + // Do not instrument acesses from different address spaces; we cannot deal + // with them. + if (Addr) { + Type *PtrTy = cast(Addr->getType()->getScalarType()); + if (PtrTy->getPointerAddressSpace() != 0) + return false; + } + + return true; +} + +/// Returns true if Addr can only refer to a locally allocated base object, that +/// is, an object created via an AllocaInst or an AllocationFn. +bool CilkSanitizerImpl::LocalBaseObj(const Value *Addr, LoopInfo *LI, + const TargetLibraryInfo *TLI) const { + // If we don't have an address, give up. + if (!Addr) + return false; + + // Get the base objects that this address might refer to. 
+ SmallVectorImpl &BaseObjs = lookupBaseObjects(Addr, LI); + + // If we could not determine the base objects, conservatively return false. + if (BaseObjs.empty()) + return false; + + // If any base object is not an alloca or allocation function, then it's not + // local. + for (const Value *BaseObj : BaseObjs) { + if (isa(BaseObj) || isNoAliasCall(BaseObj)) + continue; + + if (const Argument *A = dyn_cast(BaseObj)) + if (A->hasByValAttr()) + continue; + + LLVM_DEBUG(dbgs() << "Non-local base object " << *BaseObj << "\n"); + return false; + } + + return true; +} + +// Examine the uses of a Instruction AI to determine if it is used in a subtask. +// This method assumes that AI is an allocation instruction, i.e., either an +// AllocaInst or an AllocationFn. +bool CilkSanitizerImpl::MightHaveDetachedUse(const Value *V, + const TaskInfo &TI) const { + // Get the task for this allocation. + const Task *AllocTask = nullptr; + if (const Instruction *I = dyn_cast(V)) + AllocTask = TI.getTaskFor(I->getParent()); + else if (const Argument *A = dyn_cast(V)) + AllocTask = TI.getTaskFor(&A->getParent()->getEntryBlock()); + + // assert(AllocTask && "Null task for instruction."); + if (!AllocTask) { + LLVM_DEBUG(dbgs() << "MightHaveDetachedUse: No task found for given value " + << *V << "\n"); + return false; + } + + if (AllocTask->isSerial()) + // Alloc AI cannot be used in a subtask if its enclosing task is serial. + return false; + + SmallVector Worklist; + SmallSet Visited; + + // Add all uses of AI to the worklist. + for (const Use &U : V->uses()) { + Visited.insert(&U); + Worklist.push_back(&U); + } + + // Evaluate each use of AI. + while (!Worklist.empty()) { + const Use *U = Worklist.pop_back_val(); + + // Check if this use of AI is in a different task from the allocation. 
+ Instruction *I = cast(U->getUser()); + LLVM_DEBUG(dbgs() << "\tExamining use: " << *I << "\n"); + if (AllocTask != TI.getTaskFor(I->getParent())) { + assert(TI.getTaskFor(I->getParent()) != AllocTask->getParentTask() && + "Use of alloca appears in a parent task of that alloca"); + // Because the use of AI cannot appear in a parent task of AI, it must be + // in a subtask. In particular, the use cannot be in a shared-EH spindle. + return true; + } + + // If the pointer to AI is transformed using one of the following + // operations, add uses of the transformed pointer to the worklist. + switch (I->getOpcode()) { + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::PHI: + case Instruction::Select: + case Instruction::AddrSpaceCast: + for (Use &UU : I->uses()) + if (Visited.insert(&UU).second) + Worklist.push_back(&UU); + break; + default: + break; + } + } + return false; +} + +/// Returns true if accesses on Addr could race due to pointer capture. +bool CilkSanitizerImpl::PossibleRaceByCapture(const Value *Addr, + const TaskInfo &TI, + LoopInfo *LI) const { + if (isa(Addr)) + // For this analysis, we consider all global values to be captured. + return true; + + // Check for detached uses of the underlying base objects. + SmallVectorImpl &BaseObjs = lookupBaseObjects(Addr, LI); + + // If we could not determine the base objects, conservatively return true. + if (BaseObjs.empty()) + return true; + + for (const Value *BaseObj : BaseObjs) { + // Skip any null objects + if (const Constant *C = dyn_cast(BaseObj)) { + // if (C->isNullValue()) + // continue; + // Is this value a constant that cannot be derived from any pointer + // value (we need to exclude constant expressions, for example, that + // are formed from arithmetic on global symbols). + bool IsNonPtrConst = isa(C) || isa(C) || + isa(C) || + isa(C) || isa(C); + if (IsNonPtrConst) + continue; + } + + // If the base object is not an instruction, conservatively return true. 
+ if (!isa(BaseObj)) { + // From BasicAliasAnalysis.cpp: If this is an argument that corresponds to + // a byval or noalias argument, then it has not escaped before entering + // the function. + if (const Argument *A = dyn_cast(BaseObj)) { + if (!A->hasByValAttr() && !A->hasNoAliasAttr()) + return true; + } else + return true; + } + + // If the base object might have a detached use, return true. + if (lookupMightHaveDetachedUse(BaseObj, TI)) + return true; + } + + // Perform normal pointer-capture analysis. + // if (PointerMayBeCaptured(Addr, false, false)) + if (lookupPointerMayBeCaptured(Addr)) + return true; + + return false; +} + +bool CilkSanitizerImpl::unknownObjectUses(const Value *Addr, LoopInfo *LI, + const TargetLibraryInfo *TLI) const { + // Perform normal pointer-capture analysis. + if (lookupPointerMayBeCaptured(Addr)) + return true; + + // Check for detached uses of the underlying base objects. + SmallVectorImpl &BaseObjs = lookupBaseObjects(Addr, LI); + + // If we could not determine the base objects, conservatively return true. + if (BaseObjs.empty()) + return true; + + // If the base object is not an allocation function, return true. + for (const Value *BaseObj : BaseObjs) + if (!isAllocFn(BaseObj, TLI)) + return true; + + return false; +} + +void CilkSanitizerImpl::chooseInstructionsToInstrument( + SmallVectorImpl &Local, SmallVectorImpl &All, + const TaskInfo &TI, LoopInfo &LI, const TargetLibraryInfo *TLI) { + SmallSet WriteTargets; + // Iterate from the end. + for (Instruction *I : reverse(Local)) { + if (StoreInst *Store = dyn_cast(I)) { + Value *Addr = Store->getPointerOperand(); + if (!shouldInstrumentReadWriteFromAddress(I->getModule(), Addr)) + continue; + WriteTargets.insert(Addr); + } else { + LoadInst *Load = cast(I); + Value *Addr = Load->getPointerOperand(); + if (!shouldInstrumentReadWriteFromAddress(I->getModule(), Addr)) + continue; + if (WriteTargets.count(Addr)) { + // We will write to this temp, so no reason to analyze the read. 
+ NumOmittedReadsBeforeWrite++; + continue; + } + if (addrPointsToConstantData(Addr)) { + // Addr points to some constant data -- it can not race with any writes. + NumOmittedReadsFromConstants++; + continue; + } + } + Value *Addr = isa(*I) + ? cast(I)->getPointerOperand() + : cast(I)->getPointerOperand(); + if (LocalBaseObj(Addr, &LI, TLI) && + !PossibleRaceByCapture(Addr, TI, &LI)) { + // The variable is addressable but not captured, so it cannot be + // referenced from a different thread and participate in a data race + // (see llvm/Analysis/CaptureTracking.h for details). + NumOmittedNonCaptured++; + continue; + } + LLVM_DEBUG(dbgs() << "Pushing " << *I << "\n"); + All.push_back(I); + } + Local.clear(); +} + +bool CilkSanitizerImpl::isLibCall(const Instruction &I, + const TargetLibraryInfo *TLI) { + if (!isa(I)) + return false; + + if (!TLI) + return false; + + if (const Function *Called = dyn_cast(&I)->getCalledFunction()) { + LibFunc F; + bool FoundLibFunc = TLI->getLibFunc(*Called, F); + if (FoundLibFunc) + return true; + } + + return false; +} + +// Helper function to determine if the call-base instruction \p I should be +// skipped when examining calls that affect race detection. Returns true if and +// only if \p I is a simple call that cannot race. +bool CilkSanitizerImpl::simpleCallCannotRace(const Instruction &I) { + return callsPlaceholderFunction(I); +} + +// Helper function to determine if the call-base instruction \p I should be +// skipped when examining calls that affect race detection. Returns true if and +// only if \p I is identified as a special function that should be ignored. 
+bool CilkSanitizerImpl::shouldIgnoreCall(const Instruction &I) {
+  // Ignore calls into the sanitizer runtimes themselves: any direct call to a
+  // function whose name starts with a CSI/CSan/Cilksan runtime prefix.
+  // Indirect calls (no called function) and unnamed callees are not ignored.
+  if (const CallBase *Call = dyn_cast<CallBase>(&I))
+    if (const Function *Called = Call->getCalledFunction())
+      if (Called->hasName() && (Called->getName().startswith("__csi") ||
+                                Called->getName().startswith("__csan") ||
+                                Called->getName().startswith("__cilksan")))
+        return true;
+  return false;
+}
+
+// Helper function to get the ID of a function being called.  These IDs are
+// stored in separate global variables in the program.  This method will create
+// a new global variable for the Callee's ID if necessary.
+Value *CilkSanitizerImpl::GetCalleeFuncID(const Function *Callee,
+                                          IRBuilder<> &IRB) {
+  if (!Callee)
+    // Unknown targets (i.e., indirect calls) are always unknown.
+    return IRB.getInt64(CsiCallsiteUnknownTargetId);
+
+  std::string GVName =
+      CsiFuncIdVariablePrefix + Callee->getName().str();
+  GlobalVariable *FuncIdGV = M.getNamedGlobal(GVName);
+  Type *FuncIdGVTy = IRB.getInt64Ty();
+  if (!FuncIdGV) {
+    // Create the ID variable on first use.  It is initialized to the
+    // unknown-target sentinel; the runtime fills in the real ID.
+    FuncIdGV =
+        dyn_cast<GlobalVariable>(M.getOrInsertGlobal(GVName, FuncIdGVTy));
+    assert(FuncIdGV);
+    FuncIdGV->setConstant(false);
+    // In JIT mode, a defined callee keeps its own linkage so the JIT can
+    // resolve the ID variable alongside the function; otherwise use weak
+    // linkage so duplicate definitions across TUs merge cleanly.
+    if (Options.jitMode && !Callee->empty())
+      FuncIdGV->setLinkage(Callee->getLinkage());
+    else
+      FuncIdGV->setLinkage(GlobalValue::WeakAnyLinkage);
+    FuncIdGV->setInitializer(IRB.getInt64(CsiCallsiteUnknownTargetId));
+  }
+  return IRB.CreateLoad(FuncIdGVTy, FuncIdGV);
+}
+
+//------------------------------------------------------------------------------
+// SimpleInstrumentor methods, which do not do static race detection.
+//------------------------------------------------------------------------------ + +bool CilkSanitizerImpl::SimpleInstrumentor::InstrumentSimpleInstructions( + SmallVectorImpl &Instructions) { + bool Result = false; + for (Instruction *I : Instructions) { + bool LocalResult = false; + if (isa(I) || isa(I)) + LocalResult |= CilkSanImpl.instrumentLoadOrStore(I); + else if (isa(I) || isa(I)) + LocalResult |= CilkSanImpl.instrumentAtomic(I); + else + dbgs() << "[Cilksan] Unknown simple instruction: " << *I << "\n"; + + if (LocalResult) { + Result |= LocalResult; + // Record the detaches for the task containing this instruction. These + // detaches need to be instrumented. + getDetachesForInstruction(I); + } + } + return Result; +} + +bool CilkSanitizerImpl::SimpleInstrumentor::InstrumentAnyMemIntrinsics( + SmallVectorImpl &MemIntrinsics) { + bool Result = false; + for (Instruction *I : MemIntrinsics) { + bool LocalResult = false; + if (isa(I)) { + LocalResult |= CilkSanImpl.instrumentAnyMemIntrinAcc(I, /*Src*/ 1); + LocalResult |= CilkSanImpl.instrumentAnyMemIntrinAcc(I, /*Dst*/ 0); + } else { + assert(isa(I) && + "InstrumentAnyMemIntrinsics operating on not a memory intrinsic."); + LocalResult |= CilkSanImpl.instrumentAnyMemIntrinAcc(I, unsigned(-1)); + } + if (LocalResult) { + Result |= LocalResult; + // Record the detaches for the task containing this instruction. These + // detaches need to be instrumented. + getDetachesForInstruction(I); + } + } + return Result; +} + +bool CilkSanitizerImpl::SimpleInstrumentor::InstrumentCalls( + SmallVectorImpl &Calls) { + bool Result = false; + for (Instruction *I : Calls) { + // Allocation-function and free calls are handled separately. 
+ if (isAllocFn(I, TLI) || isFreeFn(I, TLI)) + continue; + + bool LocalResult = false; + if (isa(I)) + LocalResult |= + CilkSanImpl.instrumentIntrinsicCall(I, /*MAAPVals*/ nullptr); + else if (isLibCall(*I, TLI)) + LocalResult |= + CilkSanImpl.instrumentLibCall(I, /*MAAPVals*/ nullptr); + else + LocalResult |= CilkSanImpl.instrumentCallsite(I, /*MAAPVals*/ nullptr); + if (LocalResult) { + Result |= LocalResult; + // Record the detaches for the task containing this instruction. These + // detaches need to be instrumented. + getDetachesForInstruction(I); + } + } + return Result; +} + +bool CilkSanitizerImpl::SimpleInstrumentor::InstrumentAncillaryInstructions( + SmallPtrSetImpl &Allocas, + SmallPtrSetImpl &AllocationFnCalls, + SmallPtrSetImpl &FreeCalls, + DenseMap &SyncRegNums, + DenseMap &SRCounters, const DataLayout &DL) { + bool Result = false; + SmallPtrSet Syncs; + SmallPtrSet Loops; + + // Instrument allocas and allocation-function calls that may be involved in a + // race. + for (Instruction *I : Allocas) { + // The simple instrumentor just instruments everything + CilkSanImpl.instrumentAlloca(I, TI); + getDetachesForInstruction(I); + Result = true; + } + for (Instruction *I : AllocationFnCalls) { + // The simple instrumentor just instruments everything + CilkSanImpl.instrumentAllocationFn(I, DT, TLI); + getDetachesForInstruction(I); + Result = true; + } + for (Instruction *I : FreeCalls) { + // The first argument of the free call is the pointer. + Value *Ptr = I->getOperand(0); + // If the pointer corresponds to an allocation function call in this + // function, then instrument it. 
+ if (Instruction *PtrI = dyn_cast(Ptr)) { + if (AllocationFnCalls.count(PtrI)) { + CilkSanImpl.instrumentFree(I, TLI); + getDetachesForInstruction(I); + Result = true; + continue; + } + } + // The simple instrumentor just instruments everything + CilkSanImpl.instrumentFree(I, TLI); + getDetachesForInstruction(I); + Result = true; + } + + // Instrument detaches + for (DetachInst *DI : Detaches) { + CilkSanImpl.instrumentDetach(DI, SyncRegNums[DI->getSyncRegion()], + SRCounters[DI->getDetached()], DT, TI, LI); + Result = true; + // Get syncs associated with this detach + for (SyncInst *SI : CilkSanImpl.DetachToSync[DI]) + Syncs.insert(SI); + + if (CilkSanImpl.Options.InstrumentLoops) { + // Get any loop associated with this detach. + Loop *L = LI.getLoopFor(DI->getParent()); + if (spawnsTapirLoopBody(DI, LI, TI)) + Loops.insert(L); + } + } + + // Instrument associated syncs + for (SyncInst *SI : Syncs) + CilkSanImpl.instrumentSync(SI, SyncRegNums[SI->getSyncRegion()]); + + if (CilkSanImpl.Options.InstrumentLoops) { + // Recursively instrument all Tapir loops + for (Loop *L : Loops) + CilkSanImpl.instrumentTapirLoop(*L, TI, SyncRegNums); + } + + return Result; +} + +// TODO: Combine this redundant logic with that in Instrumentor +void CilkSanitizerImpl::SimpleInstrumentor::getDetachesForInstruction( + Instruction *I) { + // Get the Task for I. + Task *T = TI.getTaskFor(I->getParent()); + // Add the ancestors of T to the set of detaches to instrument. + while (!T->isRootTask()) { + // Once we encounter a detach we've previously added to the set, we know + // that all its parents are also in the set. 
+ if (!Detaches.insert(T->getDetach()).second) + return; + T = T->getParentTask(); + } +} + +//------------------------------------------------------------------------------ +// Instrumentor methods +//------------------------------------------------------------------------------ + +void CilkSanitizerImpl::Instrumentor::getDetachesForInstruction( + Instruction *I) { + // Get the Task for I. + Task *T = TI.getTaskFor(I->getParent()); + // Add the ancestors of T to the set of detaches to instrument. + while (!T->isRootTask()) { + // Once we encounter a detach we've previously added to the set, we know + // that all its parents are also in the set. + if (!Detaches.insert(T->getDetach()).second) + return; + T = T->getParentTask(); + } +} + +unsigned CilkSanitizerImpl::Instrumentor::RaceTypeToFlagVal( + RaceInfo::RaceType RT) { + unsigned FlagVal = static_cast(MAAPValue::NoAccess); + if (RaceInfo::isLocalRace(RT) || RaceInfo::isOpaqueRace(RT)) + FlagVal = static_cast(MAAPValue::ModRef); + if (RaceInfo::isRaceViaAncestorMod(RT)) + FlagVal |= static_cast(MAAPValue::Mod); + if (RaceInfo::isRaceViaAncestorRef(RT)) + FlagVal |= static_cast(MAAPValue::Ref); + return FlagVal; +} + +static Value *getMAAPIRValue(IRBuilder<> &IRB, unsigned MV) { + return IRB.getInt8(MV); +} + +// Insert per-argument MAAPs for this function +void CilkSanitizerImpl::Instrumentor::InsertArgMAAPs(Function &F, + Value *FuncId) { + if (!MAAPChecks) + return; + LLVM_DEBUG(dbgs() << "InsertArgMAAPs: " << F.getName() << "\n"); + IRBuilder<> IRB(cast(FuncId)->getNextNode()); + unsigned ArgIdx = 0; + for (Argument &Arg : F.args()) { + if (!Arg.getType()->isPtrOrPtrVectorTy()) + continue; + + // Create a new flag for this argument MAAP. + Type *MAAPIRValueTy = getMAAPIRValue(IRB, 0)->getType(); + Value *NewFlag = IRB.CreateAlloca(MAAPIRValueTy, + Arg.getType()->getPointerAddressSpace()); + Value *FinalMV; + // If this function is main, then it has no ancestors that can create races. 
+ if (F.getName() == "main") { + FinalMV = getMAAPIRValue(IRB, RaceTypeToFlagVal(RaceInfo::None)); + IRB.CreateStore(FinalMV, NewFlag); + } else { + // Call the runtime function to set the value of this flag. + IRB.CreateCall(CilkSanImpl.GetMAAP, + {NewFlag, FuncId, IRB.getInt8(ArgIdx)}); + + // Incorporate local information into this MAAP value. + unsigned LocalMV = static_cast(MAAPValue::NoAccess); + if (Arg.hasNoAliasAttr()) + LocalMV |= static_cast(MAAPValue::NoAlias); + + // Store this local MAAP value. + FinalMV = IRB.CreateOr(getMAAPIRValue(IRB, LocalMV), + IRB.CreateLoad(MAAPIRValueTy, NewFlag)); + IRB.CreateStore(FinalMV, NewFlag); + } + // Associate this flag with the argument for future lookups. + LLVM_DEBUG(dbgs() << "Recording local MAAP for arg " << Arg << ": " + << *NewFlag << "\n"); + LocalMAAPs[&Arg] = FinalMV; + ArgMAAPs.insert(FinalMV); + ++ArgIdx; + } + + // Record other objects known to be involved in races. + for (auto &ObjRD : RI.getObjectMRForRace()) { + if (isa(ObjRD.first)) { + unsigned MAAPVal = static_cast(MAAPValue::NoAccess); + if (isModSet(ObjRD.second)) + MAAPVal |= static_cast(MAAPValue::Mod); + if (isRefSet(ObjRD.second)) + MAAPVal |= static_cast(MAAPValue::Ref); + // Determine if this object is no-alias. + if (const CallBase *CB = dyn_cast(ObjRD.first)) { + if (CB->hasRetAttr(Attribute::NoAlias)) + MAAPVal |= static_cast(MAAPValue::NoAlias); + } else if (isa(ObjRD.first)) + MAAPVal |= static_cast(MAAPValue::NoAlias); + + LLVM_DEBUG(dbgs() << "Setting LocalMAAPs for " << *ObjRD.first << " = " + << MAAPVal << "\n"); + LocalMAAPs[ObjRD.first] = getMAAPIRValue(IRB, MAAPVal); + } + } +} + +bool CilkSanitizerImpl::Instrumentor::InstrumentSimpleInstructions( + SmallVectorImpl &Instructions) { + bool Result = false; + for (Instruction *I : Instructions) { + bool LocalResult = false; + // Simple instructions, such as loads, stores, or atomics, have just one + // pointer operand, and therefore should have at most one entry of RaceData. 
+ + // If the instruction might participate in a local or opaque race, + // instrument it unconditionally. + if (RI.mightRaceOpaquely(I)) { + if (isa(I) || isa(I)) + LocalResult |= CilkSanImpl.instrumentLoadOrStore(I); + else if (isa(I) || isa(I)) + LocalResult |= CilkSanImpl.instrumentAtomic(I); + else + dbgs() << "[Cilksan] Unknown simple instruction: " << *I << "\n"; + } else if (RI.mightRaceViaAncestor(I) || RI.mightRaceLocally(I)) { + // Otherwise, if the instruction might participate in a race via an + // ancestor function instantiation, instrument it conditionally, based on + // the pointer. + // + // Delay handling this instruction. + DelayedSimpleInsts.push_back(I); + LocalResult |= true; + } + + // If any instrumentation was inserted, collect associated instructions to + // instrument. + if (LocalResult) { + Result |= LocalResult; + // Record the detaches for the task containing this instruction. These + // detaches need to be instrumented. + getDetachesForInstruction(I); + } + } + return Result; +} + +bool CilkSanitizerImpl::Instrumentor::InstrumentAnyMemIntrinsics( + SmallVectorImpl &MemIntrinsics) { + bool Result = false; + for (Instruction *I : MemIntrinsics) { + bool LocalResult = false; + // If this instruction cannot race, skip it. + if (!RI.mightRace(I)) + continue; + + // Look over the race data to determine what memory intrinsics need to be + // instrumented and how. + SmallSet, 2> ToInstrument; + SmallSet, 2> MaybeDelay; + for (const RaceInfo::RaceData &RD : RI.getRaceData(I)) { + assert(RD.getPtr() && "No pointer for race with memory intrinsic."); + if (RaceInfo::isOpaqueRace(RD.Type)) { + ToInstrument.insert(std::make_pair(I, RD.OperandNum)); + LocalResult |= true; + } else if (RaceInfo::isRaceViaAncestor(RD.Type) || + RaceInfo::isLocalRace(RD.Type)) { + // Possibly delay handling this instruction. 
+ MaybeDelay.insert(std::make_pair(I, RD.OperandNum)); + LocalResult |= true; + } + } + + // Do the instrumentation + for (const std::pair &MemIntrin : ToInstrument) + CilkSanImpl.instrumentAnyMemIntrinAcc(MemIntrin.first, MemIntrin.second); + for (const std::pair &MemIntrin : MaybeDelay) + if (!ToInstrument.count(MemIntrin)) + DelayedMemIntrinsics.push_back(MemIntrin); + + // If any instrumentation was inserted, collect associated instructions to + // instrument. + if (LocalResult) { + Result |= LocalResult; + // Record the detaches for the task containing this instruction. These + // detaches need to be instrumented. + getDetachesForInstruction(I); + } + } + return Result; +} + +bool CilkSanitizerImpl::Instrumentor::InstrumentCalls( + SmallVectorImpl &Calls) { + bool Result = false; + for (Instruction *I : Calls) { + // Allocation-function and free calls are handled separately. + if (isAllocFn(I, TLI) || isFreeFn(I, TLI)) + continue; + + bool LocalResult = false; + bool GetDetaches = false; + + // Get current race data for this call. + RaceInfo::RaceType CallRT = RI.getRaceType(I); + LLVM_DEBUG({ + dbgs() << "Call " << *I << ": "; + RaceInfo::printRaceType(CallRT, dbgs()); + dbgs() << "\n"; + }); + + // Get update race data, if it's available. + RaceInfo::RaceType FuncRT = CallRT; + CallBase *CB = dyn_cast(I); + if (Function *CF = CB->getCalledFunction()) + if (CilkSanImpl.FunctionRaceType.count(CF)) + FuncRT = CilkSanImpl.FunctionRaceType[CF]; + + LLVM_DEBUG({ + dbgs() << " FuncRT: "; + RaceInfo::printRaceType(FuncRT, dbgs()); + dbgs() << "\n"; + }); + + // Propagate information about opaque races from function to call. 
+ if (!RaceInfo::isOpaqueRace(FuncRT)) + CallRT = RaceInfo::clearOpaqueRace(CallRT); + + LLVM_DEBUG({ + dbgs() << " New CallRT: "; + RaceInfo::printRaceType(CallRT, dbgs()); + dbgs() << "\n"; + }); + + // If this instruction cannot race, see if we can suppress it + if (!RaceInfo::isRace(CallRT)) { + // Nothing to suppress if this is an intrinsic + if (isa(I)) + continue; + + // We can only suppress calls whose functions don't have local races. + if (!RaceInfo::isLocalRace(FuncRT)) { + if (!CB->doesNotAccessMemory()) + LocalResult |= CilkSanImpl.suppressCallsite(I); + continue; + // } else { + // GetDetaches |= CilkSanImpl.instrumentCallsite(I); + // // SmallPtrSet Objects; + // // RI.getObjectsFor(I, Objects); + // // for (Value *Obj : Objects) { + // // CilkSanImpl.ObjectMRForRace[Obj] = ModRefInfo::ModRef; + // // } + } + // continue; + } + + // We're going to instrument this call for potential races. First get + // MAAP information for its arguments, if any races depend on the + // ancestor. + SmallVector MAAPVals; + LLVM_DEBUG(dbgs() << "Getting MAAP values for " << *CB << "\n"); + IRBuilder<> IRB(I); + unsigned OpIdx = 0; + for (const Value *Op : CB->args()) { + if (!MAAPChecks) + continue; + + if (!Op->getType()->isPtrOrPtrVectorTy()) { + ++OpIdx; + continue; + } + + // Check if this operand might race via ancestor. + bool RaceViaAncestor = false; + for (const RaceInfo::RaceData &RD : RI.getRaceData(I)) { + if (RD.OperandNum != OpIdx) + continue; + if (RaceInfo::isRaceViaAncestor(RD.Type)) { + RaceViaAncestor = true; + break; + } + } + + Value *MAAPVal; + if (RaceViaAncestor) + // Evaluate race data for I and OpIdx to compute the MAAP value. + MAAPVal = getMAAPValue(I, IRB, OpIdx); + else + // We have either an opaque race or a local race, but _not_ a race via + // an ancestor. We want to propagate MAAP information on pointer + // arguments, but we don't need to be pessimistic when a value can't be + // found. 
+ MAAPVal = getMAAPValue(I, IRB, OpIdx, MAAPValue::NoAccess, + /*CheckArgs*/ false); + LLVM_DEBUG({ + dbgs() << " Op: " << *CB->getArgOperand(OpIdx) << "\n"; + dbgs() << " MAAP value: " << *MAAPVal << "\n"; + }); + MAAPVals.push_back(MAAPVal); + ++OpIdx; + } + + Value *CalleeID = CilkSanImpl.GetCalleeFuncID(CB->getCalledFunction(), IRB); + // We set the MAAPs in reverse order to support stack-like access of the + // MAAPs by in-order calls to GetMAAP in the callee. + for (Value *MAAPVal : reverse(MAAPVals)) + IRB.CreateCall(CilkSanImpl.SetMAAP, {MAAPVal, CalleeID}); + + if (isa(I)) + GetDetaches |= CilkSanImpl.instrumentIntrinsicCall(I, &MAAPVals); + else if (isLibCall(*I, TLI)) + GetDetaches |= CilkSanImpl.instrumentLibCall(I, &MAAPVals); + else + GetDetaches |= CilkSanImpl.instrumentCallsite(I, &MAAPVals); + + // If any instrumentation was inserted, collect associated instructions to + // instrument. + Result |= LocalResult; + if (GetDetaches) { + Result |= GetDetaches; + // Record the detaches for the task containing this instruction. These + // detaches need to be instrumented. + getDetachesForInstruction(I); + } + } + return Result; +} + +Value *CilkSanitizerImpl::Instrumentor::readMAAPVal(Value *V, + IRBuilder<> &IRB) { + if (!ArgMAAPs.count(V)) + return V; + // Marking the load as invariant is not technically correct, because the + // __csan_get_MAAP call sets the value. But this call happens + // once, and all subsequent loads will return the same value. + // + // MDNode *MD = llvm::MDNode::get(IRB.getContext(), llvm::None); + // cast(Load)->setMetadata(LLVMContext::MD_invariant_load, MD); + + // TODO: See if there's a better way to annotate this load for optimization. 
+ // LoadInst *I = IRB.CreateLoad(V); + // if (auto *IMD = I->getMetadata(LLVMContext::MD_invariant_group)) + // I->setMetadata(LLVMContext::MD_invariant_group, IMD); + // else + // I->setMetadata(LLVMContext::MD_invariant_group, + // MDNode::get(IRB.getContext(), {})); + Value *MV; + if (AllocaInst *A = dyn_cast(V)) + MV = IRB.CreateLoad(A->getAllocatedType(), A); + else + MV = V; + return MV; +} + +// Get the memory location for this instruction and operand. +static MemoryLocation getMemoryLocation(Instruction *I, unsigned OperandNum, + const TargetLibraryInfo *TLI) { + if (auto *MI = dyn_cast(I)) { + if (auto *MT = dyn_cast(I)) { + if (OperandNum == 1) + return MemoryLocation::getForSource(MT); + } + return MemoryLocation::getForDest(MI); + } else if (OperandNum == static_cast(-1)) { + return MemoryLocation::get(I); + } else { + assert(isa(I) && + "Unknown instruction and operand ID for getting MemoryLocation."); + CallBase *CB = cast(I); + return MemoryLocation::getForArgument(CB, OperandNum, TLI); + } +} + +// Evaluate the noalias value in the MAAP for Obj, and intersect that result +// with the noalias information for other objects. +Value *CilkSanitizerImpl::Instrumentor::getNoAliasMAAPValue( + Instruction *I, IRBuilder<> &IRB, unsigned OperandNum, + MemoryLocation Loc, const RaceInfo::RaceData &RD, const Value *Obj, + Value *ObjNoAliasFlag) { + AAResults *AA = RI.getAA(); + + for (const RaceInfo::RaceData &OtherRD : RI.getRaceData(I)) { + // Skip checking other accesses that don't involve a pointer + if (!OtherRD.Access.getPointer()) + continue; + // Skip this operand when scanning for aliases + if (OperandNum == OtherRD.OperandNum) + continue; + + // If we can tell statically that these two memory locations don't alias, + // move on. + if (!AA->alias(Loc, getMemoryLocation(I, OtherRD.OperandNum, TLI))) + continue; + + // We trust that the MAAP value in LocalMAAPs[] for this object Obj, set by + // InsertArgMAAPs, is correct. 
We need to check the underlying objects of + // the other arguments to see if they match this object. + + // Otherwise we check the underlying objects. + SmallPtrSet OtherObjects; + RI.getObjectsFor(OtherRD.Access, OtherObjects); + for (const Value *OtherObj : OtherObjects) { + // If we find another instance of this object in another argument, + // then we don't have "no alias". + if (Obj == OtherObj) { + LLVM_DEBUG({ + dbgs() << "getNoAliasMAAPValue: Matching objects found:\n"; + dbgs() << " Obj: " << *Obj << "\n"; + dbgs() << " I: " << *I << "\n"; + dbgs() << " Operands " << OperandNum << ", " << OtherRD.OperandNum + << "\n"; + }); + return getMAAPIRValue(IRB, 0); + } + + // We now know that Obj and OtherObj don't match. + + // If the other object is an argument, then we trust the noalias value in + // the MAAP for Obj. + if (isa(OtherObj)) + continue; + + // // If the other object is something we can't reason about locally, then we + // // give up. + // if (!isa(OtherObj)) + // return getMAAPIRValue(IRB, 0); + + // Otherwise, check if the other object might alias this one. 
+ if (AA->alias(Loc, MemoryLocation::getBeforeOrAfter(OtherObj))) { + LLVM_DEBUG({ + dbgs() << "getNoAliasMAAPValue: Possible aliasing between:\n"; + dbgs() << " Obj: " << *Obj << "\n"; + dbgs() << " OtherObj: " << *OtherObj << "\n"; + }); + return getMAAPIRValue(IRB, 0); + } + } + } + return ObjNoAliasFlag; +} + +Value *CilkSanitizerImpl::Instrumentor::getMAAPValue(Instruction *I, + IRBuilder<> &IRB, + unsigned OperandNum, + MAAPValue DefaultMV, + bool CheckArgs) { + Function *F = I->getFunction(); + AAResults *AA = RI.getAA(); + MemoryLocation Loc = getMemoryLocation(I, OperandNum, TLI); + Value *MV = getMAAPIRValue(IRB, static_cast(MAAPValue::NoAccess)); + Value *DefaultMAAP = getMAAPIRValue(IRB, static_cast(DefaultMV)); + Value *NoAliasFlag = + getMAAPIRValue(IRB, static_cast(MAAPValue::NoAlias)); + + // If I is a call, check if any other arguments of this call alias the + // specified operand. + if (const CallBase *CB = dyn_cast(I)) { + unsigned OpIdx = 0; + bool FoundAliasingArg = false; + for (const Value *Arg : CB->args()) { + // Skip this operand and any operands that are not pointers. + if (OpIdx == OperandNum || !Arg->getType()->isPtrOrPtrVectorTy()) { + ++OpIdx; + continue; + } + + // If this argument does not alias Loc, skip it. + if (!AA->alias(Loc, getMemoryLocation(I, OpIdx, TLI))) { + ++OpIdx; + continue; + } + + // If the operands must alias, then discard the default noalias MAAP + // value. + AliasResult ArgAlias = AA->alias(Loc, getMemoryLocation(I, OpIdx, TLI)); + if (AliasResult::MustAlias == ArgAlias || + AliasResult::PartialAlias == ArgAlias) { + NoAliasFlag = getMAAPIRValue(IRB, 0); + break; + } + + // Get objects corresponding to this argument. + SmallPtrSet ArgObjects; + RI.getObjectsFor(RaceInfo::MemAccessInfo( + Arg, isModSet(AA->getArgModRefInfo(CB, OpIdx))), + ArgObjects); + for (const Value *Obj : ArgObjects) { + // If Loc and the racer object cannot alias, then there's nothing to + // check. 
+ if (!AA->alias(Loc, MemoryLocation::getBeforeOrAfter(Obj))) + continue; + + // If we have no local MAAP data for Obj, then act pessimally. + if (!LocalMAAPs.count(Obj)) { + FoundAliasingArg = true; + break; + } + + // Intersect the dynamic noalias information for this object into the + // noalias flag. + Value *FlagLoad = readMAAPVal(LocalMAAPs[Obj], IRB); + Value *ObjNoAliasFlag = IRB.CreateAnd( + FlagLoad, + getMAAPIRValue(IRB, static_cast(MAAPValue::NoAlias))); + NoAliasFlag = IRB.CreateAnd(NoAliasFlag, ObjNoAliasFlag); + } + + if (FoundAliasingArg) { + // If we found an aliasing argument, fall back to noalias = false. + NoAliasFlag = getMAAPIRValue(IRB, 0); + break; + } + ++OpIdx; + } + } + + // Check the recorded race data for I. + for (const RaceInfo::RaceData &RD : RI.getRaceData(I)) { + // Skip race data for different operands of the same instruction. + if (OperandNum != RD.OperandNum) + continue; + + // Otherwise use information about the possibly accessed objects to + // determine the MAAP value. + SmallPtrSet Objects; + RI.getObjectsFor(RD.Access, Objects); + + // If we have a valid racer, get the objects that that racer might access. + SmallPtrSet RacerObjects; + unsigned LocalRaceVal = static_cast(MAAPValue::NoAccess); + if (RD.Racer.isValid()) { + // Get the local race value for this racer + assert(RaceInfo::isLocalRace(RD.Type) && "Valid racer for nonlocal race"); + RI.getObjectsFor( + RaceInfo::MemAccessInfo(RD.Racer.getPtr(), RD.Racer.isMod()), + RacerObjects); + if (RD.Racer.isMod()) + LocalRaceVal |= static_cast(MAAPValue::Mod); + if (RD.Racer.isRef()) + LocalRaceVal |= static_cast(MAAPValue::Ref); + } + + // Get MAAPs from objects + for (const Value *Obj : Objects) { + // If we find an object with no MAAP, give up. 
+ if (!LocalMAAPs.count(Obj)) { + LLVM_DEBUG(dbgs() << "No local MAAP found for obj " << *Obj << "\n"); + if (RD.Racer.isValid()) + MV = IRB.CreateOr(MV, getMAAPIRValue(IRB, LocalRaceVal)); + else + MV = IRB.CreateOr(MV, DefaultMAAP); + continue; + } + + Value *FlagLoad = readMAAPVal(LocalMAAPs[Obj], IRB); + Value *FlagCheck = IRB.CreateAnd( + FlagLoad, getMAAPIRValue(IRB, RaceTypeToFlagVal(RD.Type))); + MV = IRB.CreateOr(MV, FlagCheck); + + // Get the dynamic no-alias bit from the MAAP value. + Value *ObjNoAliasFlag = IRB.CreateAnd( + FlagLoad, + getMAAPIRValue(IRB, static_cast(MAAPValue::NoAlias))); + Value *NoAliasCheck = + IRB.CreateICmpNE(getMAAPIRValue(IRB, 0), ObjNoAliasFlag); + + if (RD.Racer.isValid()) { + for (const Value *RObj : RacerObjects) { + // If the racer object matches Obj, there's no need to check a flag. + if (RObj == Obj) { + MV = IRB.CreateOr(MV, LocalRaceVal); + continue; + } + + // If Loc and the racer object cannot alias, then there's nothing to + // check. + if (!AA->alias(Loc, MemoryLocation::getBeforeOrAfter(RObj))) + continue; + + // If there is must or partial aliasing between this object and racer + // object, or we have no local MAAP information for RObj, then + // act conservatively, because there's nothing to check. + if (AliasResult::MustAlias == + AA->alias(Loc, MemoryLocation::getBeforeOrAfter(RObj)) || + AliasResult::PartialAlias == + AA->alias(Loc, MemoryLocation::getBeforeOrAfter(RObj)) || + !LocalMAAPs.count(RObj)) { + if (!LocalMAAPs.count(RObj)) + LLVM_DEBUG(dbgs() << "No local MAAP found for racer object " + << *RObj << "\n"); + else + LLVM_DEBUG(dbgs() << "AA indicates must or partial alias with " + "racer object " + << *RObj << "\n"); + MV = IRB.CreateOr(MV, LocalRaceVal); + continue; + } + + // These two objects may alias, based on static analysis. Check the + // dynamic MAAP values. 
We can suppress the race if either this + // object or the racer object is dynamically noalias, i.e., if either + // was derived from an allocation or noalias function argument. + Value *FlagLoad = readMAAPVal(LocalMAAPs[RObj], IRB); + Value *RObjNoAliasFlag = IRB.CreateAnd( + FlagLoad, + getMAAPIRValue(IRB, static_cast(MAAPValue::NoAlias))); + Value *RObjNoAliasCheck = + IRB.CreateICmpNE(getMAAPIRValue(IRB, 0), RObjNoAliasFlag); + Value *FlagCheck = IRB.CreateSelect( + IRB.CreateOr(NoAliasCheck, RObjNoAliasCheck), + getMAAPIRValue(IRB, 0), + IRB.CreateAnd(FlagLoad, getMAAPIRValue(IRB, LocalRaceVal))); + MV = IRB.CreateOr(MV, FlagCheck); + } + } else if (CheckArgs) { + // Check the function arguments that might alias this object. + for (Argument &Arg : F->args()) { + // Ignore non-pointer arguments + if (!Arg.getType()->isPtrOrPtrVectorTy()) + continue; + // Ignore any arguments that match checked objects. + if (&Arg == Obj) + continue; + // Check if Loc and Arg may alias. + if (!AA->alias(Loc, MemoryLocation::getBeforeOrAfter(&Arg))) + continue; + // If we have no local MAAP information about the argument, + // then there's nothing to check. + if (!LocalMAAPs.count(&Arg)) { + LLVM_DEBUG(dbgs() << "No local MAAP found for arg " << Arg << "\n"); + MV = IRB.CreateOr(MV, DefaultMAAP); + continue; + } + + // These two objects may alias, based on static analysis. Check the + // dynamic MAAP values. We can suppress the race if either + // this object or the racer object is dynamically noalias, i.e., if + // either was derived from an allocation or noalias function argument. 
+ Value *FlagLoad = readMAAPVal(LocalMAAPs[&Arg], IRB); + Value *ArgNoAliasFlag = IRB.CreateAnd( + FlagLoad, + getMAAPIRValue(IRB, static_cast(MAAPValue::NoAlias))); + Value *ArgNoAliasCheck = + IRB.CreateICmpNE(getMAAPIRValue(IRB, 0), ArgNoAliasFlag); + Value *FlagCheck = IRB.CreateSelect( + IRB.CreateOr(NoAliasCheck, ArgNoAliasCheck), + getMAAPIRValue(IRB, 0), + IRB.CreateAnd(FlagLoad, + getMAAPIRValue(IRB, RaceTypeToFlagVal(RD.Type)))); + MV = IRB.CreateOr(MV, FlagCheck); + } + } + // Call getNoAliasMAAPValue to evaluate the no-alias value in the + // MAAP for Obj, and intersect that result with the noalias + // information for other objects. + NoAliasFlag = IRB.CreateAnd(NoAliasFlag, + getNoAliasMAAPValue(I, IRB, OperandNum, Loc, + RD, Obj, ObjNoAliasFlag)); + } + } + // Record the no-alias information. + MV = IRB.CreateOr(MV, NoAliasFlag); + return MV; +} + +Value *CilkSanitizerImpl::Instrumentor::getMAAPCheck(Instruction *I, + IRBuilder<> &IRB, + unsigned OperandNum) { + Function *F = I->getFunction(); + bool LocalRace = RI.mightRaceLocally(I); + AAResults *AA = RI.getAA(); + MemoryLocation Loc = getMemoryLocation(I, OperandNum, TLI); + Value *MAAPChk = IRB.getTrue(); + if (LocalRace) + return IRB.getFalse(); + + // Check the recorded race data for I. + for (const RaceInfo::RaceData &RD : RI.getRaceData(I)) { + LLVM_DEBUG(dbgs() << " Race Data:\n Ptr = " << *RD.getPtr() << "\n"); + LLVM_DEBUG(RaceInfo::printRaceType(RD.Type, dbgs() << " ")); + LLVM_DEBUG(dbgs() << "\n"); + LLVM_DEBUG(dbgs() << " current MAAPChk = " << *MAAPChk << "\n"); + // Skip race data for different operands of the same instruction. + if (OperandNum != RD.OperandNum) + continue; + + // If this racer is opaque, then we can't create a valid MAAP check for it. + if (RaceInfo::isOpaqueRace(RD.Type)) + return IRB.getFalse(); + + // If this racer is local, then skip it. We've already accommodated local + // races via runtime pointer checks, if available. 
+ if (RaceInfo::isLocalRace(RD.Type)) + continue; + + LLVM_DEBUG(dbgs() << " Getting objects for racer\n"); + + SmallPtrSet Objects; + RI.getObjectsFor(RD.Access, Objects); + + // If we have a valid racer, get the objects that that racer might access. + SmallPtrSet RacerObjects; + + for (const Value *Obj : Objects) { + LLVM_DEBUG(dbgs() << " Object " << *Obj << "\n"); + LLVM_DEBUG(dbgs() << " current MAAPChk = " << *MAAPChk << "\n"); + // Ignore objects that are not involved in races. + if (!RI.ObjectInvolvedInRace(Obj)) + continue; + + // If we find an object with no MAAP, give up. + if (!LocalMAAPs.count(Obj)) { + LLVM_DEBUG(dbgs() << "No local MAAP found for object " << *Obj << "\n" + << " I: " << *I << "\n" + << " Ptr: " << *RD.Access.getPointer() << "\n"); + return IRB.getFalse(); + } + + Value *FlagLoad = readMAAPVal(LocalMAAPs[Obj], IRB); + LLVM_DEBUG(dbgs() << " FlagLoad " << *FlagLoad << "\n"); + + // If we're dealing with a local race, then don't suppress based on the + // race-type information from the MAAP value. For function arguments, + // that MAAP value reflects potential races via an ancestor, which should + // not disable checking of local races. + Value *LocalCheck; + Value *FlagCheck = IRB.CreateAnd( + FlagLoad, getMAAPIRValue(IRB, RaceTypeToFlagVal(RD.Type))); + LLVM_DEBUG(dbgs() << " FlagCheck " << *FlagCheck << "\n"); + LocalCheck = IRB.CreateICmpEQ(getMAAPIRValue(IRB, 0), FlagCheck); + LLVM_DEBUG(dbgs() << " LocalCheck " << *LocalCheck << "\n"); + + // Add the check. + MAAPChk = IRB.CreateAnd(MAAPChk, LocalCheck); + LLVM_DEBUG(dbgs() << " MAAPChk " << *MAAPChk << "\n"); + + // Get the dynamic no-alias bit from the MAAP value. 
+ Value *NoAliasCheck = IRB.CreateICmpNE( + getMAAPIRValue(IRB, 0), + IRB.CreateAnd( + FlagLoad, + getMAAPIRValue(IRB, static_cast(MAAPValue::NoAlias)))); + + if (RD.Racer.isValid()) { + for (const Value *RObj : RacerObjects) { + LLVM_DEBUG(dbgs() << " Racer Object " << *RObj << "\n"); + // If the racer object matches Obj, there's no need to check a flag. + if (RObj == Obj) { + MAAPChk = IRB.getFalse(); + continue; + } + + // Check if Loc and the racer object may alias. + if (!AA->alias(Loc, MemoryLocation::getBeforeOrAfter(RObj))) + continue; + + if (!LocalMAAPs.count(RObj)) { + LLVM_DEBUG(dbgs() << "No local MAAP found for racer object " << RObj + << "\n"); + MAAPChk = IRB.getFalse(); + continue; + } + + Value *FlagLoad = readMAAPVal(LocalMAAPs[RObj], IRB); + LLVM_DEBUG(dbgs() << " FlagLoad " << *FlagLoad << "\n"); + Value *LocalCheck; + Value *FlagCheck = IRB.CreateAnd( + FlagLoad, getMAAPIRValue(IRB, RaceTypeToFlagVal(RD.Type))); + LLVM_DEBUG(dbgs() << " FlagCheck " << *FlagCheck << "\n"); + LocalCheck = IRB.CreateICmpEQ(getMAAPIRValue(IRB, 0), FlagCheck); + LLVM_DEBUG(dbgs() << " LocalCheck " << *LocalCheck << "\n"); + + // Add the check. + Value *RObjNoAliasFlag = IRB.CreateAnd( + FlagLoad, + getMAAPIRValue(IRB, static_cast(MAAPValue::NoAlias))); + Value *RObjNoAliasCheck = + IRB.CreateICmpNE(getMAAPIRValue(IRB, 0), RObjNoAliasFlag); + MAAPChk = IRB.CreateAnd( + MAAPChk, + IRB.CreateOr(IRB.CreateOr(NoAliasCheck, RObjNoAliasCheck), + LocalCheck)); + } + } + + // Check the function arguments that might alias this object. + for (Argument &Arg : F->args()) { + // Ignore non-pointer arguments + if (!Arg.getType()->isPtrOrPtrVectorTy()) + continue; + // Ignore any arguments that match checked objects. + if (&Arg == Obj) + continue; + // Check if Loc and Arg may alias. + if (!AA->alias(Loc, MemoryLocation::getBeforeOrAfter(&Arg))) + continue; + // If we have no local MAAP information about the argument, give up. 
+ if (!LocalMAAPs.count(&Arg)) { + LLVM_DEBUG(dbgs() << "No local MAAP found for arg " << Arg << "\n"); + return IRB.getFalse(); + } + + LLVM_DEBUG(dbgs() << " Argument " << Arg << "\n"); + + // Incorporate the MAAP value for this argument if we don't have + // a dynamic no-alias bit set. + Value *FlagLoad = readMAAPVal(LocalMAAPs[&Arg], IRB); + Value *FlagCheck; + FlagCheck = IRB.CreateAnd( + FlagLoad, getMAAPIRValue(IRB, RaceTypeToFlagVal(RD.Type))); + Value *LocalCheck = IRB.CreateICmpEQ(getMAAPIRValue(IRB, 0), FlagCheck); + + Value *ArgNoAliasFlag = IRB.CreateAnd( + FlagLoad, + getMAAPIRValue(IRB, static_cast(MAAPValue::NoAlias))); + Value *ArgNoAliasCheck = + IRB.CreateICmpNE(getMAAPIRValue(IRB, 0), ArgNoAliasFlag); + MAAPChk = IRB.CreateAnd( + MAAPChk, IRB.CreateOr(IRB.CreateOr(NoAliasCheck, ArgNoAliasCheck), + LocalCheck)); + } + } + } + return MAAPChk; +} + +bool CilkSanitizerImpl::Instrumentor::PerformDelayedInstrumentation() { + bool Result = false; + // Handle delayed simple instructions + for (Instruction *I : DelayedSimpleInsts) { + assert((RI.mightRaceViaAncestor(I) || RI.mightRaceLocally(I)) && + "Delayed instrumentation is not local race or race via ancestor"); + IRBuilder<> IRB(I); + DebugLoc Loc = searchForDebugLoc(I); + + if (MAAPChecks) { + Value *MAAPChk = getMAAPCheck(I, IRB); + if (MAAPChk != IRB.getFalse()) { + Instruction *CheckTerm = + SplitBlockAndInsertIfThen(IRB.CreateICmpEQ(MAAPChk, IRB.getFalse()), + I, false, nullptr, &DTU, &LI); + IRB.SetInsertPoint(CheckTerm); + } + } + if (Loc) + IRB.SetCurrentDebugLocation(Loc); + if (isa(I) || isa(I)) + Result |= CilkSanImpl.instrumentLoadOrStore(I, IRB); + else if (isa(I) || isa(I)) + Result |= CilkSanImpl.instrumentAtomic(I, IRB); + else + dbgs() << "[Cilksan] Unknown simple instruction: " << *I << "\n"; + } + + // Handle delayed memory intrinsics + for (auto &MemIntrinOp : DelayedMemIntrinsics) { + Instruction *I = MemIntrinOp.first; + assert((RI.mightRaceViaAncestor(I) || 
RI.mightRaceLocally(I)) && + "Delayed instrumentation is not local race or race via ancestor"); + unsigned OperandNum = MemIntrinOp.second; + IRBuilder<> IRB(I); + DebugLoc Loc = searchForDebugLoc(I); + + if (MAAPChecks) { + Value *MAAPChk = getMAAPCheck(I, IRB, OperandNum); + if (MAAPChk != IRB.getFalse()) { + Instruction *CheckTerm = + SplitBlockAndInsertIfThen(IRB.CreateICmpEQ(MAAPChk, IRB.getFalse()), + I, false, nullptr, &DTU, &LI); + IRB.SetInsertPoint(CheckTerm); + } + } + if (Loc) + IRB.SetCurrentDebugLocation(Loc); + Result |= CilkSanImpl.instrumentAnyMemIntrinAcc(I, OperandNum, IRB); + } + return Result; +} + +// Helper function to walk the hierarchy of tasks containing BasicBlock BB to +// get the top-level task in loop L that contains BB. +static Task *GetTopLevelTaskFor(BasicBlock *BB, Loop *L, TaskInfo &TI) { + Task *T = TI.getTaskFor(BB); + // Return null if we don't find a task for BB contained in L. + if (!T || !L->contains(T->getEntry())) + return nullptr; + + // Walk up the tree of tasks until we discover a task containing BB that is + // outside of L. + while (L->contains(T->getParentTask()->getEntry())) + T = T->getParentTask(); + + return T; +} + +void CilkSanitizerImpl::Instrumentor::GetDetachesForCoalescedInstrumentation( + SmallPtrSetImpl &LoopInstToHoist, + SmallPtrSetImpl &LoopInstToSink) { + // Determine detaches to instrument for the coalesced instrumentation. + for (Instruction *I : LoopInstToHoist) { + Loop *L = LI.getLoopFor(I->getParent()); + // Record the detaches for the loop preheader, where the coalesced + // instrumentation will be inserted. + getDetachesForInstruction(L->getLoopPreheader()->getTerminator()); + } + for (Instruction *I : LoopInstToSink) { + Loop *L = LI.getLoopFor(I->getParent()); + SmallVector ExitBlocks; + L->getUniqueExitBlocks(ExitBlocks); + for (BasicBlock *ExitBB : ExitBlocks) { + if (GetTopLevelTaskFor(ExitBB, L, TI)) + // Skip any exit blocks in a Tapir task inside the loop. 
These exit + // blocks lie on exception-handling paths, and to handle these blocks, + // it suffices to insert instrumentation in the unwind destination of + // the corresponding detach, which must also be a loop-exit block. + continue; + + // Record the detaches for the exit block, where the coalesced + // instrumentation will be inserted. + getDetachesForInstruction(ExitBB->getTerminator()); + } + } +} + +bool CilkSanitizerImpl::Instrumentor::InstrumentAncillaryInstructions( + SmallPtrSetImpl &Allocas, + SmallPtrSetImpl &AllocationFnCalls, + SmallPtrSetImpl &FreeCalls, + DenseMap &SyncRegNums, + DenseMap &SRCounters, const DataLayout &DL) { + bool Result = false; + SmallPtrSet Syncs; + SmallPtrSet Loops; + SmallPtrSet InstrumentedAllocFns; + + // Instrument allocas and allocation-function calls that may be involved in a + // race. + for (Instruction *I : Allocas) { + if (CilkSanImpl.ObjectMRForRace.count(I) || + CilkSanImpl.lookupPointerMayBeCaptured(I)) { + CilkSanImpl.instrumentAlloca(I, TI); + getDetachesForInstruction(I); + Result = true; + } + } + for (Instruction *I : AllocationFnCalls) { + // Instrument any allocation-function calls that may allocate memory + // involved in a race. + // + // Note that, without MAAP checks, we must be more conservative about + // considering what memory allocations might be involved in checking for + // races. For example, suppose a function call in a loop uses memory that + // is malloc'd and free'd within that loop. Static analysis might determine + // no race is possible on that memory, but a MAAP check is needed to + // communicate that static information to the function at runtime in order + // to avoid dynamic checks on the same location returned by repeated calls + // to malloc. + + // FIXME: This test won't identify posix_memalign calls as needing + // instrumentation, because posix_memalign modifies a pointer to the pointer + // to the object. 
+ if (!MAAPChecks || CilkSanImpl.ObjectMRForRace.count(I) || + CilkSanImpl.lookupPointerMayBeCaptured(I)) { + CilkSanImpl.instrumentAllocationFn(I, DT, TLI); + InstrumentedAllocFns.insert(I); + getDetachesForInstruction(I); + Result = true; + } + } + for (Instruction *I : FreeCalls) { + // The first argument of the free call is the pointer. + Value *Ptr = I->getOperand(0); + // If the pointer corresponds to an allocation function call in this + // function, or if the pointer is involved in a race, then instrument it. + if (Instruction *PtrI = dyn_cast(Ptr)) { + if (InstrumentedAllocFns.count(PtrI)) { + CilkSanImpl.instrumentFree(I, TLI); + getDetachesForInstruction(I); + Result = true; + continue; + } + } + if (RI.ObjectInvolvedInRace(Ptr) || + CilkSanImpl.unknownObjectUses(Ptr, &LI, TLI)) { + CilkSanImpl.instrumentFree(I, TLI); + getDetachesForInstruction(I); + Result = true; + } + } + + // Instrument detaches + for (DetachInst *DI : Detaches) { + CilkSanImpl.instrumentDetach(DI, SyncRegNums[DI->getSyncRegion()], + SRCounters[DI->getDetached()], DT, TI, LI); + Result = true; + // Get syncs associated with this detach + for (SyncInst *SI : CilkSanImpl.DetachToSync[DI]) + Syncs.insert(SI); + + if (CilkSanImpl.Options.InstrumentLoops) { + // Get any loop associated with this detach. + Loop *L = LI.getLoopFor(DI->getParent()); + if (spawnsTapirLoopBody(DI, LI, TI)) + Loops.insert(L); + } + } + + // Instrument associated syncs + for (SyncInst *SI : Syncs) + CilkSanImpl.instrumentSync(SI, SyncRegNums[SI->getSyncRegion()]); + + if (CilkSanImpl.Options.InstrumentLoops) { + // Recursively instrument all loops + for (Loop *L : Loops) + CilkSanImpl.instrumentTapirLoop(*L, TI, SyncRegNums); + } + + return Result; +} + +// Helper function to get a value for the runtime trip count of the given loop. 
+static const SCEV *getRuntimeTripCount(Loop &L, ScalarEvolution *SE, + bool IsTapirLoop) { + BasicBlock *Latch = L.getLoopLatch(); + + // The exit count from the latch is sufficient for Tapir loops, because early + // exits from such loops are handled in a special manner. For other loops, we + // use the backedge taken count. + const SCEV *BECountSC = + IsTapirLoop ? SE->getExitCount(&L, Latch) : SE->getBackedgeTakenCount(&L); + if (isa(BECountSC) || + !BECountSC->getType()->isIntegerTy()) { + LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n"); + return SE->getCouldNotCompute(); + } + + // Add 1 since the backedge count doesn't include the first loop iteration. + const SCEV *TripCountSC = + SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); + if (isa(TripCountSC)) { + LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n"); + return SE->getCouldNotCompute(); + } + + return TripCountSC; +} + +// Helper function to find where in the given basic block to insert coalesced +// instrumentation. +static Instruction *getLoopBlockInsertPt(BasicBlock *BB, FunctionCallee LoopHook, + bool AfterHook) { + // BasicBlock *PreheaderBB = L->getLoopPreheader(); + for (Instruction &I : *BB) + if (CallBase *CB = dyn_cast(&I)) + if (const Function *Called = CB->getCalledFunction()) + if (Called == LoopHook.getCallee()) { + // We found a call to the specified hook. Pick an insertion point + // with respect to it. + if (AfterHook) + return &*CB->getIterator()->getNextNode(); + else + return CB; + } + + if (AfterHook) + return &*BB->getFirstInsertionPt(); + else + return BB->getTerminator(); +} + +// TODO: Maybe to avoid confusion with CilkSanImpl.Options.InstrumentLoops +// (which is unrelated to this), rename this to involve the word "hoist" or something. 
+bool CilkSanitizerImpl::Instrumentor::InstrumentLoops( + SmallPtrSetImpl &LoopInstToHoist, + SmallPtrSetImpl &LoopInstToSink, + SmallPtrSetImpl &TapirLoops, ScalarEvolution *SE) { + bool Result = false; + + // First insert computation for the hook arguments for all instructions to + // hoist or sink coalesced instrumentation. We do this before inserting the + // hook calls themselves, so that changes to the CFG -- specifically, from + // inserting MAAP checks -- do not disrupt any function analyses we need. + + // Map instructions in the loop to address and range arguments for coalesced + // instrumentation. + DenseMap> HoistedHookArgs; + // Compute arguments for coalesced instrumentation hoisted to before the loop. + for (Instruction *I : LoopInstToHoist) { + // Get the insertion point in the preheader of the loop. + Loop *L = LI.getLoopFor(I->getParent()); + assert(L->getLoopPreheader() && "No preheader for loop"); + Instruction *InsertPt = + getLoopBlockInsertPt(L->getLoopPreheader(), CilkSanImpl.CsanBeforeLoop, + /*AfterHook*/ false); + + // TODO: Unify this SCEV computation with the similar computation for + // instructions in LoopInstToSink. + + // Get the SCEV describing this instruction's pointer + const SCEV *V = SE->getSCEV(getLoadStorePointerOperand(I)); + const SCEVAddRecExpr *SrcAR = dyn_cast(V); + + // Get the stride + const SCEV *StrideExpr = SrcAR->getStepRecurrence(*SE); + assert(!isa(StrideExpr) && + "Stride should be computable"); + bool NegativeStride = SE->isKnownNegative(StrideExpr); + if (NegativeStride) + StrideExpr = SE->getNegativeSCEV(StrideExpr); + + // Get the first address accessed. + const SCEV *FirstAddr = SrcAR->getStart(); + + // Get the last address accessed. + BasicBlock *Latch = L->getLoopLatch(); + const SCEV *BECount = TapirLoops.count(L) ? 
SE->getExitCount(L, Latch) + : SE->getBackedgeTakenCount(L); + const SCEV *LastAddr = SrcAR->evaluateAtIteration(BECount, *SE); + + // Get the size (number of bytes) of the address range accessed. + const SCEV *RangeExpr = NegativeStride + ? SE->getMinusSCEV(FirstAddr, LastAddr) + : SE->getMinusSCEV(LastAddr, FirstAddr); + RangeExpr = SE->getAddExpr(RangeExpr, StrideExpr); + + // Get the start (lowest address) of the address range accessed. + const SCEV *Addr = NegativeStride ? LastAddr : FirstAddr; + + // Get instructions for calculating address range + const DataLayout &DL = CilkSanImpl.M.getDataLayout(); + LLVMContext &Ctx = CilkSanImpl.M.getContext(); + SCEVExpander Expander(*SE, DL, "cilksan"); + + Value *AddrVal = + Expander.expandCodeFor(Addr, Type::getInt8PtrTy(Ctx), InsertPt); + Value *RangeVal = + Expander.expandCodeFor(RangeExpr, Type::getInt64Ty(Ctx), InsertPt); + HoistedHookArgs[I] = std::make_pair(AddrVal, RangeVal); + } + + // Map pairs of instruction and loop-exit to address and range arguments for + // coalesced instrumentation. + DenseMap, std::pair> + SunkHookArgs; + // Map to track which loops we have already created counters for + SmallMapVector LoopToCounterMap; + // Compute arguments for coalesced instrumentation sunk after the loop. + for (Instruction *I : LoopInstToSink) { + // Get the loop + Loop *L = LI.getLoopFor(I->getParent()); + + // Add a counter to count the number of iterations executed in this loop. + // In particular, this count will record the number of times the backedge of + // the loop is taken. 
+ if (!LoopToCounterMap.count(L)) { + assert(L->getLoopPreheader() && "No preheader for loop"); + assert(L->getLoopLatch() && "No unique latch for loop"); + IRBuilder<> IRB(&L->getHeader()->front()); + LLVMContext &Ctx = CilkSanImpl.M.getContext(); + + PHINode *PN = IRB.CreatePHI(Type::getInt64Ty(Ctx), 2); + PN->addIncoming(ConstantInt::getNullValue(Type::getInt64Ty(Ctx)), + L->getLoopPreheader()); + IRB.SetInsertPoint(&*L->getLoopLatch()->getFirstInsertionPt()); + Value *Add = IRB.CreateAdd(PN, ConstantInt::get(Type::getInt64Ty(Ctx), 1), + "", true, true); + PN->addIncoming(Add, L->getLoopLatch()); + LoopToCounterMap.insert(std::make_pair(L, PN)); + } + + // Get the counter for this loop. + Value *Counter = LoopToCounterMap[L]; + + // Get the SCEV describing this instruction's pointer + const SCEV *V = SE->getSCEV(getLoadStorePointerOperand(I)); + const SCEVAddRecExpr *SrcAR = dyn_cast(V); + + // Get the stride + const SCEV *StrideExpr = SrcAR->getStepRecurrence(*SE); + assert(!isa(StrideExpr) && + "Stride should be computable"); + bool NegativeStride = SE->isKnownNegative(StrideExpr); + if (NegativeStride) + StrideExpr = SE->getNegativeSCEV(StrideExpr); + + // Get the first address accessed. + const SCEV *FirstAddr = SrcAR->getStart(); + + // Get the last address accessed, based on the counter value.. + const SCEV *BECount = SE->getUnknown(Counter); + const SCEV *LastAddr = SrcAR->evaluateAtIteration(BECount, *SE); + + // Get the size (number of bytes) of the address range accessed. + const SCEV *RangeExpr = NegativeStride + ? SE->getMinusSCEV(FirstAddr, LastAddr) + : SE->getMinusSCEV(LastAddr, FirstAddr); + RangeExpr = SE->getAddExpr(RangeExpr, StrideExpr); + // Get the start (lowest address) of the address range accessed. + const SCEV *Addr = NegativeStride ? LastAddr : FirstAddr; + + // Expand SCEV's into instructions for calculating the coalesced hook + // arguments in each exit block. 
+ LLVMContext &Ctx = CilkSanImpl.M.getContext(); + const DataLayout &DL = CilkSanImpl.M.getDataLayout(); + SCEVExpander Expander(*SE, DL, "cilksan"); + SmallVector ExitBlocks; + L->getUniqueExitBlocks(ExitBlocks); + for (BasicBlock *ExitBB : ExitBlocks) { + if (GetTopLevelTaskFor(ExitBB, L, TI)) + // Skip any exit blocks in a Tapir task inside the loop. These exit + // blocks lie on exception-handling paths, and to handle these blocks, + // it suffices to insert instrumentation in the unwind destination of + // the corresponding detach, which must also be a loop-exit block. + continue; + + // Instruction *InsertPt = &*ExitBB->getFirstInsertionPt(); + Instruction *InsertPt = + getLoopBlockInsertPt(ExitBB, CilkSanImpl.CsanAfterLoop, + /*AfterHook*/ true); + Value *AddrVal = + Expander.expandCodeFor(Addr, Type::getInt8PtrTy(Ctx), InsertPt); + Value *RangeVal = + Expander.expandCodeFor(RangeExpr, Type::getInt64Ty(Ctx), InsertPt); + + assert(isa(RangeVal) && + "Expected computation of RangeVal to produce an instruction."); + SunkHookArgs[std::make_pair(I, ExitBB)] = + std::make_pair(AddrVal, RangeVal); + } + } + + // Now insert coalesced instrumentation, including relevant MAAP checks. + // + // TODO: For now, we only handle LoadInst and StoreInst. Add other operations + // later, such as atomics and memory intrinsics. + + // Insert coalesced instrumentation hoisted before the loop. + for (Instruction *I : LoopInstToHoist) { + LLVM_DEBUG(dbgs() << "Loop instruction for hoisting instrumentation: " << *I + << "\n"); + + // Get the local ID of this instruction. 
+ uint64_t LocalId; + if (LoadInst *LI = dyn_cast(I)) { + uint64_t LoadId = CilkSanImpl.LoadFED.add(*LI); + + // TODO: Don't recalculate underlying objects + uint64_t LoadObjId = CilkSanImpl.LoadObj.add( + *LI, + CilkSanImpl.lookupUnderlyingObject(getLoadStorePointerOperand(LI))); + assert(LoadId == LoadObjId && + "Load received different ID's in FED and object tables."); + LocalId = LoadId; + // Update the statistic here, since we're guaranteed to insert the hook at + // this point. + ++NumHoistedInstrumentedReads; + } else if (StoreInst *SI = dyn_cast(I)) { + uint64_t StoreId = CilkSanImpl.StoreFED.add(*SI); + + // TODO: Don't recalculate underlying objects + uint64_t StoreObjId = CilkSanImpl.StoreObj.add( + *SI, + CilkSanImpl.lookupUnderlyingObject(getLoadStorePointerOperand(SI))); + assert(StoreId == StoreObjId && + "Store received different ID's in FED and object tables."); + LocalId = StoreId; + // Update the statistic here, since we're guaranteed to insert the hook at + // this point. + ++NumHoistedInstrumentedWrites; + } else + llvm_unreachable("Unexpected instruction to hoist instrumentation."); + + // For now, there shouldn't be a reason to return false since we already + // verified the size, stride, and tripcount. + Loop *L = LI.getLoopFor(I->getParent()); + Instruction *InsertPt = + getLoopBlockInsertPt(L->getLoopPreheader(), CilkSanImpl.CsanBeforeLoop, + /*AfterLoop*/ false); + IRBuilder<> IRB(InsertPt); + if (MAAPChecks) { + Value *MAAPChk = getMAAPCheck(I, IRB); + if (MAAPChk != IRB.getFalse()) { + Instruction *CheckTerm = + SplitBlockAndInsertIfThen(IRB.CreateICmpEQ(MAAPChk, IRB.getFalse()), + InsertPt, false, nullptr, &DTU, &LI); + IRB.SetInsertPoint(CheckTerm); + } + } + IRB.SetCurrentDebugLocation(searchForDebugLoc(I)); + CilkSanImpl.instrumentLoadOrStoreHoisted( + I, HoistedHookArgs[I].first, HoistedHookArgs[I].second, IRB, LocalId); + Result = true; + } + + // Insert coalesced instrumentation sunk after the loop. 
+ for (Instruction *I : LoopInstToSink) { + LLVM_DEBUG(dbgs() << "Loop instruction for sinking instrumentation: " << *I + << "\n"); + Loop *L = LI.getLoopFor(I->getParent()); + + // Get the local ID of this instruction. We do this computation early to + // avoid recomputing the local ID once per exit block. + uint64_t LocalId; + if (LoadInst *LI = dyn_cast(I)) { + uint64_t LoadId = CilkSanImpl.LoadFED.add(*LI); + + // TODO: Don't recalculate underlying objects + uint64_t LoadObjId = CilkSanImpl.LoadObj.add( + *LI, + CilkSanImpl.lookupUnderlyingObject(getLoadStorePointerOperand(LI))); + assert(LoadId == LoadObjId && + "Load received different ID's in FED and object tables."); + LocalId = LoadId; + // Update the statistic here, since we're guaranteed to insert the hooks + // at this point, and to avoid overcounting the number of instructions on + // loops with multiple exits. + ++NumSunkInstrumentedReads; + } else if (StoreInst *SI = dyn_cast(I)) { + uint64_t StoreId = CilkSanImpl.StoreFED.add(*SI); + + // TODO: Don't recalculate underlying objects + uint64_t StoreObjId = CilkSanImpl.StoreObj.add( + *SI, + CilkSanImpl.lookupUnderlyingObject(getLoadStorePointerOperand(SI))); + assert(StoreId == StoreObjId && + "Store received different ID's in FED and object tables."); + LocalId = StoreId; + // Update the statistic here, since we're guaranteed to insert the hooks + // at this point, and to avoid overcounting the number of instructions on + // loops with multiple exits. + ++NumSunkInstrumentedWrites; + } else + llvm_unreachable("Unexpected instruction to sink instrumentation."); + + SmallVector ExitBlocks; + L->getUniqueExitBlocks(ExitBlocks); + for (BasicBlock *ExitBB : ExitBlocks) { + if (GetTopLevelTaskFor(ExitBB, L, TI)) + // Skip any exit blocks in a Tapir task inside the loop. 
These exit + // blocks lie on exception-handling paths, and to handle these blocks, + // it suffices to insert instrumentation in the unwind destination of + // the corresponding detach, which must also be a loop-exit block. + continue; + + // After the loop, perform the coalesced read/write. + auto HookArgsKey = std::make_pair(I, ExitBB); + + // Insert the hook call after the computation of RangeVal. + Instruction *InsertPt = + cast(SunkHookArgs[HookArgsKey].second) + ->getIterator() + ->getNextNode(); + IRBuilder<> IRB(&*InsertPt); + if (MAAPChecks) { + Value *MAAPChk = getMAAPCheck(I, IRB); + if (MAAPChk != IRB.getFalse()) { + Instruction *CheckTerm = SplitBlockAndInsertIfThen( + IRB.CreateICmpEQ(MAAPChk, IRB.getFalse()), &*InsertPt, false, + nullptr, &DTU, &LI); + IRB.SetInsertPoint(CheckTerm); + } + } + IRB.SetCurrentDebugLocation(searchForDebugLoc(I)); + CilkSanImpl.instrumentLoadOrStoreHoisted( + I, SunkHookArgs[HookArgsKey].first, SunkHookArgs[HookArgsKey].second, + IRB, LocalId); + Result = true; + } + } + + return Result; +} + +bool CilkSanitizerImpl::instrumentLoadOrStoreHoisted(Instruction *I, + Value *Addr, Value *Size, + IRBuilder<> &IRB, + uint64_t LocalId) { + // The caller of this method is guaranteed to have computed the Addr and Size + // values with the right type for the hook, so no additional type conversions + // are needed. 
+ CsiLoadStoreProperty Prop; + if (LoadInst *LI = dyn_cast(I)) { + Prop.setAlignment(MaybeAlign(LI->getAlign())); + Prop.setIsThreadLocal(isThreadLocalObject(lookupUnderlyingObject(Addr))); + // Instrument the load + Value *CsiId = LoadFED.localToGlobalId(LocalId, IRB); + Value *Args[] = {CsiId, Addr, Size, Prop.getValue(IRB)}; + Instruction *Call = IRB.CreateCall(CsanLargeRead, Args); + IRB.SetInstDebugLocation(Call); + } else if (StoreInst *SI = dyn_cast(I)) { + Prop.setAlignment(SI->getAlign()); + Prop.setIsThreadLocal(isThreadLocalObject(lookupUnderlyingObject(Addr))); + // Instrument the store + Value *CsiId = StoreFED.localToGlobalId(LocalId, IRB); + Value *Args[] = {CsiId, Addr, Size, Prop.getValue(IRB)}; + Instruction *Call = IRB.CreateCall(CsanLargeWrite, Args); + IRB.SetInstDebugLocation(Call); + } + return true; +} + +static bool CheckSanitizeCilkAttr(Function &F) { + if (IgnoreSanitizeCilkAttr) + return true; + return F.hasFnAttribute(Attribute::SanitizeCilk); +} + +bool CilkSanitizerImpl::setupFunction(Function &F, bool NeedToSetupCalls) { + if (F.empty() || shouldNotInstrumentFunction(F) || + LinkedFromBitcode.count(&F) || !CheckSanitizeCilkAttr(F)) { + LLVM_DEBUG({ + dbgs() << "Skipping " << F.getName() << "\n"; + if (F.empty()) + dbgs() << " Empty function\n"; + else if (shouldNotInstrumentFunction(F)) + dbgs() << " Function should not be instrumented\n"; + else if (LinkedFromBitcode.count(&F)) + dbgs() << " Function from linked-in bitcode\n"; + else if (!CheckSanitizeCilkAttr(F)) + dbgs() << " Function lacks sanitize_cilk attribute\n"; + }); + return false; + } + + LLVM_DEBUG(dbgs() << "Setting up " << F.getName() + << " for instrumentation\n"); + + // TODO: Move these steps into csi-setup pass. + + if (NeedToSetupCalls && Options.CallsMayThrow) + // Promote calls to invokes to insert instrumentation in exception-handling + // code. 
+ setupCalls(F); + + DominatorTree &DT = GetDomTree(F); + LoopInfo &LI = GetLoopInfo(F); + + if (Options.InstrumentLoops) + // Simplify loops to prepare for loop instrumentation + for (Loop *L : LI) + simplifyLoop(L, &DT, &LI, nullptr, nullptr, nullptr, + /* PreserveLCSSA */ false); + + // Canonicalize the CFG for instrumentation. + setupBlocks(F, &DT, &LI); + + return true; +} + +/// Set DebugLoc on the call instruction to a CSI hook, based on the +/// debug information of the instrumented instruction. +static void setInstrumentationDebugLoc(Function &Instrumented, + Instruction *Call) { + DISubprogram *Subprog = Instrumented.getSubprogram(); + if (Subprog) { + LLVMContext &C = Instrumented.getParent()->getContext(); + Call->setDebugLoc(DILocation::get(C, 0, 0, Subprog)); + } +} + +bool CilkSanitizerImpl::instrumentFunctionUsingRI(Function &F) { + + if (F.empty() || shouldNotInstrumentFunction(F) || + !CheckSanitizeCilkAttr(F)) { + LLVM_DEBUG({ + dbgs() << "Skipping " << F.getName() << "\n"; + if (F.empty()) + dbgs() << " Empty function\n"; + else if (shouldNotInstrumentFunction(F)) + dbgs() << " Function should not be instrumented\n"; + else if (!CheckSanitizeCilkAttr(F)) + dbgs() << " Function lacks sanitize_cilk attribute\n";}); + return false; + } + + LLVM_DEBUG(dbgs() << "Instrumenting " << F.getName() << "\n"); + + SmallVector AllLoadsAndStores; + SmallVector LocalLoadsAndStores; + SmallVector AtomicAccesses; + SmallVector MemIntrinCalls; + SmallVector IntrinsicCalls; + SmallVector LibCalls; + SmallVector Callsites; + // Ancillary instructions + SmallPtrSet Allocas; + SmallPtrSet AllocationFnCalls; + SmallPtrSet FreeCalls; + SmallVector Syncs; + DenseMap SRCounters; + DenseMap SyncRegNums; + + // Find instructions that can be hoisted or sinked + SmallPtrSet LoopInstToHoist; + SmallPtrSet LoopInstToSink; + SmallPtrSet TapirLoops; + + const TargetLibraryInfo *TLI = &GetTLI(F); + DominatorTree &DT = GetDomTree(F); + LoopInfo &LI = GetLoopInfo(F); + TaskInfo &TI 
= GetTaskInfo(F); + RaceInfo &RI = GetRaceInfo(F); + + ICFLoopSafetyInfo SafetyInfo; + + ScalarEvolution &SE = *(RI.getSE()); + + for (BasicBlock &BB : F) { + // Record the Tapir sync instructions found + if (SyncInst *SI = dyn_cast(BB.getTerminator())) + Syncs.push_back(SI); + + // get loop for BB + Loop *L = LI.getLoopFor(&BB); + if (L) + SafetyInfo.computeLoopSafetyInfo(L); + + // Record the memory accesses in the basic block + for (Instruction &Inst : BB) { + bool CanCoalesce = false; + // If the instruction is in a loop and can only race via ancestor, and + // size < stride, store it. + if (L && EnableStaticRaceDetection && LoopHoisting && + SafetyInfo.isGuaranteedToExecute(Inst, &DT, &TI, L)) { + // TODO: For now, only look at loads and stores. Add atomics later. + // Need to add any others? + if (isa(Inst) || isa(Inst)) { + bool RaceViaAncestor = false; + bool OtherRace = false; + for (const RaceInfo::RaceData &RD : RI.getRaceData(&Inst)) { + if (RaceInfo::isRaceViaAncestor(RD.Type)) { + RaceViaAncestor = true; + } else if (RaceInfo::isOpaqueRace(RD.Type)) { + LLVM_DEBUG(dbgs() << "Can't hoist or sink instrumentation for " + << Inst << "\n Opaque race.\n"); + OtherRace = true; + break; + } else if (RaceInfo::isLocalRace(RD.Type)) { + if (!RD.Racer.isValid()) { + LLVM_DEBUG(dbgs() + << "Can't hoist or sink instrumentation for " << Inst + << "\n Local race with opaque racer.\n"); + OtherRace = true; + break; + } else if (LI.getLoopFor(RD.Racer.I->getParent()) == L) { + LLVM_DEBUG(dbgs() + << "Can't hoist or sink instrumentation for " << Inst + << "\n Local race with racer in same loop: " + << *RD.Racer.I << "\n"); + OtherRace = true; + break; + } + RaceViaAncestor = true; + } + } + // If this instruction can only race via an ancestor, see if it can be + // hoisted. 
+ if (RaceViaAncestor && !OtherRace) { + const SCEV *Size = SE.getElementSize(&Inst); + const SCEV *V = SE.getSCEV(getLoadStorePointerOperand(&Inst)); + // If not an AddRecExpr, don't proceed + if (const SCEVAddRecExpr *SrcAR = dyn_cast(V)) { + const SCEV *Stride = SrcAR->getStepRecurrence(SE); + const SCEV *Diff; + if (SE.isKnownNonNegative(Stride)) { + Diff = SE.getMinusSCEV(Size, Stride); + } else { + // If we can't compare size and stride, + // SE.isKnownNonNegative(Diff) will be false. + Diff = SE.getAddExpr(Size, Stride); + } + bool isTapirLoop = static_cast(getTaskIfTapirLoop(L, &TI)); + if (isTapirLoop) + TapirLoops.insert(L); + const SCEV *TripCount = getRuntimeTripCount(*L, &SE, isTapirLoop); + + if (SE.isKnownNonNegative(Diff)) { + if (!isa(TripCount) && + SE.isAvailableAtLoopEntry(SrcAR->getStart(), L)) { + // Can hoist if stride <= size and the tripcount is known and + // the start is available at loop entry. + LoopInstToHoist.insert(&Inst); + CanCoalesce = true; + LLVM_DEBUG(dbgs() << "Can hoist instrumentation for " << Inst << "\n"); + } else if (!isa( + SE.getConstantMaxBackedgeTakenCount(L))) { + // Can sink if stride <= size and the tripcount is unknown but + // guaranteed to be finite. 
+ LoopInstToSink.insert(&Inst); + CanCoalesce = true; + LLVM_DEBUG(dbgs() << "Can sink instrumentation for " << Inst << "\n"); + } else { + LLVM_DEBUG(dbgs() + << "Can't hoist or sink instrumentation for " + << Inst << "\n TripCount = " << *TripCount + << "\n SrcAR->getStart() = " << *SrcAR->getStart() + << "\n SE.getConstantMaxBackedgeTakenCount(L) = " + << *SE.getConstantMaxBackedgeTakenCount(L) + << "\n"); + } + } else { + LLVM_DEBUG(dbgs() << "Can't hoist instrumentation for " << Inst + << "\n Diff SCEV not known non-negative: " + << *Diff << "\n"); + } + } else { + LLVM_DEBUG( + dbgs() + << "Can't hoist or sink instrumentation for " << Inst + << "\n SCEV for load/store pointer operand not AddRecExpr: " + << *V << ": " << V->getSCEVType() << "\n"); + } + } + } + } + + if (!CanCoalesce) { + // TODO: Handle VAArgInst + if (isa(Inst) || isa(Inst)) + LocalLoadsAndStores.push_back(&Inst); + else if (isa(Inst) || isa(Inst)) + AtomicAccesses.push_back(&Inst); + else if (isa(Inst)) + Allocas.insert(&Inst); + else if (isa(Inst)) { + // if (CallInst *CI = dyn_cast(&Inst)) + // maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI); + + // If we find a sync region, record it. + if (const IntrinsicInst *II = dyn_cast(&Inst)) + if (Intrinsic::syncregion_start == II->getIntrinsicID()) { + // Identify this sync region with a counter value, where all sync + // regions within a function or task are numbered from 0. + if (TI.getTaskFor(&BB)) { + BasicBlock *TEntry = TI.getTaskFor(&BB)->getEntry(); + // Create a new counter if need be. + if (!SRCounters.count(TEntry)) + SRCounters[TEntry] = 0; + SyncRegNums[&Inst] = SRCounters[TEntry]++; + } + } + + // Record this function call as either an allocation function, a call to + // free (or delete), a memory intrinsic, or an ordinary real function + // call. 
+ if (isAllocFn(&Inst, TLI)) + AllocationFnCalls.insert(&Inst); + else if (isFreeFn(&Inst, TLI)) + FreeCalls.insert(&Inst); + else if (isa(Inst)) + MemIntrinCalls.push_back(&Inst); + else if (!simpleCallCannotRace(Inst) && !shouldIgnoreCall(Inst)) { + if (isa(&Inst)) { + if (Inst.mayReadOrWriteMemory()) + IntrinsicCalls.push_back(&Inst); + } else if (isLibCall(Inst, TLI)) { + if (Inst.mayReadOrWriteMemory()) + LibCalls.push_back(&Inst); + } else { + Callsites.push_back(&Inst); + } + } + } + + // Add the current set of local loads and stores to be considered for + // instrumentation. + if (!simpleCallCannotRace(Inst)) { + chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores, + TI, LI, TLI); + } + } + } + chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores, TI, + LI, TLI); + } + + // Evaluate the tasks that might be in parallel with each spindle. + MaybeParallelTasks MPTasks; + TI.evaluateParallelState(MPTasks); + + // Map each detach instruction with the sync instructions that could sync it. 
+ for (SyncInst *Sync : Syncs) + for (const Task *MPT : + MPTasks.TaskList[TI.getSpindleFor(Sync->getParent())]) + DetachToSync[MPT->getDetach()].push_back(Sync); + + // Record objects involved in races + for (auto &ObjRD : RI.getObjectMRForRace()) + ObjectMRForRace[ObjRD.first] = ObjRD.second; + + uint64_t LocalId = getLocalFunctionID(F); + IRBuilder<> IRB(getEntryBBInsertPt(F.getEntryBlock())); + Value *FuncId = FunctionFED.localToGlobalId(LocalId, IRB); + + bool Result = false; + if (!EnableStaticRaceDetection) { + SimpleInstrumentor FuncI(*this, TI, LI, DT, TLI); + Result |= FuncI.InstrumentSimpleInstructions(AllLoadsAndStores); + Result |= FuncI.InstrumentSimpleInstructions(AtomicAccesses); + Result |= FuncI.InstrumentAnyMemIntrinsics(MemIntrinCalls); + Result |= FuncI.InstrumentCalls(IntrinsicCalls); + Result |= FuncI.InstrumentCalls(LibCalls); + Result |= FuncI.InstrumentCalls(Callsites); + + // Instrument ancillary instructions including allocas, allocation-function + // calls, free calls, detaches, and syncs. + Result |= FuncI.InstrumentAncillaryInstructions(Allocas, AllocationFnCalls, + FreeCalls, SyncRegNums, + SRCounters, DL); + } else { + Instrumentor FuncI(*this, RI, TI, LI, DT, TLI); + + // Insert MAAP flags for each function argument. + FuncI.InsertArgMAAPs(F, FuncId); + + Result |= FuncI.InstrumentSimpleInstructions(AllLoadsAndStores); + Result |= FuncI.InstrumentSimpleInstructions(AtomicAccesses); + Result |= FuncI.InstrumentAnyMemIntrinsics(MemIntrinCalls); + Result |= FuncI.InstrumentCalls(IntrinsicCalls); + Result |= FuncI.InstrumentCalls(LibCalls); + Result |= FuncI.InstrumentCalls(Callsites); + + // Find detaches that need to be instrumented for loop instructions whose + // instrumentation will be coalesced. + FuncI.GetDetachesForCoalescedInstrumentation(LoopInstToHoist, + LoopInstToSink); + + // Instrument ancillary instructions including allocas, allocation-function + // calls, free calls, detaches, and syncs. 
+ Result |= FuncI.InstrumentAncillaryInstructions(Allocas, AllocationFnCalls, + FreeCalls, SyncRegNums, + SRCounters, DL); + + // Hoist and sink instrumentation when possible (applies to all loops, + // not just Tapir loops) + // Also inserts MAAP checks for hoisted/sinked instrumentation + Result |= + FuncI.InstrumentLoops(LoopInstToHoist, LoopInstToSink, TapirLoops, &SE); + + // Once we have handled ancillary instructions, we've done the necessary + // analysis on this function. We now perform delayed instrumentation, which + // can involve changing the CFG and thereby violating some analyses. + Result |= FuncI.PerformDelayedInstrumentation(); + } + + if (Result) { + bool MaySpawn = !TI.isSerial(); + if (InstrumentationSet & SERIESPARALLEL) { + IRBuilder<> IRB(cast(FuncId)->getNextNode()); + CsiFuncProperty FuncEntryProp; + FuncEntryProp.setMaySpawn(MaySpawn); + if (MaySpawn) + FuncEntryProp.setNumSyncReg(SRCounters[TI.getRootTask()->getEntry()]); + // TODO: Determine if we actually want the frame pointer, not the stack + // pointer. + Value *FrameAddr = + IRB.CreateCall(Intrinsic::getDeclaration(&M, Intrinsic::frameaddress, + IRB.getInt8PtrTy()), + {IRB.getInt32(0)}); + Value *StackSave = + IRB.CreateCall(Intrinsic::getDeclaration(&M, Intrinsic::stacksave)); + CallInst *EntryCall = + IRB.CreateCall(CsanFuncEntry, {FuncId, FrameAddr, StackSave, + FuncEntryProp.getValue(IRB)}); + setInstrumentationDebugLoc(F, EntryCall); + } else { + // Search for a call to CsanFuncEntry, and update its ID argument. 
+ for (BasicBlock::iterator I = cast(FuncId)->getIterator(), + E = F.getEntryBlock().end(); + I != E; ++I) { + if (CallBase *CB = dyn_cast(&*I)) + if (CB->getCalledFunction() == CsanFuncEntry.getCallee()) { + CB->setArgOperand(0, FuncId); + break; + } + } + } + + EscapeEnumerator EE(F, "csan_cleanup", false); + while (IRBuilder<> *AtExit = EE.Next()) { + if (InstrumentationSet & SERIESPARALLEL) { + uint64_t ExitLocalId = FunctionExitFED.add(*AtExit->GetInsertPoint()); + Value *ExitCsiId = FunctionExitFED.localToGlobalId(ExitLocalId, *AtExit); + CsiFuncExitProperty FuncExitProp; + FuncExitProp.setMaySpawn(MaySpawn); + FuncExitProp.setEHReturn(isa(AtExit->GetInsertPoint())); + CallInst *ExitCall = AtExit->CreateCall( + CsanFuncExit, {ExitCsiId, FuncId, FuncExitProp.getValue(*AtExit)}); + setInstrumentationDebugLoc(F, ExitCall); + } else { + // Search for a call to CsanFuncExit, and update its ID argument. + for (BasicBlock::iterator I = AtExit->GetInsertBlock()->begin(), + E = AtExit->GetInsertBlock()->end(); + I != E; ++I) { + if (CallBase *CB = dyn_cast(&*I)) + if (CB->getCalledFunction() == CsanFuncExit.getCallee()) { + CB->setArgOperand(1, FuncId); + break; + } + } + } + } + } + + // Record aggregate race information for the function and its arguments for + // interprocedural analysis. + // + // TODO: Clean this up + RaceInfo::RaceType FuncRT = RaceInfo::None; + for (Instruction *I : AllLoadsAndStores) + FuncRT = RaceInfo::unionRaceTypes(FuncRT, RI.getRaceType(I)); + for (Instruction *I : AtomicAccesses) + FuncRT = RaceInfo::unionRaceTypes(FuncRT, RI.getRaceType(I)); + for (Instruction *I : MemIntrinCalls) + FuncRT = RaceInfo::unionRaceTypes(FuncRT, RI.getRaceType(I)); + for (Instruction *I : Callsites) { + if (const CallBase *CB = dyn_cast(I)) { + // Use updated information about the race type of the call, if it's + // available. 
+ const Function *CF = CB->getCalledFunction(); + if (FunctionRaceType.count(CF)) { + FuncRT = RaceInfo::unionRaceTypes(FuncRT, FunctionRaceType[CF]); + // Preserve the local-race marking if the callsite itself is involved in + // a local race. + if (RaceInfo::isLocalRace(RI.getRaceType(I))) + FuncRT = RaceInfo::unionRaceTypes(FuncRT, RaceInfo::Local); + continue; + } + } + FuncRT = RaceInfo::unionRaceTypes(FuncRT, RI.getRaceType(I)); + } + FunctionRaceType[&F] = FuncRT; + + return Result; +} + +bool CilkSanitizerImpl::instrumentLoadOrStore(Instruction *I, + IRBuilder<> &IRB) { + bool IsWrite = isa(*I); + Value *Addr = IsWrite + ? cast(I)->getPointerOperand() + : cast(I)->getPointerOperand(); + Type *Ty = + IsWrite ? cast(I)->getValueOperand()->getType() : I->getType(); + + // swifterror memory addresses are mem2reg promoted by instruction selection. + // As such they cannot have regular uses like an instrumentation function and + // it makes no sense to track them as memory. + if (Addr->isSwiftError()) + return false; + + int NumBytesAccessed = getNumBytesAccessed(Ty, DL); + if (-1 == NumBytesAccessed) { + // Ignore accesses with bad sizes. + NumAccessesWithBadSize++; + return false; + } + + // Only insert instrumentation if requested + if (!(InstrumentationSet & SHADOWMEMORY)) + return true; + + const Align Alignment = IsWrite + ? 
cast(I)->getAlign() + : cast(I)->getAlign(); + CsiLoadStoreProperty Prop; + Prop.setAlignment(Alignment); + Prop.setIsAtomic(I->isAtomic()); + Prop.setIsThreadLocal(isThreadLocalObject(lookupUnderlyingObject(Addr))); + if (IsWrite) { + // Instrument store + uint64_t LocalId = StoreFED.add(*I); + uint64_t StoreObjId = StoreObj.add(*I, lookupUnderlyingObject(Addr)); + assert(LocalId == StoreObjId && + "Store received different ID's in FED and object tables."); + Value *CsiId = StoreFED.localToGlobalId(LocalId, IRB); + Value *Args[] = {CsiId, + IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), + IRB.getInt32(NumBytesAccessed), + Prop.getValue(IRB)}; + Instruction *Call = IRB.CreateCall(CsanWrite, Args); + IRB.SetInstDebugLocation(Call); + NumInstrumentedWrites++; + } else { + // Instrument load + uint64_t LocalId = LoadFED.add(*I); + uint64_t LoadObjId = LoadObj.add(*I, lookupUnderlyingObject(Addr)); + assert(LocalId == LoadObjId && + "Load received different ID's in FED and object tables."); + Value *CsiId = LoadFED.localToGlobalId(LocalId, IRB); + Value *Args[] = {CsiId, + IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), + IRB.getInt32(NumBytesAccessed), + Prop.getValue(IRB)}; + Instruction *Call = IRB.CreateCall(CsanRead, Args); + IRB.SetInstDebugLocation(Call); + NumInstrumentedReads++; + } + return true; +} + +bool CilkSanitizerImpl::instrumentAtomic(Instruction *I, IRBuilder<> &IRB) { + Value *Addr; + Type *Ty; + Align Alignment; + if (AtomicRMWInst *RMWI = dyn_cast(I)) { + Addr = RMWI->getPointerOperand(); + Ty = RMWI->getValOperand()->getType(); + Alignment = RMWI->getAlign(); + } else if (AtomicCmpXchgInst *CASI = dyn_cast(I)) { + Addr = CASI->getPointerOperand(); + Ty = CASI->getNewValOperand()->getType(); + Alignment = CASI->getAlign(); + } else { + return false; + } + + int NumBytesAccessed = getNumBytesAccessed(Ty, DL); + if (-1 == NumBytesAccessed) { + // Ignore accesses with bad sizes. 
+ NumAccessesWithBadSize++; + return false; + } + + // Only insert instrumentation if requested + if (!(InstrumentationSet & SHADOWMEMORY)) + return true; + + CsiLoadStoreProperty Prop; + Prop.setAlignment(Alignment); + Prop.setIsAtomic(true); + Prop.setIsThreadLocal(isThreadLocalObject(lookupUnderlyingObject(Addr))); + uint64_t LocalId = StoreFED.add(*I); + uint64_t StoreObjId = StoreObj.add(*I, lookupUnderlyingObject(Addr)); + assert(LocalId == StoreObjId && + "Store received different ID's in FED and object tables."); + Value *CsiId = StoreFED.localToGlobalId(LocalId, IRB); + Value *Args[] = {CsiId, + IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), + IRB.getInt32(NumBytesAccessed), + Prop.getValue(IRB)}; + Instruction *Call = IRB.CreateCall(CsanWrite, Args); + IRB.SetInstDebugLocation(Call); + NumInstrumentedWrites++; + return true; +} + +FunctionCallee CilkSanitizerImpl::getOrInsertSynthesizedHook(StringRef Name, + FunctionType *T, + AttributeList AL) { + // If no bitcode file has been linked, then we cannot check if it contains a + // particular library hook. Simply return the hook. If the Cilksan library + // doesn't contain that hook, the linker will raise an error. + if (!LinkedBitcode) + return getHookFunction(Name, T, AL); + + // Check if the linked bitcode file contains the library hook. If it does, + // return that hook. + if (FunctionsInBitcode.contains(std::string(Name))) + return getHookFunction(Name, T, AL); + + // We did not find the library hook in the linked bitcode file. Synthesize a + // default version of the hook that simply calls __csan_default_libhook. 
+ FunctionCallee NewHook = M.getOrInsertFunction(Name, T, AL); + Function *NewHookFn = cast(NewHook.getCallee()); + NewHookFn->setOnlyAccessesInaccessibleMemOrArgMem(); + NewHookFn->setDoesNotThrow(); + BasicBlock *Entry = BasicBlock::Create(M.getContext(), "entry", NewHookFn); + IRBuilder<> IRB(ReturnInst::Create(M.getContext(), Entry)); + + // Insert a call to the default library function hook + Type *IDType = IRB.getInt64Ty(); + FunctionType *DefaultHookTy = + FunctionType::get(IRB.getVoidTy(), + {/*call_id*/ + IDType, /*func_id*/ IDType, + /*MAAP_count*/ IRB.getInt8Ty()}, + /*isVarArg*/ false); + FunctionCallee DefaultHook = + M.getOrInsertFunction("__csan_default_libhook", DefaultHookTy); + IRB.CreateCall(DefaultHook, {NewHookFn->getArg(0), NewHookFn->getArg(1), + NewHookFn->getArg(2)}); + return NewHook; +} + +// Check if we need to spill a value of this type onto the stack to pass it to a +// hook. +static bool NeedToSpillType(const Type *T) { + return T->isVectorTy() || T->isStructTy(); +} + +bool CilkSanitizerImpl::instrumentIntrinsicCall( + Instruction *I, SmallVectorImpl *MAAPVals) { + assert(!callsPlaceholderFunction(*I) && + "instrumentIntrinsicCall called on placeholder function"); + + // Only insert instrumentation if requested + if (!(InstrumentationSet & SERIESPARALLEL)) + return true; + + CallBase *CB = dyn_cast(I); + if (!CB) + return false; + Function *Called = CB->getCalledFunction(); + + IRBuilder<> IRB(I); + LLVMContext &Ctx = IRB.getContext(); + uint64_t LocalId = CallsiteFED.add(*I); + Value *CallsiteId = CallsiteFED.localToGlobalId(LocalId, IRB); + Value *FuncId = GetCalleeFuncID(Called, IRB); + assert(FuncId != NULL); + + Value *NumMVVal = IRB.getInt8(0); + if (MAAPVals && !MAAPVals->empty()) { + unsigned NumMV = MAAPVals->size(); + NumMVVal = IRB.getInt8(NumMV); + } + + CsiCallProperty Prop; + // TODO: Set appropriate property values for this intrinsic call + Value *PropVal = Prop.getValue(IRB); + + // Since C/C++ does not like '.' 
characters in function names, convert '.' to + // '_' in the hook name. + SmallString<256> Buf; + for (char C : Called->getName().bytes()) { + if ('.' == C) + Buf.push_back('_'); + else + Buf.push_back(C); + } + Type *IDType = IRB.getInt64Ty(); + + // If the intrinsic does not return, insert the hook before the intrinsic. + if (CB->doesNotReturn()) { + // Synthesize the before hook for this function. + SmallVector BeforeHookParamTys( + {IDType, /*callee func_id*/ IDType, + /*Num MAAPVal*/ IRB.getInt8Ty(), CsiCallProperty::getType(Ctx)}); + SmallVector BeforeHookParamVals( + {CallsiteId, FuncId, NumMVVal, PropVal}); + + // Populate the BeforeHook parameters with the parameters of the + // instrumented function itself. + Value *SavedStack = nullptr; + const DataLayout &DL = M.getDataLayout(); + for (Value *Arg : CB->args()) { + Type *ArgTy = Arg->getType(); + if (!NeedToSpillType(ArgTy)) { + // We can simply pass the argument directly to the hook. + BeforeHookParamTys.push_back(ArgTy); + BeforeHookParamVals.push_back(Arg); + continue; + } + // We need to spill the argument onto the stack. 
+ + // Save the stack pointer, if we haven't already + if (!SavedStack) + SavedStack = + IRB.CreateCall(Intrinsic::getDeclaration(&M, Intrinsic::stacksave)); + + // Spill the argument onto the stack + AllocaInst *ArgSpill = IRB.CreateAlloca(ArgTy); + IRB.CreateAlignedStore(Arg, ArgSpill, DL.getStackAlignment()); + + // Add the spilled argument + BeforeHookParamTys.push_back(ArgSpill->getType()); + BeforeHookParamVals.push_back(ArgSpill); + } + FunctionType *BeforeHookTy = FunctionType::get( + IRB.getVoidTy(), BeforeHookParamTys, Called->isVarArg()); + FunctionCallee BeforeIntrinCallHook = getOrInsertSynthesizedHook( + ("__csan_" + Buf).str(), BeforeHookTy); + + // Insert the hook before the call + insertHookCall(I, BeforeIntrinCallHook, BeforeHookParamVals); + + // If we previously saved the stack pointer, restore it + if (SavedStack) + IRB.CreateCall(Intrinsic::getDeclaration(&M, Intrinsic::stackrestore), + {SavedStack}); + return true; + } + + // Otherwise, insert the hook after the intrinsic. + assert(!isa(I) && + "instrumentIntrinsicCall called on invoke instruction"); + + BasicBlock::iterator Iter(I); + Iter++; + IRB.SetInsertPoint(&*Iter); + + // Synthesize the after hook for this function. + SmallVector AfterHookParamTys({IDType, /*callee func_id*/ IDType, + /*Num MAAPVal*/ IRB.getInt8Ty(), + CsiCallProperty::getType(Ctx)}); + SmallVector AfterHookParamVals( + {CallsiteId, FuncId, NumMVVal, PropVal}); + + // Populate the AfterHook parameters with the parameters of the instrumented + // function itself. + Value *SavedStack = nullptr; + const DataLayout &DL = M.getDataLayout(); + if (!Called->getReturnType()->isVoidTy()) { + Type *RetTy = Called->getReturnType(); + if (!NeedToSpillType(RetTy)) { + // We can simply pass the return value directly to the hook. + AfterHookParamTys.push_back(RetTy); + AfterHookParamVals.push_back(CB); + } else { + // We need to spill the return value onto the stack. 
+ + // Save the stack pointer, if we haven't already + if (!SavedStack) + SavedStack = + IRB.CreateCall(Intrinsic::getDeclaration(&M, Intrinsic::stacksave)); + + // Spill the return value onto the stack + AllocaInst *RetSpill = IRB.CreateAlloca(RetTy); + IRB.CreateAlignedStore(CB, RetSpill, DL.getStackAlignment()); + + // Add the spilled return value + AfterHookParamTys.push_back(RetSpill->getType()); + AfterHookParamVals.push_back(RetSpill); + } + } + for (Value *Arg : CB->args()) { + Type *ArgTy = Arg->getType(); + if (!NeedToSpillType(ArgTy)) { + // We can simply pass the argument directly to the hook. + AfterHookParamTys.push_back(ArgTy); + AfterHookParamVals.push_back(Arg); + continue; + } + // We need to spill the argument onto the stack. + + // Save the stack pointer, if we haven't already + if (!SavedStack) + SavedStack = + IRB.CreateCall(Intrinsic::getDeclaration(&M, Intrinsic::stacksave)); + + // Spill the argument onto the stack + AllocaInst *ArgSpill = IRB.CreateAlloca(ArgTy); + IRB.CreateAlignedStore(Arg, ArgSpill, DL.getStackAlignment()); + + // Add the spilled argument + AfterHookParamTys.push_back(ArgSpill->getType()); + AfterHookParamVals.push_back(ArgSpill); + } + + // Special-case intrinsics. 
+ IntrinsicInst *II = dyn_cast(I); + switch (II->getIntrinsicID()) { + case Intrinsic::hyper_lookup: { + FunctionType *AfterHookTy = + FunctionType::get(IRB.getInt8PtrTy(), AfterHookParamTys, Called->isVarArg()); + FunctionCallee AfterIntrinCallHook = + getOrInsertSynthesizedHook(("__csan_" + Buf).str(), AfterHookTy); + CallInst *HookCall = + insertHookCall(&*Iter, AfterIntrinCallHook, AfterHookParamVals); + II->replaceUsesWithIf(HookCall, [HookCall](Use &U) { + return cast(U.getUser()) != HookCall; + }); + return true; + } + } + + FunctionType *AfterHookTy = + FunctionType::get(IRB.getVoidTy(), AfterHookParamTys, Called->isVarArg()); + FunctionCallee AfterIntrinCallHook = + getOrInsertSynthesizedHook(("__csan_" + Buf).str(), AfterHookTy); + + // Insert the hook call + insertHookCall(&*Iter, AfterIntrinCallHook, AfterHookParamVals); + + if (SavedStack) { + IRB.CreateCall(Intrinsic::getDeclaration(&M, Intrinsic::stackrestore), + {SavedStack}); + } + return true; +} + +bool CilkSanitizerImpl::instrumentLibCall(Instruction *I, + SmallVectorImpl *MAAPVals) { + // Only insert instrumentation if requested + if (!(InstrumentationSet & SERIESPARALLEL)) + return true; + + bool IsInvoke = isa(I); + CallBase *CB = dyn_cast(I); + if (!CB) + return false; + Function *Called = CB->getCalledFunction(); + + IRBuilder<> IRB(I); + LLVMContext &Ctx = IRB.getContext(); + uint64_t LocalId = CallsiteFED.add(*I); + Value *DefaultID = getDefaultID(IRB); + Value *CallsiteId = CallsiteFED.localToGlobalId(LocalId, IRB); + Value *FuncId = GetCalleeFuncID(Called, IRB); + assert(FuncId != NULL); + + Value *NumMVVal = IRB.getInt8(0); + if (MAAPVals && !MAAPVals->empty()) { + unsigned NumMV = MAAPVals->size(); + NumMVVal = IRB.getInt8(NumMV); + } + + CsiCallProperty Prop; + Value *DefaultPropVal = Prop.getValue(IRB); + // TODO: Set appropriate property values for this intrinsic call + Value *PropVal = Prop.getValue(IRB); + Type *IDType = IRB.getInt64Ty(); + + // If the intrinsic does not return, 
insert the hook before the intrinsic. + if (CB->doesNotReturn()) { + // Synthesize the before hook for this function. + SmallVector BeforeHookParamTys( + {IDType, /*callee func_id*/ IDType, + /*MAAP_count*/ IRB.getInt8Ty(), CsiCallProperty::getType(Ctx)}); + SmallVector BeforeHookParamVals( + {CallsiteId, FuncId, NumMVVal, PropVal}); + BeforeHookParamTys.append(Called->getFunctionType()->param_begin(), + Called->getFunctionType()->param_end()); + BeforeHookParamVals.append(CB->arg_begin(), CB->arg_end()); + FunctionType *BeforeHookTy = FunctionType::get( + IRB.getVoidTy(), BeforeHookParamTys, Called->isVarArg()); + FunctionCallee BeforeLibCallHook = getOrInsertSynthesizedHook( + ("__csan_" + Called->getName()).str(), BeforeHookTy); + + insertHookCall(I, BeforeLibCallHook, BeforeHookParamVals); + return true; + } + + // Otherwise, insert the hook after the intrinsic. + + // Synthesize the after hook for this function. + SmallVector AfterHookParamTys( + {IDType, /*callee func_id*/ IDType, + /*Num MAAPVal*/ IRB.getInt8Ty(), CsiCallProperty::getType(Ctx)}); + SmallVector AfterHookParamVals( + {CallsiteId, FuncId, NumMVVal, PropVal}); + SmallVector AfterHookDefaultVals( + {DefaultID, DefaultID, IRB.getInt8(0), DefaultPropVal}); + if (!Called->getReturnType()->isVoidTy()) { + AfterHookParamTys.push_back(Called->getReturnType()); + AfterHookParamVals.push_back(CB); + AfterHookDefaultVals.push_back( + Constant::getNullValue(Called->getReturnType())); + } + AfterHookParamTys.append(Called->getFunctionType()->param_begin(), + Called->getFunctionType()->param_end()); + AfterHookParamVals.append(CB->arg_begin(), CB->arg_end()); + for (Value *Arg : CB->args()) + AfterHookDefaultVals.push_back(Constant::getNullValue(Arg->getType())); + FunctionType *AfterHookTy = + FunctionType::get(IRB.getVoidTy(), AfterHookParamTys, Called->isVarArg()); + FunctionCallee AfterLibCallHook = getOrInsertSynthesizedHook( + ("__csan_" + Called->getName()).str(), AfterHookTy); + + 
BasicBlock::iterator Iter(I); + if (IsInvoke) { + // There are two "after" positions for invokes: the normal block and the + // exception block. + InvokeInst *II = cast(I); + insertHookCallInSuccessorBB( + II->getNormalDest(), II->getParent(), AfterLibCallHook, + AfterHookParamVals, AfterHookDefaultVals); + // Don't insert any instrumentation in the exception block. + } else { + // Simple call instruction; there is only one "after" position. + Iter++; + IRB.SetInsertPoint(&*Iter); + insertHookCall(&*Iter, AfterLibCallHook, AfterHookParamVals); + } + + return true; +} + +bool CilkSanitizerImpl::instrumentCallsite(Instruction *I, + SmallVectorImpl *MAAPVals) { + if (callsPlaceholderFunction(*I)) + return false; + + bool IsInvoke = isa(I); + CallBase *CB = dyn_cast(I); + if (!CB) + return false; + Function *Called = CB->getCalledFunction(); + + // Only insert instrumentation if requested + if (!(InstrumentationSet & SERIESPARALLEL)) + return true; + + IRBuilder<> IRB(I); + uint64_t LocalId = CallsiteFED.add(*I); + Value *DefaultID = getDefaultID(IRB); + Value *CallsiteId = CallsiteFED.localToGlobalId(LocalId, IRB); + Value *FuncId = GetCalleeFuncID(Called, IRB); + assert(FuncId != NULL); + + Value *NumMVVal = IRB.getInt8(0); + if (MAAPVals && !MAAPVals->empty()) { + unsigned NumMV = MAAPVals->size(); + NumMVVal = IRB.getInt8(NumMV); + } + + CsiCallProperty Prop; + Value *DefaultPropVal = Prop.getValue(IRB); + Prop.setIsIndirect(!Called); + Value *PropVal = Prop.getValue(IRB); + insertHookCall(I, CsanBeforeCallsite, {CallsiteId, FuncId, NumMVVal, + PropVal}); + + BasicBlock::iterator Iter(I); + if (IsInvoke) { + // There are two "after" positions for invokes: the normal block and the + // exception block. + InvokeInst *II = cast(I); + if (!CB->doesNotReturn()) { + // If this function can return normally, insert an after_call hook at the + // normal destination. 
+ insertHookCallInSuccessorBB( + II->getNormalDest(), II->getParent(), CsanAfterCallsite, + {CallsiteId, FuncId, NumMVVal, PropVal}, + {DefaultID, DefaultID, IRB.getInt8(0), DefaultPropVal}); + } + CsiCallProperty Prop; + Prop.setIsIndirect(!Called); + Prop.setIsUnwind(); + Value *PropVal = Prop.getValue(IRB); + insertHookCallInSuccessorBB( + II->getUnwindDest(), II->getParent(), CsanAfterCallsite, + {CallsiteId, FuncId, NumMVVal, PropVal}, + {DefaultID, DefaultID, IRB.getInt8(0), DefaultPropVal}); + } else if (!CB->doesNotReturn()) { + // If this function can return normally, insert an after_call hook at the + // normal destination. + + // Simple call instruction; there is only one "after" position. + Iter++; + IRB.SetInsertPoint(&*Iter); + PropVal = Prop.getValue(IRB); + insertHookCall(&*Iter, CsanAfterCallsite, + {CallsiteId, FuncId, NumMVVal, PropVal}); + } + + return true; +} + +bool CilkSanitizerImpl::suppressCallsite(Instruction *I) { + if (callsPlaceholderFunction(*I)) + return false; + + bool IsInvoke = isa(I); + + IRBuilder<> IRB(I); + insertHookCall(I, CsanDisableChecking, {}); + + BasicBlock::iterator Iter(I); + if (IsInvoke) { + // There are two "after" positions for invokes: the normal block and the + // exception block. + InvokeInst *II = cast(I); + insertHookCallInSuccessorBB( + II->getNormalDest(), II->getParent(), CsanEnableChecking, {}, {}); + insertHookCallInSuccessorBB( + II->getUnwindDest(), II->getParent(), CsanEnableChecking, {}, {}); + } else { + // Simple call instruction; there is only one "after" position. + Iter++; + IRB.SetInsertPoint(&*Iter); + insertHookCall(&*Iter, CsanEnableChecking, {}); + } + + return true; +} + +static bool IsMemTransferDstOperand(unsigned OperandNum) { + // This check should be kept in sync with TapirRaceDetect::GetGeneralAccesses. + return (OperandNum == 0); +} + +static bool IsMemTransferSrcOperand(unsigned OperandNum) { + // This check should be kept in sync with TapirRaceDetect::GetGeneralAccesses. 
+ return (OperandNum == 1); +} + +bool CilkSanitizerImpl::instrumentAnyMemIntrinAcc(Instruction *I, + unsigned OperandNum, + IRBuilder<> &IRB) { + CsiLoadStoreProperty Prop; + if (AnyMemTransferInst *M = dyn_cast(I)) { + // Only instrument the large load and the large store components as + // necessary. + bool Instrumented = false; + + if (IsMemTransferDstOperand(OperandNum)) { + // Only insert instrumentation if requested + if (!(InstrumentationSet & SHADOWMEMORY)) + return true; + + Value *Addr = M->getDest(); + Prop.setAlignment(M->getDestAlign()); + Prop.setIsThreadLocal(isThreadLocalObject(lookupUnderlyingObject(Addr))); + // Instrument the store + uint64_t StoreId = StoreFED.add(*I); + + // TODO: Don't recalculate underlying objects + uint64_t StoreObjId = StoreObj.add(*I, lookupUnderlyingObject(Addr)); + assert(StoreId == StoreObjId && + "Store received different ID's in FED and object tables."); + + Value *CsiId = StoreFED.localToGlobalId(StoreId, IRB); + Value *Args[] = {CsiId, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), + IRB.CreateIntCast(M->getLength(), IntptrTy, false), + Prop.getValue(IRB)}; + Instruction *Call = IRB.CreateCall(CsanLargeWrite, Args); + IRB.SetInstDebugLocation(Call); + ++NumInstrumentedMemIntrinsicWrites; + Instrumented = true; + } + + if (IsMemTransferSrcOperand(OperandNum)) { + // Only insert instrumentation if requested + if (!(InstrumentationSet & SHADOWMEMORY)) + return true; + + Value *Addr = M->getSource(); + Prop.setAlignment(M->getSourceAlign()); + Prop.setIsThreadLocal(isThreadLocalObject(lookupUnderlyingObject(Addr))); + // Instrument the load + uint64_t LoadId = LoadFED.add(*I); + + // TODO: Don't recalculate underlying objects + uint64_t LoadObjId = LoadObj.add(*I, lookupUnderlyingObject(Addr)); + assert(LoadId == LoadObjId && + "Load received different ID's in FED and object tables."); + + Value *CsiId = LoadFED.localToGlobalId(LoadId, IRB); + Value *Args[] = {CsiId, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), 
+ IRB.CreateIntCast(M->getLength(), IntptrTy, false), + Prop.getValue(IRB)}; + Instruction *Call = IRB.CreateCall(CsanLargeRead, Args); + IRB.SetInstDebugLocation(Call); + ++NumInstrumentedMemIntrinsicReads; + Instrumented = true; + } + return Instrumented; + } else if (AnyMemIntrinsic *M = dyn_cast(I)) { + // Only insert instrumentation if requested + if (!(InstrumentationSet & SHADOWMEMORY)) + return true; + + Value *Addr = M->getDest(); + Prop.setAlignment(M->getDestAlign()); + Prop.setIsThreadLocal(isThreadLocalObject(lookupUnderlyingObject(Addr))); + uint64_t LocalId = StoreFED.add(*I); + + // TODO: Don't recalculate underlying objects + uint64_t StoreObjId = StoreObj.add(*I, lookupUnderlyingObject(Addr)); + assert(LocalId == StoreObjId && + "Store received different ID's in FED and object tables."); + + Value *CsiId = StoreFED.localToGlobalId(LocalId, IRB); + Value *Args[] = {CsiId, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), + IRB.CreateIntCast(M->getLength(), IntptrTy, false), + Prop.getValue(IRB)}; + Instruction *Call = IRB.CreateCall(CsanLargeWrite, Args); + IRB.SetInstDebugLocation(Call); + ++NumInstrumentedMemIntrinsicWrites; + return true; + } + return false; +} + +static void getTaskExits( + DetachInst *DI, SmallVectorImpl &TaskReturns, + SmallVectorImpl &TaskResumes, + SmallVectorImpl &SharedEHExits, + TaskInfo &TI) { + BasicBlock *DetachedBlock = DI->getDetached(); + Task *T = TI.getTaskFor(DetachedBlock); + BasicBlock *ContinueBlock = DI->getContinue(); + + // Examine the predecessors of the continue block and save any predecessors in + // the task as a task return. + for (BasicBlock *Pred : predecessors(ContinueBlock)) { + if (T->simplyEncloses(Pred)) { + assert(isa(Pred->getTerminator())); + TaskReturns.push_back(Pred); + } + } + + // If the detach cannot throw, we're done. + if (!DI->hasUnwindDest()) + return; + + // Detached-rethrow exits can appear in strange places within a task-exiting + // spindle. 
Hence we loop over all blocks in the spindle to find + // detached rethrows. + for (Spindle *S : depth_first>(T->getEntrySpindle())) { + if (S->isSharedEH()) { + if (llvm::any_of(predecessors(S), + [](const Spindle *Pred){ return !Pred->isSharedEH(); })) + SharedEHExits.push_back(S); + continue; + } + + for (BasicBlock *B : S->blocks()) + if (isDetachedRethrow(B->getTerminator())) + TaskResumes.push_back(B); + } +} + +bool CilkSanitizerImpl::instrumentDetach(DetachInst *DI, unsigned SyncRegNum, + unsigned NumSyncRegs, + DominatorTree &DT, TaskInfo &TI, + LoopInfo &LI) { + // Only insert instrumentation if requested + if (!(InstrumentationSet & SERIESPARALLEL)) + return true; + + LLVMContext &Ctx = DI->getContext(); + BasicBlock *TaskEntryBlock = TI.getTaskFor(DI->getParent())->getEntry(); + IRBuilder<> IDBuilder(getEntryBBInsertPt(*TaskEntryBlock)); + bool TapirLoopBody = spawnsTapirLoopBody(DI, LI, TI); + ConstantInt *SyncRegVal = ConstantInt::get(Type::getInt32Ty(Ctx), SyncRegNum); + ConstantInt *DefaultSyncRegVal = ConstantInt::get(Type::getInt32Ty(Ctx), 0); + CsiDetachProperty DetachProp; + DetachProp.setForTapirLoopBody(TapirLoopBody); + // Instrument the detach instruction itself + Value *DetachID; + { + IRBuilder<> IRB(DI); + uint64_t LocalID = DetachFED.add(*DI); + DetachID = DetachFED.localToGlobalId(LocalID, IDBuilder); + Instruction *Call = IRB.CreateCall( + CsanDetach, {DetachID, SyncRegVal, DetachProp.getValue(IRB)}); + IRB.SetInstDebugLocation(Call); + } + NumInstrumentedDetaches++; + + // Find the detached block, continuation, and associated reattaches. + BasicBlock *DetachedBlock = DI->getDetached(); + BasicBlock *ContinueBlock = DI->getContinue(); + Task *T = TI.getTaskFor(DetachedBlock); + SmallVector TaskExits, TaskResumes; + SmallVector SharedEHExits; + getTaskExits(DI, TaskExits, TaskResumes, SharedEHExits, TI); + + // Instrument the entry and exit points of the detached task. + { + // Instrument the entry point of the detached task. 
+ IRBuilder<> IRB(&*getFirstInsertionPtInDetachedBlock(DetachedBlock)); + uint64_t LocalID = TaskFED.add(*DetachedBlock); + Value *TaskID = TaskFED.localToGlobalId(LocalID, IDBuilder); + CsiTaskProperty Prop; + Prop.setIsTapirLoopBody(TapirLoopBody); + Prop.setNumSyncReg(NumSyncRegs); + // Get the frame and stack pointers. + Value *FrameAddr = IRB.CreateCall( + Intrinsic::getDeclaration(&M, Intrinsic::task_frameaddress), + {IRB.getInt32(0)}); + Value *StackSave = IRB.CreateCall( + Intrinsic::getDeclaration(&M, Intrinsic::stacksave)); + Instruction *Call = IRB.CreateCall(CsanTaskEntry, + {TaskID, DetachID, FrameAddr, + StackSave, Prop.getValue(IRB)}); + IRB.SetInstDebugLocation(Call); + + // Instrument the exit points of the detached tasks. + for (BasicBlock *TaskExit : TaskExits) { + IRBuilder<> IRB(TaskExit->getTerminator()); + uint64_t LocalID = TaskExitFED.add(*TaskExit->getTerminator()); + Value *TaskExitID = TaskExitFED.localToGlobalId(LocalID, IDBuilder); + CsiTaskExitProperty ExitProp; + ExitProp.setIsTapirLoopBody(TapirLoopBody); + Instruction *Call = + IRB.CreateCall(CsanTaskExit, {TaskExitID, TaskID, DetachID, + SyncRegVal, ExitProp.getValue(IRB)}); + IRB.SetInstDebugLocation(Call); + NumInstrumentedDetachExits++; + } + // Instrument the EH exits of the detached task. + for (BasicBlock *TaskExit : TaskResumes) { + IRBuilder<> IRB(TaskExit->getTerminator()); + uint64_t LocalID = TaskExitFED.add(*TaskExit->getTerminator()); + Value *TaskExitID = TaskExitFED.localToGlobalId(LocalID, IDBuilder); + CsiTaskExitProperty ExitProp; + ExitProp.setIsTapirLoopBody(TapirLoopBody); + Instruction *Call = + IRB.CreateCall(CsanTaskExit, {TaskExitID, TaskID, DetachID, + SyncRegVal, ExitProp.getValue(IRB)}); + IRB.SetInstDebugLocation(Call); + NumInstrumentedDetachExits++; + } + + Value *DefaultID = getDefaultID(IDBuilder); + for (Spindle *SharedEH : SharedEHExits) { + // Skip shared-eh spindle exits that are placeholder unreachable blocks. 
+ if (isa( + SharedEH->getEntry()->getFirstNonPHIOrDbgOrLifetime())) + continue; + CsiTaskExitProperty ExitProp; + ExitProp.setIsTapirLoopBody(TapirLoopBody); + insertHookCallAtSharedEHSpindleExits( + SharedEH, T, CsanTaskExit, TaskExitFED, + {TaskID, DetachID, SyncRegVal, ExitProp.getValueImpl(Ctx)}, + {DefaultID, DefaultID, DefaultSyncRegVal, + CsiTaskExitProperty::getDefaultValueImpl(Ctx)}); + } + } + + // Instrument the continuation of the detach. + { + if (isCriticalContinueEdge(DI, 1)) + ContinueBlock = SplitCriticalEdge( + DI, 1, + CriticalEdgeSplittingOptions(&DT, &LI).setSplitDetachContinue()); + + IRBuilder<> IRB(&*ContinueBlock->getFirstInsertionPt()); + uint64_t LocalID = DetachContinueFED.add(*ContinueBlock); + Value *ContinueID = DetachContinueFED.localToGlobalId(LocalID, IDBuilder); + CsiDetachContinueProperty ContProp; + ContProp.setForTapirLoopBody(TapirLoopBody); + Instruction *Call = + IRB.CreateCall(CsanDetachContinue, {ContinueID, DetachID, SyncRegVal, + ContProp.getValue(IRB)}); + IRB.SetInstDebugLocation(Call); + } + // Instrument the unwind of the detach, if it exists. + if (DI->hasUnwindDest()) { + BasicBlock *UnwindBlock = DI->getUnwindDest(); + BasicBlock *PredBlock = DI->getParent(); + if (Value *TF = T->getTaskFrameUsed()) { + // If the detached task uses a taskframe, then we want to insert the + // detach_continue instrumentation for the unwind destination after the + // taskframe.resume. 
+ UnwindBlock = getTaskFrameResumeDest(TF); + assert(UnwindBlock && + "Detach with unwind uses a taskframe with no resume"); + PredBlock = getTaskFrameResume(TF)->getParent(); + } + Value *DefaultID = getDefaultID(IDBuilder); + uint64_t LocalID = DetachContinueFED.add(*UnwindBlock); + Value *ContinueID = DetachContinueFED.localToGlobalId(LocalID, IDBuilder); + CsiDetachContinueProperty ContProp; + Value *DefaultPropVal = ContProp.getValueImpl(Ctx); + ContProp.setIsUnwind(); + ContProp.setForTapirLoopBody(TapirLoopBody); + insertHookCallInSuccessorBB( + UnwindBlock, PredBlock, CsanDetachContinue, + {ContinueID, DetachID, SyncRegVal, ContProp.getValue(Ctx)}, + {DefaultID, DefaultID, DefaultSyncRegVal, DefaultPropVal}); + for (BasicBlock *DRPred : predecessors(UnwindBlock)) + if (isDetachedRethrow(DRPred->getTerminator(), DI->getSyncRegion())) + insertHookCallInSuccessorBB( + UnwindBlock, DRPred, CsanDetachContinue, + {ContinueID, DetachID, SyncRegVal, ContProp.getValue(Ctx)}, + {DefaultID, DefaultID, DefaultSyncRegVal, DefaultPropVal}); + } + return true; +} + +bool CilkSanitizerImpl::instrumentSync(SyncInst *SI, unsigned SyncRegNum) { + // Only insert instrumentation if requested + if (!(InstrumentationSet & SERIESPARALLEL)) + return true; + + IRBuilder<> IRB(SI); + // Get the ID of this sync. + uint64_t LocalID = SyncFED.add(*SI); + Value *SyncID = SyncFED.localToGlobalId(LocalID, IRB); + // Insert instrumentation before the sync. + insertHookCall(SI, CsanSync, {SyncID, IRB.getInt32(SyncRegNum)}); + + // NOTE: Because Cilksan executes serially, any exceptions thrown before this + // sync will appear to be thrown from their respective spawns or calls, not + // the sync or the Cilk personality function. Hence we don't need + // instrumentation in the unwind destination of the sync. 
+ + NumInstrumentedSyncs++; + return true; +} + +void CilkSanitizerImpl::instrumentTapirLoop(Loop &L, TaskInfo &TI, + DenseMap &SyncRegNums, + ScalarEvolution *SE) { + // Only insert instrumentation if requested + if (!(InstrumentationSet & SERIESPARALLEL)) + return; + + assert(L.isLoopSimplifyForm() && "CSI assumes loops are in simplified form."); + BasicBlock *Preheader = L.getLoopPreheader(); + Task *T = getTaskIfTapirLoop(&L, &TI); + assert(T && "CilkSanitizer should only instrument Tapir loops."); + unsigned SyncRegNum = SyncRegNums[T->getDetach()->getSyncRegion()]; + // We assign a local ID for this loop here, so that IDs for loops follow a + // depth-first ordering. + csi_id_t LocalId = LoopFED.add(*T->getDetach()); + + SmallVector ExitingBlocks; + L.getExitingBlocks(ExitingBlocks); + SmallVector ExitBlocks; + L.getUniqueExitBlocks(ExitBlocks); + + // Record properties of this loop. + CsiLoopProperty LoopProp; + LoopProp.setIsTapirLoop(static_cast(getTaskIfTapirLoop(&L, &TI))); + LoopProp.setHasUniqueExitingBlock((ExitingBlocks.size() == 1)); + + IRBuilder<> IRB(Preheader->getTerminator()); + Value *LoopCsiId = LoopFED.localToGlobalId(LocalId, IRB); + Value *LoopPropVal = LoopProp.getValue(IRB); + + // Try to evaluate the runtime trip count for this loop. Default to a count + // of -1 for unknown trip counts. + Value *TripCount = IRB.getInt64(-1); + if (SE) { + const SCEV *TripCountSC = getRuntimeTripCount(L, SE, true); + if (!isa(TripCountSC)) { + // Extend the TripCount type if necessary. + if (TripCountSC->getType() != IRB.getInt64Ty()) + TripCountSC = SE->getZeroExtendExpr(TripCountSC, IRB.getInt64Ty()); + // Compute the trip count to pass to the CSI hook. + SCEVExpander Expander(*SE, DL, "csi"); + TripCount = Expander.expandCodeFor(TripCountSC, IRB.getInt64Ty(), + &*IRB.GetInsertPoint()); + } + } + + // Insert before-loop hook. 
+ insertHookCall(&*IRB.GetInsertPoint(), CsanBeforeLoop, {LoopCsiId, TripCount, + LoopPropVal}); + + // Insert after-loop hooks. + for (BasicBlock *BB : ExitBlocks) { + // If the exit block is simply enclosed inside the task, then its on an + // exceptional exit path from the task. In that case, the exit path will + // reach the unwind destination of the detach. Because the unwind + // destination of the detach is in the set of exit blocks, we can safely + // skip any exit blocks enclosed in the task. + if (!T->encloses(BB)) { + IRB.SetInsertPoint(&*BB->getFirstInsertionPt()); + insertHookCall(&*IRB.GetInsertPoint(), CsanAfterLoop, + {LoopCsiId, IRB.getInt8(SyncRegNum), LoopPropVal}); + } + } +} + +bool CilkSanitizerImpl::instrumentAlloca(Instruction *I, TaskInfo &TI) { + // Only insert instrumentation if requested + if (!(InstrumentationSet & SHADOWMEMORY)) + return true; + + IRBuilder<> IRB(I); + bool AllocaInEntryBlock = isEntryBlock(*I->getParent(), TI); + if (AllocaInEntryBlock) + IRB.SetInsertPoint(getEntryBBInsertPt(*I->getParent())); + AllocaInst *AI = cast(I); + + uint64_t LocalId = AllocaFED.add(*I); + Value *CsiId = AllocaFED.localToGlobalId(LocalId, IRB); + uint64_t AllocaObjId = AllocaObj.add(*I, I); + assert(LocalId == AllocaObjId && + "Alloca received different ID's in FED and object tables."); + + CsiAllocaProperty Prop; + Prop.setIsStatic(AI->isStaticAlloca()); + Value *PropVal = Prop.getValue(IRB); + + // Get size of allocation. 
+ uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType()); + Value *SizeVal = IRB.getInt64(Size); + if (AI->isArrayAllocation()) + SizeVal = IRB.CreateMul(SizeVal, + IRB.CreateZExtOrBitCast(AI->getArraySize(), + IRB.getInt64Ty())); + + BasicBlock::iterator Iter(I); + if (!AllocaInEntryBlock) { + Iter++; + IRB.SetInsertPoint(&*Iter); + } else { + Iter = IRB.GetInsertPoint(); + } + + Type *AddrType = IRB.getInt8PtrTy(); + Value *Addr = IRB.CreatePointerCast(I, AddrType); + insertHookCall(&*Iter, CsiAfterAlloca, {CsiId, Addr, SizeVal, PropVal}); + + NumInstrumentedAllocas++; + return true; +} + +static Value *getHeapObject(Value *I) { + Value *Object = nullptr; + unsigned NumOfBitCastUses = 0; + + // Determine if CallInst has a bitcast use. + for (Value::user_iterator UI = I->user_begin(), E = I->user_end(); + UI != E;) + if (BitCastInst *BCI = dyn_cast(*UI++)) { + // Look for a dbg.value intrinsic for this bitcast. + SmallVector DbgValues; + findDbgValues(DbgValues, BCI); + if (!DbgValues.empty()) { + Object = BCI; + NumOfBitCastUses++; + } + } + + // Heap-allocation call has 1 debug-bitcast use, so use that bitcast as the + // object. + if (NumOfBitCastUses == 1) + return Object; + + // Otherwise just use the heap-allocation call directly. 
+ return I; +} + +bool CilkSanitizerImpl::instrumentAllocFnLibCall(Instruction *I, + const TargetLibraryInfo *TLI) { + // Only insert instrumentation if requested + if (!(InstrumentationSet & SHADOWMEMORY)) + return true; + + bool IsInvoke = isa(I); + CallBase *CB = dyn_cast(I); + if (!CB) + return false; + Function *Called = CB->getCalledFunction(); + + // Get the CSI IDs for this hook + IRBuilder<> IRB(I); + LLVMContext &Ctx = IRB.getContext(); + Value *DefaultID = getDefaultID(IRB); + uint64_t LocalId = AllocFnFED.add(*I); + Value *AllocFnId = AllocFnFED.localToGlobalId(LocalId, IRB); + Value *FuncId = GetCalleeFuncID(Called, IRB); + assert(FuncId != NULL); + + // Get the ID for the corresponding heap object + Value *HeapObj = nullptr; + if ("posix_memalign" == Called->getName()) + HeapObj = getHeapObject(CB->getArgOperand(0)); + else + HeapObj = getHeapObject(I); + uint64_t AllocFnObjId = AllocFnObj.add(*I, HeapObj); + assert(LocalId == AllocFnObjId && + "Allocation fn received different ID's in FED and object tables."); + + // TODO: Propagate MAAPs to allocation-function library calls + Value *NumMVVal = IRB.getInt8(0); + + CsiAllocFnProperty Prop; + Value *DefaultPropVal = Prop.getValue(IRB); + LibFunc AllocLibF; + TLI->getLibFunc(*Called, AllocLibF); + Prop.setAllocFnTy(static_cast(getAllocFnTy(AllocLibF))); + Value *PropVal = Prop.getValue(IRB); + Type *IDType = IRB.getInt64Ty(); + + // Synthesize the after hook for this function. 
+ SmallVector AfterHookParamTys({IDType, /*callee func_id*/ IDType, + /*MAAP_count*/ IRB.getInt8Ty(), + CsiAllocFnProperty::getType(Ctx)}); + SmallVector AfterHookParamVals( + {AllocFnId, FuncId, NumMVVal, PropVal}); + SmallVector AfterHookDefaultVals( + {DefaultID, DefaultID, IRB.getInt8(0), DefaultPropVal}); + if (!Called->getReturnType()->isVoidTy()) { + AfterHookParamTys.push_back(Called->getReturnType()); + AfterHookParamVals.push_back(CB); + AfterHookDefaultVals.push_back( + Constant::getNullValue(Called->getReturnType())); + } + AfterHookParamTys.append(Called->getFunctionType()->param_begin(), + Called->getFunctionType()->param_end()); + AfterHookParamVals.append(CB->arg_begin(), CB->arg_end()); + for (Value *Arg : CB->args()) + AfterHookDefaultVals.push_back(Constant::getNullValue(Arg->getType())); + FunctionType *AfterHookTy = + FunctionType::get(IRB.getVoidTy(), AfterHookParamTys, Called->isVarArg()); + FunctionCallee AfterLibCallHook = getOrInsertSynthesizedHook( + ("__csan_alloc_" + Called->getName()).str(), AfterHookTy); + + // Insert the hook after the call. + BasicBlock::iterator Iter(I); + if (IsInvoke) { + // There are two "after" positions for invokes: the normal block and the + // exception block. + InvokeInst *II = cast(I); + insertHookCallInSuccessorBB( + II->getNormalDest(), II->getParent(), AfterLibCallHook, + AfterHookParamVals, AfterHookDefaultVals); + // Don't insert any instrumentation in the exception block. + } else { + // Simple call instruction; there is only one "after" position. 
+ Iter++; + IRB.SetInsertPoint(&*Iter); + insertHookCall(&*Iter, AfterLibCallHook, AfterHookParamVals); + } + + NumInstrumentedAllocFns++; + return true; +} + +bool CilkSanitizerImpl::instrumentAllocationFn(Instruction *I, + DominatorTree &DT, + const TargetLibraryInfo *TLI) { + // Only insert instrumentation if requested + if (!(InstrumentationSet & SHADOWMEMORY)) + return true; + + bool IsInvoke = isa(I); + assert(isa(I) && + "instrumentAllocationFn not given a call or invoke instruction."); + Function *Called = dyn_cast(I)->getCalledFunction(); + assert(Called && "Could not get called function for allocation fn."); + + IRBuilder<> IRB(I); + SmallVector AllocFnArgs; + if (!getAllocFnArgs(I, AllocFnArgs, IntptrTy, IRB.getInt8PtrTy(), *TLI)) { + return instrumentAllocFnLibCall(I, TLI); + } + SmallVector DefaultAllocFnArgs( + {/* Allocated size */ Constant::getNullValue(IntptrTy), + /* Number of elements */ Constant::getNullValue(IntptrTy), + /* Alignment */ Constant::getNullValue(IntptrTy), + /* Old pointer */ Constant::getNullValue(IRB.getInt8PtrTy()),}); + + Value *DefaultID = getDefaultID(IRB); + uint64_t LocalId = AllocFnFED.add(*I); + Value *AllocFnId = AllocFnFED.localToGlobalId(LocalId, IRB); + uint64_t AllocFnObjId = AllocFnObj.add(*I, getHeapObject(I)); + assert(LocalId == AllocFnObjId && + "Allocation fn received different ID's in FED and object tables."); + + CsiAllocFnProperty Prop; + Value *DefaultPropVal = Prop.getValue(IRB); + LibFunc AllocLibF; + TLI->getLibFunc(*Called, AllocLibF); + Prop.setAllocFnTy(static_cast(getAllocFnTy(AllocLibF))); + AllocFnArgs.push_back(Prop.getValue(IRB)); + DefaultAllocFnArgs.push_back(DefaultPropVal); + + BasicBlock::iterator Iter(I); + if (IsInvoke) { + // There are two "after" positions for invokes: the normal block and the + // exception block. 
+ InvokeInst *II = cast(I); + + BasicBlock *NormalBB = II->getNormalDest(); + unsigned SuccNum = GetSuccessorNumber(II->getParent(), NormalBB); + if (isCriticalEdge(II, SuccNum)) + NormalBB = SplitCriticalEdge(II, SuccNum, + CriticalEdgeSplittingOptions(&DT)); + // Insert hook into normal destination. + { + IRB.SetInsertPoint(&*NormalBB->getFirstInsertionPt()); + SmallVector AfterAllocFnArgs; + AfterAllocFnArgs.push_back(AllocFnId); + AfterAllocFnArgs.push_back(IRB.CreatePointerCast(I, IRB.getInt8PtrTy())); + AfterAllocFnArgs.append(AllocFnArgs.begin(), AllocFnArgs.end()); + insertHookCall(&*IRB.GetInsertPoint(), CsanAfterAllocFn, + AfterAllocFnArgs); + } + // Insert hook into unwind destination. + { + // The return value of the allocation function is not valid in the unwind + // destination. + SmallVector AfterAllocFnArgs, DefaultAfterAllocFnArgs; + AfterAllocFnArgs.push_back(AllocFnId); + AfterAllocFnArgs.push_back(Constant::getNullValue(IRB.getInt8PtrTy())); + AfterAllocFnArgs.append(AllocFnArgs.begin(), AllocFnArgs.end()); + DefaultAfterAllocFnArgs.push_back(DefaultID); + DefaultAfterAllocFnArgs.push_back( + Constant::getNullValue(IRB.getInt8PtrTy())); + DefaultAfterAllocFnArgs.append(DefaultAllocFnArgs.begin(), + DefaultAllocFnArgs.end()); + insertHookCallInSuccessorBB( + II->getUnwindDest(), II->getParent(), CsanAfterAllocFn, + AfterAllocFnArgs, DefaultAfterAllocFnArgs); + } + } else { + // Simple call instruction; there is only one "after" position. 
+ Iter++; + IRB.SetInsertPoint(&*Iter); + SmallVector AfterAllocFnArgs; + AfterAllocFnArgs.push_back(AllocFnId); + AfterAllocFnArgs.push_back(IRB.CreatePointerCast(I, IRB.getInt8PtrTy())); + AfterAllocFnArgs.append(AllocFnArgs.begin(), AllocFnArgs.end()); + insertHookCall(&*Iter, CsanAfterAllocFn, AfterAllocFnArgs); + } + + NumInstrumentedAllocFns++; + return true; +} + +bool CilkSanitizerImpl::instrumentFree(Instruction *I, + const TargetLibraryInfo *TLI) { + // Only insert instrumentation if requested + if (!(InstrumentationSet & SHADOWMEMORY)) + return true; + + // It appears that frees (and deletes) never throw. + assert(isa(I) && "Free call is not a call instruction"); + + CallInst *FC = cast(I); + Function *Called = FC->getCalledFunction(); + assert(Called && "Could not get called function for free."); + + IRBuilder<> IRB(I); + uint64_t LocalId = FreeFED.add(*I); + Value *FreeId = FreeFED.localToGlobalId(LocalId, IRB); + + // All currently supported free functions free the first argument. 
+ Value *Addr = FC->getArgOperand(0); + CsiFreeProperty Prop; + LibFunc FreeLibF; + TLI->getLibFunc(*Called, FreeLibF); + Prop.setFreeTy(static_cast(getFreeTy(FreeLibF))); + + BasicBlock::iterator Iter(I); + Iter++; + IRB.SetInsertPoint(&*Iter); + insertHookCall(&*Iter, CsanAfterFree, {FreeId, Addr, Prop.getValue(IRB)}); + + NumInstrumentedFrees++; + return true; +} + +bool CilkSanitizerLegacyPass::runOnModule(Module &M) { + if (skipModule(M)) + return false; + + CallGraph *CG = &getAnalysis().getCallGraph(); + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis().getTLI(F); + }; + auto GetDomTree = [this](Function &F) -> DominatorTree & { + return this->getAnalysis(F).getDomTree(); + }; + auto GetTaskInfo = [this](Function &F) -> TaskInfo & { + return this->getAnalysis(F).getTaskInfo(); + }; + auto GetLoopInfo = [this](Function &F) -> LoopInfo & { + return this->getAnalysis(F).getLoopInfo(); + }; + auto GetRaceInfo = [this](Function &F) -> RaceInfo & { + return this->getAnalysis(F).getRaceInfo(); + }; + auto GetSE = [this](Function &F) -> ScalarEvolution & { + return this->getAnalysis(F).getSE(); + }; + + bool Changed = + CilkSanitizerImpl(M, CG, GetDomTree, nullptr, GetLoopInfo, nullptr, + GetTLI, nullptr, CallsMayThrow, JitMode) + .setup(true); + Changed |= + CilkSanitizerImpl(M, CG, GetDomTree, GetTaskInfo, GetLoopInfo, + GetRaceInfo, GetTLI, GetSE, CallsMayThrow, JitMode) + .run(); + return Changed; +} + +PreservedAnalyses CilkSanitizerPass::run(Module &M, ModuleAnalysisManager &AM) { + auto &FAM = AM.getResult(M).getManager(); + auto &CG = AM.getResult(M); + auto GetDT = + [&FAM](Function &F) -> DominatorTree & { + return FAM.getResult(F); + }; + auto GetTI = + [&FAM](Function &F) -> TaskInfo & { + return FAM.getResult(F); + }; + auto GetLI = + [&FAM](Function &F) -> LoopInfo & { + return FAM.getResult(F); + }; + auto GetRI = + [&FAM](Function &F) -> RaceInfo & { + return FAM.getResult(F); + }; + auto GetTLI = + 
[&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult(F); + }; + auto GetSE = [&FAM](Function &F) -> ScalarEvolution & { + return FAM.getResult(F); + }; + + bool Changed = + CilkSanitizerImpl(M, &CG, GetDT, nullptr, GetLI, nullptr, GetTLI, nullptr) + .setup(false); + Changed |= + CilkSanitizerImpl(M, &CG, GetDT, GetTI, GetLI, GetRI, GetTLI, GetSE) + .run(); + + if (!Changed) + return PreservedAnalyses::all(); + + return PreservedAnalyses::none(); +} diff --git a/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp new file mode 100644 index 00000000000000..dab90267f9af37 --- /dev/null +++ b/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp @@ -0,0 +1,2945 @@ +//===- ComprehensiveStaticInstrumentation.cpp - CSI compiler pass ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is part of CSI, a framework that provides comprehensive static +// instrumentation. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TapirTaskInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/EHPersonalities.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Verifier.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/InitializePasses.h" +#include "llvm/Linker/Linker.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/ModRef.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Instrumentation/CSI.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/EscapeEnumerator.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" +#include "llvm/Transforms/Utils/TapirUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "csi" + +static cl::opt + ClInstrumentFuncEntryExit("csi-instrument-func-entry-exit", cl::init(true), + cl::desc("Instrument function entry and exit"), + cl::Hidden); +static cl::opt + 
ClInstrumentLoops("csi-instrument-loops", cl::init(true), + cl::desc("Instrument loops"), cl::Hidden); +static cl::opt + ClInstrumentBasicBlocks("csi-instrument-basic-blocks", cl::init(true), + cl::desc("Instrument basic blocks"), cl::Hidden); +static cl::opt + ClInstrumentMemoryAccesses("csi-instrument-memory-accesses", cl::init(true), + cl::desc("Instrument memory accesses"), + cl::Hidden); +static cl::opt ClInstrumentCalls("csi-instrument-function-calls", + cl::init(true), + cl::desc("Instrument function calls"), + cl::Hidden); +static cl::opt ClInstrumentAtomics("csi-instrument-atomics", + cl::init(true), + cl::desc("Instrument atomics"), + cl::Hidden); +static cl::opt ClInstrumentMemIntrinsics( + "csi-instrument-memintrinsics", cl::init(true), + cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden); +static cl::opt ClInstrumentTapir("csi-instrument-tapir", cl::init(true), + cl::desc("Instrument tapir constructs"), + cl::Hidden); +static cl::opt ClInstrumentAllocas("csi-instrument-alloca", + cl::init(true), + cl::desc("Instrument allocas"), + cl::Hidden); +static cl::opt + ClInstrumentAllocFns("csi-instrument-allocfn", cl::init(true), + cl::desc("Instrument allocation functions"), + cl::Hidden); + +static cl::opt ClInterpose("csi-interpose", cl::init(true), + cl::desc("Enable function interpositioning"), + cl::Hidden); + +static cl::opt ClToolBitcode( + "csi-tool-bitcode", cl::init(""), + cl::desc("Path to the tool bitcode file for compile-time instrumentation"), + cl::Hidden); + +static cl::opt + ClRuntimeBitcode("csi-runtime-bitcode", cl::init(""), + cl::desc("Path to the CSI runtime bitcode file for " + "optimized compile-time instrumentation"), + cl::Hidden); + +static cl::opt ClToolLibrary( + "csi-tool-library", cl::init(""), + cl::desc("Path to the tool library file for compile-time instrumentation"), + cl::Hidden); + +static cl::opt ClConfigurationFilename( + "csi-config-filename", cl::init(""), + cl::desc("Path to the configuration 
file for surgical instrumentation"), + cl::Hidden); + +static cl::opt ClConfigurationMode( + "csi-config-mode", cl::init(InstrumentationConfigMode::WHITELIST), + cl::values(clEnumValN(InstrumentationConfigMode::WHITELIST, "whitelist", + "Use configuration file as a whitelist"), + clEnumValN(InstrumentationConfigMode::BLACKLIST, "blacklist", + "Use configuration file as a blacklist")), + cl::desc("Specifies how to interpret the configuration file"), cl::Hidden); + +static cl::opt + AssumeNoExceptions( + "csi-assume-no-exceptions", cl::init(false), cl::Hidden, + cl::desc("Assume that ordinary calls cannot throw exceptions.")); + +static cl::opt + SplitBlocksAtCalls( + "csi-split-blocks-at-calls", cl::init(true), cl::Hidden, + cl::desc("Split basic blocks at function calls.")); + +static size_t numPassRuns = 0; +bool IsFirstRun() { return numPassRuns == 0; } + +namespace { + +static CSIOptions OverrideFromCL(CSIOptions Options) { + Options.InstrumentFuncEntryExit = ClInstrumentFuncEntryExit; + Options.InstrumentLoops = ClInstrumentLoops; + Options.InstrumentBasicBlocks = ClInstrumentBasicBlocks; + Options.InstrumentMemoryAccesses = ClInstrumentMemoryAccesses; + Options.InstrumentCalls = ClInstrumentCalls; + Options.InstrumentAtomics = ClInstrumentAtomics; + Options.InstrumentMemIntrinsics = ClInstrumentMemIntrinsics; + Options.InstrumentTapir = ClInstrumentTapir; + Options.InstrumentAllocas = ClInstrumentAllocas; + Options.InstrumentAllocFns = ClInstrumentAllocFns; + Options.CallsMayThrow = !AssumeNoExceptions; + Options.CallsTerminateBlocks = SplitBlocksAtCalls; + return Options; +} + +/// The Comprehensive Static Instrumentation pass. +/// Inserts calls to user-defined hooks at predefined points in the IR. +struct ComprehensiveStaticInstrumentationLegacyPass : public ModulePass { + static char ID; // Pass identification, replacement for typeid. 
+ + ComprehensiveStaticInstrumentationLegacyPass( + const CSIOptions &Options = OverrideFromCL(CSIOptions())) + : ModulePass(ID), Options(Options) { + initializeComprehensiveStaticInstrumentationLegacyPassPass( + *PassRegistry::getPassRegistry()); + } + StringRef getPassName() const override { + return "ComprehensiveStaticInstrumentation"; + } + bool runOnModule(Module &M) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + +private: + CSIOptions Options; +}; // struct ComprehensiveStaticInstrumentation +} // anonymous namespace + +char ComprehensiveStaticInstrumentationLegacyPass::ID = 0; + +INITIALIZE_PASS_BEGIN(ComprehensiveStaticInstrumentationLegacyPass, "csi", + "ComprehensiveStaticInstrumentation pass", false, false) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TaskInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(ComprehensiveStaticInstrumentationLegacyPass, "csi", + "ComprehensiveStaticInstrumentation pass", false, false) + +/// Return the first DILocation in the given basic block, or nullptr +/// if none exists. +static const DILocation *getFirstDebugLoc(const BasicBlock &BB) { + for (const Instruction &Inst : BB) + if (const DILocation *Loc = Inst.getDebugLoc()) + return Loc; + + return nullptr; +} + +/// Set DebugLoc on the call instruction to a CSI hook, based on the +/// debug information of the instrumented instruction. 
+static void setInstrumentationDebugLoc(Instruction *Instrumented, + Instruction *Call) { + DISubprogram *Subprog = Instrumented->getFunction()->getSubprogram(); + if (Subprog) { + if (Instrumented->getDebugLoc()) { + Call->setDebugLoc(Instrumented->getDebugLoc()); + } else { + LLVMContext &C = Instrumented->getContext(); + Call->setDebugLoc(DILocation::get(C, 0, 0, Subprog)); + } + } +} + +/// Set DebugLoc on the call instruction to a CSI hook, based on the +/// debug information of the instrumented instruction. +static void setInstrumentationDebugLoc(BasicBlock &Instrumented, + Instruction *Call) { + DISubprogram *Subprog = Instrumented.getParent()->getSubprogram(); + if (Subprog) { + if (const DILocation *FirstDebugLoc = getFirstDebugLoc(Instrumented)) + Call->setDebugLoc(FirstDebugLoc); + else { + LLVMContext &C = Instrumented.getContext(); + Call->setDebugLoc(DILocation::get(C, 0, 0, Subprog)); + } + } +} + +bool CSISetupImpl::run() { + bool Changed = false; + for (Function &F : M) + Changed |= setupFunction(F); + return Changed; +} + +bool CSISetupImpl::setupFunction(Function &F) { + if (F.empty() || CSIImpl::shouldNotInstrumentFunction(F)) + return false; + + if (Options.CallsMayThrow) + // Promote calls to invokes to insert CSI instrumentation in + // exception-handling code. + CSIImpl::setupCalls(F); + + // If we do not assume that calls terminate blocks, or if we're not + // instrumenting basic blocks, then we're done. + if (Options.InstrumentBasicBlocks && Options.CallsTerminateBlocks) + CSIImpl::splitBlocksAtCalls(F); + + LLVM_DEBUG(dbgs() << "Setup function:\n" << F); + + return true; +} + +bool CSIImpl::callsPlaceholderFunction(const Instruction &I) { + if (isa(I)) + return true; + + if (isDetachedRethrow(&I) || isTaskFrameResume(&I) || isSyncUnwind(&I)) + return true; + + if (const IntrinsicInst *II = dyn_cast(&I)) + switch (II->getIntrinsicID()) { + default: break; + // FIXME: This list is repeated from NoTTI::getIntrinsicCost. 
+ case Intrinsic::annotation: + case Intrinsic::assume: + case Intrinsic::sideeffect: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: + case Intrinsic::is_constant: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + case Intrinsic::experimental_gc_result: + case Intrinsic::experimental_gc_relocate: + case Intrinsic::experimental_noalias_scope_decl: + case Intrinsic::coro_alloc: + case Intrinsic::coro_begin: + case Intrinsic::coro_free: + case Intrinsic::coro_end: + case Intrinsic::coro_frame: + case Intrinsic::coro_size: + case Intrinsic::coro_suspend: + case Intrinsic::coro_subfn_addr: + case Intrinsic::syncregion_start: + case Intrinsic::taskframe_create: + case Intrinsic::taskframe_use: + case Intrinsic::taskframe_end: + case Intrinsic::taskframe_load_guard: + case Intrinsic::tapir_runtime_start: + case Intrinsic::tapir_runtime_end: + // These intrinsics don't actually represent code after lowering. + return true; + } + + return false; +} + +bool CSIImpl::spawnsTapirLoopBody(DetachInst *DI, LoopInfo &LI, TaskInfo &TI) { + Loop *L = LI.getLoopFor(DI->getParent()); + return (TI.getTaskFor(DI->getDetached()) == getTaskIfTapirLoop(L, &TI)); +} + +bool CSIImpl::run() { + // Link the tool bitcode once initially, to get type definitions. + linkInToolFromBitcode(ClToolBitcode); + initializeCsi(); + + for (Function &F : M) + instrumentFunction(F); + + collectUnitFEDTables(); + collectUnitSizeTables(); + + finalizeCsi(); + + if (IsFirstRun() && Options.jitMode) { + llvm::sys::DynamicLibrary::LoadLibraryPermanently(ClToolLibrary.c_str()); + } + // Link the tool bitcode a second time, for definitions of used functions. + linkInToolFromBitcode(ClToolBitcode); + linkInToolFromBitcode(ClRuntimeBitcode); + + return true; // We always insert the unit constructor. 
+} + +Constant *ForensicTable::getObjectStrGV(Module &M, StringRef Str, + const Twine GVName) { + LLVMContext &C = M.getContext(); + IntegerType *Int32Ty = IntegerType::get(C, 32); + Constant *Zero = ConstantInt::get(Int32Ty, 0); + Value *GepArgs[] = {Zero, Zero}; + if (Str.empty()) + return ConstantPointerNull::get( + PointerType::get(IntegerType::get(C, 8), 0)); + + Constant *NameStrConstant = ConstantDataArray::getString(C, Str); + GlobalVariable *GV = M.getGlobalVariable((GVName + Str).str(), true); + if (GV == NULL) { + GV = new GlobalVariable(M, NameStrConstant->getType(), true, + GlobalValue::PrivateLinkage, NameStrConstant, + GVName + Str, nullptr, + GlobalVariable::NotThreadLocal, 0); + GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + } + assert(GV); + return ConstantExpr::getGetElementPtr(GV->getValueType(), GV, GepArgs); +} + +ForensicTable::ForensicTable(Module &M, StringRef BaseIdName, + StringRef TableName, bool UseExistingBaseId) + : TableName(TableName) { + LLVMContext &C = M.getContext(); + IntegerType *Int64Ty = IntegerType::get(C, 64); + IdCounter = 0; + + if (UseExistingBaseId) + // Try to look up an existing BaseId to use. 
+ BaseId = M.getGlobalVariable(BaseIdName, true); + if (nullptr == BaseId) + BaseId = new GlobalVariable(M, Int64Ty, false, GlobalValue::InternalLinkage, + ConstantInt::get(Int64Ty, 0), BaseIdName); + assert(BaseId); +} + +uint64_t ForensicTable::getId(const Value *V) { + if (!ValueToLocalIdMap.count(V)) + ValueToLocalIdMap[V] = IdCounter++; + assert(ValueToLocalIdMap.count(V) && "Value not in ID map."); + return ValueToLocalIdMap[V]; +} + +Value *ForensicTable::localToGlobalId(uint64_t LocalId, + IRBuilder<> &IRB) const { + assert(BaseId); + LLVMContext &C = IRB.getContext(); + Type *BaseIdTy = IRB.getInt64Ty(); + LoadInst *Base = IRB.CreateLoad(BaseIdTy, BaseId); + MDNode *MD = MDNode::get(C, std::nullopt); + Base->setMetadata(LLVMContext::MD_invariant_load, MD); + Value *Offset = IRB.getInt64(LocalId); + return IRB.CreateAdd(Base, Offset); +} + +uint64_t SizeTable::add(const BasicBlock &BB, TargetTransformInfo *TTI) { + uint64_t ID = getId(&BB); + // Count the LLVM IR instructions + int32_t IRCost = 0; + for (const Instruction &I : BB) { + if (TTI) { + InstructionCost ICost = + TTI->getInstructionCost(&I, TargetTransformInfo::TCK_Latency); + if (!ICost.isValid()) + IRCost += static_cast(TargetTransformInfo::TCC_Basic); + else + IRCost += *(ICost.getValue()); + } else { + if (isa(I)) + continue; + if (CSIImpl::callsPlaceholderFunction(I)) + continue; + IRCost++; + } + } + add(ID, BB.size(), IRCost); + return ID; +} + +PointerType *SizeTable::getPointerType(LLVMContext &C) { + return PointerType::get(getSizeStructType(C), 0); +} + +StructType *SizeTable::getSizeStructType(LLVMContext &C) { + return StructType::get( + /* FullIRSize */ IntegerType::get(C, 32), + /* NonEmptyIRSize */ IntegerType::get(C, 32)); +} + +void SizeTable::add(uint64_t ID, int32_t FullIRSize, int32_t NonEmptyIRSize) { + assert(LocalIdToSizeMap.find(ID) == LocalIdToSizeMap.end() && + "ID already exists in FED table."); + LocalIdToSizeMap[ID] = {FullIRSize, NonEmptyIRSize}; +} + +Constant 
*SizeTable::insertIntoModule(Module &M) const { + LLVMContext &C = M.getContext(); + StructType *TableType = getSizeStructType(C); + IntegerType *Int32Ty = IntegerType::get(C, 32); + Constant *Zero = ConstantInt::get(Int32Ty, 0); + Value *GepArgs[] = {Zero, Zero}; + SmallVector TableEntries; + + for (uint64_t LocalID = 0; LocalID < IdCounter; ++LocalID) { + const SizeInformation &E = LocalIdToSizeMap.find(LocalID)->second; + Constant *FullIRSize = ConstantInt::get(Int32Ty, E.FullIRSize); + Constant *NonEmptyIRSize = ConstantInt::get(Int32Ty, E.NonEmptyIRSize); + // The order of arguments to ConstantStruct::get() must match the + // sizeinfo_t type in csi.h. + TableEntries.push_back( + ConstantStruct::get(TableType, FullIRSize, NonEmptyIRSize)); + } + + ArrayType *TableArrayType = ArrayType::get(TableType, TableEntries.size()); + Constant *Table = ConstantArray::get(TableArrayType, TableEntries); + GlobalVariable *GV = + new GlobalVariable(M, TableArrayType, false, GlobalValue::InternalLinkage, + Table, CsiUnitSizeTableName); + return ConstantExpr::getGetElementPtr(GV->getValueType(), GV, GepArgs); +} + +uint64_t FrontEndDataTable::add(const Function &F) { + uint64_t ID = getId(&F); + if (F.getSubprogram()) + add(ID, F.getSubprogram()); + else + add(ID, -1, -1, F.getParent()->getName(), "", F.getName()); + return ID; +} + +uint64_t FrontEndDataTable::add(const BasicBlock &BB) { + uint64_t ID = getId(&BB); + add(ID, getFirstDebugLoc(BB)); + return ID; +} + +uint64_t FrontEndDataTable::add(const Instruction &I, + const StringRef &RealName) { + uint64_t ID = getId(&I); + if (auto DL = I.getDebugLoc()) + add(ID, DL, RealName); + else { + if (const DISubprogram *Subprog = I.getFunction()->getSubprogram()) + add(ID, (int32_t)Subprog->getLine(), -1, Subprog->getFilename(), + Subprog->getDirectory(), + RealName == "" ? Subprog->getName() : RealName); + else + add(ID, -1, -1, I.getModule()->getName(), "", + RealName == "" ? 
I.getFunction()->getName() : RealName); + } + return ID; +} + +PointerType *FrontEndDataTable::getPointerType(LLVMContext &C) { + return PointerType::get(getSourceLocStructType(C), 0); +} + +StructType *FrontEndDataTable::getSourceLocStructType(LLVMContext &C) { + return StructType::get( + /* Name */ PointerType::get(IntegerType::get(C, 8), 0), + /* Line */ IntegerType::get(C, 32), + /* Column */ IntegerType::get(C, 32), + /* File */ PointerType::get(IntegerType::get(C, 8), 0)); +} + +void FrontEndDataTable::add(uint64_t ID, const DILocation *Loc, + const StringRef &RealName) { + if (Loc) { + // TODO: Add location information for inlining + const DISubprogram *Subprog = Loc->getScope()->getSubprogram(); + add(ID, (int32_t)Loc->getLine(), (int32_t)Loc->getColumn(), + Loc->getFilename(), Loc->getDirectory(), + RealName == "" ? Subprog->getName() : RealName); + } else + add(ID); +} + +void FrontEndDataTable::add(uint64_t ID, const DISubprogram *Subprog) { + if (Subprog) + add(ID, (int32_t)Subprog->getLine(), -1, Subprog->getFilename(), + Subprog->getDirectory(), Subprog->getName()); + else + add(ID); +} + +void FrontEndDataTable::add(uint64_t ID, int32_t Line, int32_t Column, + StringRef Filename, StringRef Directory, + StringRef Name) { + // TODO: This assert is too strong for unwind basic blocks' FED. + /*assert(LocalIdToSourceLocationMap.find(ID) == + LocalIdToSourceLocationMap.end() && + "Id already exists in FED table."); */ + LocalIdToSourceLocationMap[ID] = {Name, Line, Column, Filename, Directory}; +} + +// The order of arguments to ConstantStruct::get() must match the source_loc_t +// type in csi.h. 
+static void addFEDTableEntries(SmallVectorImpl &FEDEntries, + StructType *FedType, Constant *Name, + Constant *Line, Constant *Column, + Constant *File) { + FEDEntries.push_back(ConstantStruct::get(FedType, Name, Line, Column, File)); +} + +Constant *FrontEndDataTable::insertIntoModule(Module &M) const { + LLVMContext &C = M.getContext(); + StructType *FedType = getSourceLocStructType(C); + IntegerType *Int32Ty = IntegerType::get(C, 32); + Constant *Zero = ConstantInt::get(Int32Ty, 0); + Value *GepArgs[] = {Zero, Zero}; + SmallVector FEDEntries; + + for (uint64_t LocalID = 0; LocalID < IdCounter; ++LocalID) { + const SourceLocation &E = LocalIdToSourceLocationMap.find(LocalID)->second; + Constant *Line = ConstantInt::get(Int32Ty, E.Line); + Constant *Column = ConstantInt::get(Int32Ty, E.Column); + Constant *File; + { + std::string Filename = E.Filename.str(); + if (!E.Directory.empty()) + Filename = E.Directory.str() + "/" + Filename; + File = getObjectStrGV(M, Filename, "__csi_unit_filename_"); + } + Constant *Name = getObjectStrGV(M, E.Name, "__csi_unit_function_name_"); + addFEDTableEntries(FEDEntries, FedType, Name, Line, Column, File); + } + + ArrayType *FedArrayType = ArrayType::get(FedType, FEDEntries.size()); + Constant *Table = ConstantArray::get(FedArrayType, FEDEntries); + GlobalVariable *GV = + new GlobalVariable(M, FedArrayType, false, GlobalValue::InternalLinkage, + Table, CsiUnitFedTableName + BaseId->getName()); + return ConstantExpr::getGetElementPtr(GV->getValueType(), GV, GepArgs); +} + +/// Function entry and exit hook initialization +void CSIImpl::initializeFuncHooks() { + LLVMContext &C = M.getContext(); + IRBuilder<> IRB(C); + // Initialize function entry hook + Type *FuncPropertyTy = CsiFuncProperty::getType(C); + CsiFuncEntry = M.getOrInsertFunction("__csi_func_entry", IRB.getVoidTy(), + IRB.getInt64Ty(), FuncPropertyTy); + // Initialize function exit hook + Type *FuncExitPropertyTy = CsiFuncExitProperty::getType(C); + CsiFuncExit = 
M.getOrInsertFunction("__csi_func_exit", IRB.getVoidTy(), + IRB.getInt64Ty(), IRB.getInt64Ty(), + FuncExitPropertyTy); +} + +/// Basic-block hook initialization +void CSIImpl::initializeBasicBlockHooks() { + LLVMContext &C = M.getContext(); + IRBuilder<> IRB(C); + Type *PropertyTy = CsiBBProperty::getType(C); + CsiBBEntry = M.getOrInsertFunction("__csi_bb_entry", IRB.getVoidTy(), + IRB.getInt64Ty(), PropertyTy); + CsiBBExit = M.getOrInsertFunction("__csi_bb_exit", IRB.getVoidTy(), + IRB.getInt64Ty(), PropertyTy); +} + +/// Loop hook initialization +void CSIImpl::initializeLoopHooks() { + LLVMContext &C = M.getContext(); + IRBuilder<> IRB(C); + Type *IDType = IRB.getInt64Ty(); + Type *LoopPropertyTy = CsiLoopProperty::getType(C); + Type *LoopExitPropertyTy = CsiLoopExitProperty::getType(C); + + CsiBeforeLoop = M.getOrInsertFunction("__csi_before_loop", IRB.getVoidTy(), + IDType, IRB.getInt64Ty(), + LoopPropertyTy); + CsiAfterLoop = M.getOrInsertFunction("__csi_after_loop", IRB.getVoidTy(), + IDType, LoopPropertyTy); + + CsiLoopBodyEntry = M.getOrInsertFunction("__csi_loopbody_entry", + IRB.getVoidTy(), IDType, + LoopPropertyTy); + CsiLoopBodyExit = M.getOrInsertFunction("__csi_loopbody_exit", + IRB.getVoidTy(), IDType, IDType, + LoopExitPropertyTy); +} + +// Call-site hook initialization +void CSIImpl::initializeCallsiteHooks() { + LLVMContext &C = M.getContext(); + IRBuilder<> IRB(C); + Type *PropertyTy = CsiCallProperty::getType(C); + CsiBeforeCallsite = M.getOrInsertFunction("__csi_before_call", + IRB.getVoidTy(), IRB.getInt64Ty(), + IRB.getInt64Ty(), PropertyTy); + CsiAfterCallsite = M.getOrInsertFunction("__csi_after_call", IRB.getVoidTy(), + IRB.getInt64Ty(), IRB.getInt64Ty(), + PropertyTy); +} + +// Alloca (local variable) hook initialization +void CSIImpl::initializeAllocaHooks() { + LLVMContext &C = M.getContext(); + IRBuilder<> IRB(C); + Type *IDType = IRB.getInt64Ty(); + Type *AddrType = IRB.getInt8PtrTy(); + Type *PropType = 
CsiAllocaProperty::getType(C); + + CsiAfterAlloca = M.getOrInsertFunction("__csi_after_alloca", IRB.getVoidTy(), + IDType, AddrType, IntptrTy, PropType); +} + +// Non-local-variable allocation/free hook initialization +void CSIImpl::initializeAllocFnHooks() { + LLVMContext &C = M.getContext(); + IRBuilder<> IRB(C); + Type *RetType = IRB.getVoidTy(); + Type *IDType = IRB.getInt64Ty(); + Type *AddrType = IRB.getInt8PtrTy(); + Type *LargeNumBytesType = IntptrTy; + Type *AllocFnPropType = CsiAllocFnProperty::getType(C); + Type *FreePropType = CsiFreeProperty::getType(C); + + CsiBeforeAllocFn = M.getOrInsertFunction("__csi_before_allocfn", RetType, + IDType, LargeNumBytesType, + LargeNumBytesType, LargeNumBytesType, + AddrType, AllocFnPropType); + CsiAfterAllocFn = M.getOrInsertFunction("__csi_after_allocfn", RetType, + IDType, /* new ptr */ AddrType, + /* size */ LargeNumBytesType, + /* num elements */ LargeNumBytesType, + /* alignment */ LargeNumBytesType, + /* old ptr */ AddrType, + /* property */ AllocFnPropType); + + CsiBeforeFree = M.getOrInsertFunction("__csi_before_free", RetType, IDType, + AddrType, FreePropType); + CsiAfterFree = M.getOrInsertFunction("__csi_after_free", RetType, IDType, + AddrType, FreePropType); +} + +// Load and store hook initialization +void CSIImpl::initializeLoadStoreHooks() { + LLVMContext &C = M.getContext(); + IRBuilder<> IRB(C); + Type *LoadPropertyTy = CsiLoadStoreProperty::getType(C); + Type *StorePropertyTy = CsiLoadStoreProperty::getType(C); + Type *RetType = IRB.getVoidTy(); + Type *AddrType = IRB.getInt8PtrTy(); + Type *NumBytesType = IRB.getInt32Ty(); + + CsiBeforeRead = M.getOrInsertFunction("__csi_before_load", RetType, + IRB.getInt64Ty(), AddrType, + NumBytesType, LoadPropertyTy); + CsiAfterRead = M.getOrInsertFunction("__csi_after_load", RetType, + IRB.getInt64Ty(), AddrType, NumBytesType, + LoadPropertyTy); + + CsiBeforeWrite = M.getOrInsertFunction("__csi_before_store", RetType, + IRB.getInt64Ty(), AddrType, + 
NumBytesType, StorePropertyTy); + CsiAfterWrite = M.getOrInsertFunction("__csi_after_store", RetType, + IRB.getInt64Ty(), AddrType, + NumBytesType, StorePropertyTy); +} + +// Initialization of hooks for LLVM memory intrinsics +void CSIImpl::initializeMemIntrinsicsHooks() { + LLVMContext &C = M.getContext(); + IRBuilder<> IRB(C); + + MemmoveFn = M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), + IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IntptrTy); + MemcpyFn = M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), + IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + IntptrTy); + MemsetFn = M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), + IRB.getInt8PtrTy(), IRB.getInt32Ty(), + IntptrTy); +} + +// Initialization of Tapir hooks +void CSIImpl::initializeTapirHooks() { + LLVMContext &C = M.getContext(); + IRBuilder<> IRB(C); + Type *IDType = IRB.getInt64Ty(); + Type *SyncRegType = IRB.getInt32Ty(); + Type *RetType = IRB.getVoidTy(); + Type *TaskPropertyTy = CsiTaskProperty::getType(C); + Type *TaskExitPropertyTy = CsiTaskExitProperty::getType(C); + Type *DetachPropertyTy = CsiDetachProperty::getType(C); + Type *DetContPropertyTy = CsiDetachContinueProperty::getType(C); + + CsiDetach = + M.getOrInsertFunction("__csi_detach", RetType, + /* detach_id */ IDType, + /* sync_reg */ SyncRegType, DetachPropertyTy); + CsiTaskEntry = M.getOrInsertFunction("__csi_task", RetType, + /* task_id */ IDType, + /* detach_id */ IDType, TaskPropertyTy); + CsiTaskExit = + M.getOrInsertFunction("__csi_task_exit", RetType, + /* task_exit_id */ IDType, + /* task_id */ IDType, + /* detach_id */ IDType, + /* sync_reg */ SyncRegType, TaskExitPropertyTy); + CsiDetachContinue = + M.getOrInsertFunction("__csi_detach_continue", RetType, + /* detach_continue_id */ IDType, + /* detach_id */ IDType, + /* sync_reg */ SyncRegType, DetContPropertyTy); + CsiBeforeSync = + M.getOrInsertFunction("__csi_before_sync", RetType, IDType, SyncRegType); + CsiAfterSync = + M.getOrInsertFunction("__csi_after_sync", 
RetType, IDType, SyncRegType); +} + +// Prepare any calls in the CFG for instrumentation, e.g., by making sure any +// call that can throw is modeled with an invoke. +void CSIImpl::setupCalls(Function &F) { + // If F does not throw, then no need to promote calls to invokes. + if (F.doesNotThrow()) + return; + + promoteCallsInTasksToInvokes(F, "csi.cleanup"); +} + +static BasicBlock *SplitOffPreds(BasicBlock *BB, + SmallVectorImpl &Preds, + DominatorTree *DT, LoopInfo *LI) { + if (BB->isLandingPad()) { + SmallVector NewBBs; + SplitLandingPadPredecessors(BB, Preds, ".csi-split-lp", ".csi-split", + NewBBs, DT, LI); + return NewBBs[1]; + } + + BasicBlock *NewBB = SplitBlockPredecessors(BB, Preds, ".csi-split", DT, LI); + if (isa(BB->getFirstNonPHIOrDbg())) { + // If the block being split is simply contains an unreachable, then replace + // the terminator of the new block with an unreachable. This helps preserve + // invariants on the CFG structure for Tapir placeholder blocks following + // detached.rethrow and taskframe.resume terminators. + ReplaceInstWithInst(NewBB->getTerminator(), + new UnreachableInst(BB->getContext())); + if (DT) { + DT->deleteEdge(NewBB, BB); + } + } + return BB; +} + +// Setup each block such that all of its predecessors belong to the same CSI ID +// space. +static void setupBlock(BasicBlock *BB, const TargetLibraryInfo *TLI, + DominatorTree *DT, LoopInfo *LI) { + if (BB->getUniquePredecessor()) + return; + + SmallVector DetachPreds; + SmallVector TFResumePreds; + SmallVector SyncPreds; + SmallVector SyncUnwindPreds; + SmallVector AllocFnPreds; + SmallVector FreeFnPreds; + SmallVector InvokePreds; + bool HasOtherPredTypes = false; + unsigned NumPredTypes = 0; + + // Partition the predecessors of the landing pad. 
+ for (BasicBlock *Pred : predecessors(BB)) { + if (isa(Pred->getTerminator()) || + isa(Pred->getTerminator()) || + isDetachedRethrow(Pred->getTerminator())) + DetachPreds.push_back(Pred); + else if (isTaskFrameResume(Pred->getTerminator())) + TFResumePreds.push_back(Pred); + else if (isa(Pred->getTerminator())) + SyncPreds.push_back(Pred); + else if (isSyncUnwind(Pred->getTerminator())) + SyncUnwindPreds.push_back(Pred); + else if (CSIImpl::isAllocFn(Pred->getTerminator(), TLI)) + AllocFnPreds.push_back(Pred); + else if (CSIImpl::isFreeFn(Pred->getTerminator(), TLI)) + FreeFnPreds.push_back(Pred); + else if (isa(Pred->getTerminator())) + InvokePreds.push_back(Pred); + else + HasOtherPredTypes = true; + } + + NumPredTypes = static_cast(!DetachPreds.empty()) + + static_cast(!TFResumePreds.empty()) + + static_cast(!SyncPreds.empty()) + + static_cast(!SyncUnwindPreds.empty()) + + static_cast(!AllocFnPreds.empty()) + + static_cast(!FreeFnPreds.empty()) + + static_cast(!InvokePreds.empty()) + + static_cast(HasOtherPredTypes); + + // Splitting predecessors works differently for landingpads versus normal + // basic blocks. If the block is not a landingpad, split off every type of + // predecessor. + unsigned NumPredTypesRequired = static_cast(BB->isLandingPad()); + if (NumPredTypes <= NumPredTypesRequired) + return; + + BasicBlock *BBToSplit = BB; + // Split off the predecessors of each type. 
+ if (!SyncPreds.empty() && NumPredTypes > NumPredTypesRequired) { + BBToSplit = SplitOffPreds(BBToSplit, SyncPreds, DT, LI); + NumPredTypes--; + } + if (!SyncUnwindPreds.empty() && NumPredTypes > NumPredTypesRequired) { + BBToSplit = SplitOffPreds(BBToSplit, SyncUnwindPreds, DT, LI); + NumPredTypes--; + } + if (!AllocFnPreds.empty() && NumPredTypes > NumPredTypesRequired) { + BBToSplit = SplitOffPreds(BBToSplit, AllocFnPreds, DT, LI); + NumPredTypes--; + } + if (!FreeFnPreds.empty() && NumPredTypes > NumPredTypesRequired) { + BBToSplit = SplitOffPreds(BBToSplit, FreeFnPreds, DT, LI); + NumPredTypes--; + } + if (!InvokePreds.empty() && NumPredTypes > NumPredTypesRequired) { + BBToSplit = SplitOffPreds(BBToSplit, InvokePreds, DT, LI); + NumPredTypes--; + } + if (!TFResumePreds.empty() && NumPredTypes > NumPredTypesRequired) { + BBToSplit = SplitOffPreds(BBToSplit, TFResumePreds, DT, LI); + NumPredTypes--; + } + // We handle detach and detached.rethrow predecessors at the end to preserve + // invariants on the CFG structure about the deadness of basic blocks after + // detached-rethrows. + if (!DetachPreds.empty() && NumPredTypes > NumPredTypesRequired) { + BBToSplit = SplitOffPreds(BBToSplit, DetachPreds, DT, LI); + NumPredTypes--; + } +} + +// Setup all basic blocks such that each block's predecessors belong entirely to +// one CSI ID space. 
+void CSIImpl::setupBlocks(Function &F, const TargetLibraryInfo *TLI, + DominatorTree *DT, LoopInfo *LI) { + SmallPtrSet BlocksToSetup; + for (BasicBlock &BB : F) { + if (BB.isLandingPad()) + BlocksToSetup.insert(&BB); + + if (InvokeInst *II = dyn_cast(BB.getTerminator())) { + if (!isTapirPlaceholderSuccessor(II->getNormalDest())) + BlocksToSetup.insert(II->getNormalDest()); + } else if (SyncInst *SI = dyn_cast(BB.getTerminator())) + BlocksToSetup.insert(SI->getSuccessor(0)); + } + + for (BasicBlock *BB : BlocksToSetup) + setupBlock(BB, TLI, DT, LI); +} + +// Split basic blocks so that ordinary call instructions terminate basic blocks. +void CSIImpl::splitBlocksAtCalls(Function &F, DominatorTree *DT, LoopInfo *LI) { + // Split basic blocks after call instructions. + SmallVector CallsToSplit; + for (BasicBlock &BB : F) + for (Instruction &I : BB) + if (isa(I) && + // Skip placeholder call instructions + !callsPlaceholderFunction(I) && + // Skip a call instruction if it is immediately followed by a + // terminator + !I.getNextNode()->isTerminator() && + // If the call does not return, don't bother splitting + !cast(&I)->doesNotReturn()) + CallsToSplit.push_back(&I); + + for (Instruction *Call : CallsToSplit) + SplitBlock(Call->getParent(), Call->getNextNode(), DT, LI); +} + +bool CSIImpl::isFreeFn(const Instruction *I, const TargetLibraryInfo *TLI) { + if (!isa(I)) + return false; + const CallBase *CB = dyn_cast(I); + + if (!TLI) + return false; + + if (getFreedOperand(CB, TLI)) + return true; + + // Ideally we would just use getFreedOperand to determine whether I is a call + // to a libfree funtion. But if -fno-builtin is used, then getFreedOperand + // won't recognize any libfree functions. For instrumentation purposes, + // it's sufficient to recognize the function name. 
+ const StringRef FreeFnNames[] = { + "_ZdlPv", + "_ZdaPv", + "_ZdlPvj", + "_ZdlPvm", + "_ZdlPvRKSt9nothrow_t", + "_ZdlPvSt11align_val_t", + "_ZdaPvj", + "_ZdaPvm", + "_ZdaPvRKSt9nothrow_t", + "_ZdaPvSt11align_val_t", + "_ZdlPvSt11align_val_tRKSt9nothrow_t", + "_ZdaPvSt11align_val_tRKSt9nothrow_t", + "_ZdlPvjSt11align_val_t", + "_ZdlPvmSt11align_val_t", + "_ZdaPvjSt11align_val_t", + "_ZdaPvmSt11align_val_t", + "??3@YAXPAX@Z", + "??3@YAXPAXABUnothrow_t@std@@@Z", + "??3@YAXPAXI@Z", + "??3@YAXPEAX@Z", + "??3@YAXPEAXAEBUnothrow_t@std@@@Z", + "??3@YAXPEAX_K@Z", + "??_V@YAXPAX@Z", + "??_V@YAXPAXABUnothrow_t@std@@@Z", + "??_V@YAXPAXI@Z", + "??_V@YAXPEAX@Z", + "??_V@YAXPEAXAEBUnothrow_t@std@@@Z", + "??_V@YAXPEAX_K@Z", + "__kmpc_free_shared" + }; + + if (const Function *Called = CB->getCalledFunction()) { + StringRef FnName = Called->getName(); + if (!llvm::any_of(FreeFnNames, [&](const StringRef FreeFnName) { + return FnName == FreeFnName; + })) + return false; + + // Confirm that this function is a recognized library function + LibFunc F; + bool FoundLibFunc = TLI->getLibFunc(*Called, F); + return FoundLibFunc; + } + + return false; +} + +bool CSIImpl::isAllocFn(const Instruction *I, const TargetLibraryInfo *TLI) { + if (!isa(I)) + return false; + + if (!TLI) + return false; + + if (isAllocationFn(I, TLI)) + return true; + + // Ideally we would just use isAllocationFn to determine whether I is a call + // to an allocation funtion. But if -fno-builtin is used, then isAllocationFn + // won't recognize any allocation functions. For instrumentation purposes, + // it's sufficient to recognize the function name. 
+ const StringRef AllocFnNames[] = { + "_Znwj", + "_ZnwjRKSt9nothrow_t", + "_ZnwjSt11align_val_t", + "_ZnwjSt11align_val_tRKSt9nothrow_t", + "_Znwm", + "_ZnwmRKSt9nothrow_t", + "_ZnwmSt11align_val_t", + "_ZnwmSt11align_val_tRKSt9nothrow_t", + "_Znaj", + "_ZnajRKSt9nothrow_t", + "_ZnajSt11align_val_t", + "_ZnajSt11align_val_tRKSt9nothrow_t", + "_Znam", + "_ZnamRKSt9nothrow_t", + "_ZnamSt11align_val_t", + "_ZnamSt11align_val_tRKSt9nothrow_t", + "??2@YAPAXI@Z", + "??2@YAPAXIABUnothrow_t@std@@@Z", + "??2@YAPEAX_K@Z", + "??2@YAPEAX_KAEBUnothrow_t@std@@@Z", + "??_U@YAPAXI@Z", + "??_U@YAPAXIABUnothrow_t@std@@@Z", + "??_U@YAPEAX_K@Z", + "??_U@YAPEAX_KAEBUnothrow_t@std@@@Z", + "strdup", + "dunder_strdup", + "strndup", + "dunder_strndup", + "__kmpc_alloc_shared", + "posix_memalign" + }; + + if (const Function *Called = dyn_cast(I)->getCalledFunction()) { + StringRef FnName = Called->getName(); + if (!llvm::any_of(AllocFnNames, [&](const StringRef AllocFnName) { + return FnName == AllocFnName; + })) + return false; + + // Confirm that this function is a recognized library function + LibFunc F; + bool FoundLibFunc = TLI->getLibFunc(*Called, F); + return FoundLibFunc; + } + + return false; +} + +int CSIImpl::getNumBytesAccessed(Type *OrigTy, const DataLayout &DL) { + assert(OrigTy->isSized()); + uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy); + if (TypeSize % 8 != 0) + return -1; + return TypeSize / 8; +} + +void CSIImpl::addLoadStoreInstrumentation(Instruction *I, + FunctionCallee BeforeFn, + FunctionCallee AfterFn, Value *CsiId, + Type *AddrType, Value *Addr, + int NumBytes, + CsiLoadStoreProperty &Prop) { + IRBuilder<> IRB(I); + Value *PropVal = Prop.getValue(IRB); + insertHookCall(I, BeforeFn, + {CsiId, IRB.CreatePointerCast(Addr, AddrType), + IRB.getInt32(NumBytes), PropVal}); + + BasicBlock::iterator Iter = ++I->getIterator(); + IRB.SetInsertPoint(&*Iter); + insertHookCall(&*Iter, AfterFn, + {CsiId, IRB.CreatePointerCast(Addr, AddrType), + IRB.getInt32(NumBytes), 
PropVal}); +} + +void CSIImpl::instrumentLoadOrStore(Instruction *I, + CsiLoadStoreProperty &Prop) { + IRBuilder<> IRB(I); + bool IsWrite = isa(I); + Value *Addr = IsWrite ? cast(I)->getPointerOperand() + : cast(I)->getPointerOperand(); + Type *Ty = + IsWrite ? cast(I)->getValueOperand()->getType() : I->getType(); + int NumBytes = getNumBytesAccessed(Ty, DL); + Type *AddrType = IRB.getInt8PtrTy(); + + if (NumBytes == -1) + return; // size that we don't recognize + + if (IsWrite) { + uint64_t LocalId = StoreFED.add(*I); + Value *CsiId = StoreFED.localToGlobalId(LocalId, IRB); + addLoadStoreInstrumentation(I, CsiBeforeWrite, CsiAfterWrite, CsiId, + AddrType, Addr, NumBytes, Prop); + } else { // is read + uint64_t LocalId = LoadFED.add(*I); + Value *CsiId = LoadFED.localToGlobalId(LocalId, IRB); + addLoadStoreInstrumentation(I, CsiBeforeRead, CsiAfterRead, CsiId, AddrType, + Addr, NumBytes, Prop); + } +} + +void CSIImpl::instrumentAtomic(Instruction *I) { + // For now, print a message that this code contains atomics. + dbgs() + << "WARNING: Uninstrumented atomic operations in program-under-test!\n"; +} + +// TODO: This code for instrumenting memory intrinsics was borrowed +// from TSan. Different tools might have better ways to handle these +// function calls. Replace this logic with a more flexible solution, +// possibly one based on interpositioning. +// +// If a memset intrinsic gets inlined by the code gen, we will miss it. +// So, we either need to ensure the intrinsic is not inlined, or instrument it. +// We do not instrument memset/memmove/memcpy intrinsics (too complicated), +// instead we simply replace them with regular function calls, which are then +// intercepted by the run-time. +// Since our pass runs after everyone else, the calls should not be +// replaced back with intrinsics. If that becomes wrong at some point, +// we will need to call e.g. __csi_memset to avoid the intrinsics. 
+bool CSIImpl::instrumentMemIntrinsic(Instruction *I) { + IRBuilder<> IRB(I); + if (MemSetInst *M = dyn_cast(I)) { + Instruction *Call = IRB.CreateCall( + MemsetFn, + {IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()), + IRB.CreateIntCast(M->getArgOperand(1), IRB.getInt32Ty(), false), + IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)}); + setInstrumentationDebugLoc(I, Call); + I->eraseFromParent(); + return true; + } else if (MemTransferInst *M = dyn_cast(I)) { + Instruction *Call = IRB.CreateCall( + isa(M) ? MemcpyFn : MemmoveFn, + {IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()), + IRB.CreatePointerCast(M->getArgOperand(1), IRB.getInt8PtrTy()), + IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)}); + setInstrumentationDebugLoc(I, Call); + I->eraseFromParent(); + return true; + } + return false; +} + +void CSIImpl::instrumentBasicBlock(BasicBlock &BB, const TaskInfo &TI) { + IRBuilder<> IRB(&*BB.getFirstInsertionPt()); + bool isEntry = isEntryBlock(BB, TI); + if (isEntry) + IRB.SetInsertPoint(getEntryBBInsertPt(BB)); + uint64_t LocalId = BasicBlockFED.add(BB); + uint64_t BBSizeId = BBSize.add(BB, GetTTI ? + &(*GetTTI)(*BB.getParent()) : nullptr); + assert(LocalId == BBSizeId && + "BB recieved different ID's in FED and sizeinfo tables."); + Value *CsiId = BasicBlockFED.localToGlobalId(LocalId, IRB); + CsiBBProperty Prop; + Prop.setIsLandingPad(BB.isLandingPad()); + Prop.setIsEHPad(BB.isEHPad()); + Instruction *TermI = BB.getTerminator(); + Value *PropVal = Prop.getValue(IRB); + insertHookCall(&*IRB.GetInsertPoint(), CsiBBEntry, {CsiId, PropVal}); + IRB.SetInsertPoint(TermI); + CallInst *Call = insertHookCall(TermI, CsiBBExit, {CsiId, PropVal}); + // If this is an entry block and the insert point is the terminator, make the + // BBExit hook be the insert point instead. 
+ if (isEntry && getEntryBBInsertPt(BB) == TermI) + EntryBBInsertPt[&BB] = Call; +} + +// Helper function to get a value for the runtime trip count of the given loop. +static const SCEV *getRuntimeTripCount(Loop &L, ScalarEvolution *SE) { + BasicBlock *Latch = L.getLoopLatch(); + + const SCEV *BECountSC = SE->getExitCount(&L, Latch); + if (isa(BECountSC) || + !BECountSC->getType()->isIntegerTy()) { + LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n"); + return SE->getCouldNotCompute(); + } + + // Add 1 since the backedge count doesn't include the first loop iteration. + const SCEV *TripCountSC = + SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1)); + if (isa(TripCountSC)) { + LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n"); + return SE->getCouldNotCompute(); + } + + return TripCountSC; +} + +void CSIImpl::instrumentLoop(Loop &L, TaskInfo &TI, ScalarEvolution *SE) { + assert(L.isLoopSimplifyForm() && "CSI assumes loops are in simplified form."); + BasicBlock *Preheader = L.getLoopPreheader(); + BasicBlock *Header = L.getHeader(); + SmallVector ExitingBlocks, ExitBlocks; + L.getExitingBlocks(ExitingBlocks); + L.getUniqueExitBlocks(ExitBlocks); + + // We assign a local ID for this loop here, so that IDs for loops follow a + // depth-first ordering. + csi_id_t LocalId = LoopFED.add(*Header); + + // Recursively instrument each subloop. + for (Loop *SubL : L) + instrumentLoop(*SubL, TI, SE); + + // Record properties of this loop. + CsiLoopProperty LoopProp; + LoopProp.setIsTapirLoop(static_cast(getTaskIfTapirLoop(&L, &TI))); + LoopProp.setHasUniqueExitingBlock((ExitingBlocks.size() == 1)); + + IRBuilder<> IRB(Preheader->getTerminator()); + Value *LoopCsiId = LoopFED.localToGlobalId(LocalId, IRB); + Value *LoopPropVal = LoopProp.getValue(IRB); + + // Try to evaluate the runtime trip count for this loop. Default to a count + // of -1 for unknown trip counts. 
+ Value *TripCount = IRB.getInt64(-1); + if (SE) { + const SCEV *TripCountSC = getRuntimeTripCount(L, SE); + if (!isa(TripCountSC)) { + // Extend the TripCount type if necessary. + if (TripCountSC->getType() != IRB.getInt64Ty()) + TripCountSC = SE->getZeroExtendExpr(TripCountSC, IRB.getInt64Ty()); + // Compute the trip count to pass to the CSI hook. + SCEVExpander Expander(*SE, DL, "csi"); + TripCount = Expander.expandCodeFor(TripCountSC, IRB.getInt64Ty(), + &*IRB.GetInsertPoint()); + } + } + + // Insert before-loop hook. + insertHookCall(&*IRB.GetInsertPoint(), CsiBeforeLoop, {LoopCsiId, TripCount, + LoopPropVal}); + + // Insert loop-body-entry hook. + IRB.SetInsertPoint(&*Header->getFirstInsertionPt()); + // TODO: Pass IVs to hook? + insertHookCall(&*IRB.GetInsertPoint(), CsiLoopBodyEntry, {LoopCsiId, + LoopPropVal}); + + // Insert hooks at the ends of the exiting blocks. + for (BasicBlock *BB : ExitingBlocks) { + // Record properties of this loop exit + CsiLoopExitProperty LoopExitProp; + LoopExitProp.setIsLatch(L.isLoopLatch(BB)); + + // Insert the loop-exit hook + IRB.SetInsertPoint(BB->getTerminator()); + csi_id_t LocalExitId = LoopExitFED.add(*BB); + Value *ExitCsiId = LoopFED.localToGlobalId(LocalExitId, IRB); + Value *LoopExitPropVal = LoopExitProp.getValue(IRB); + // TODO: For latches, record whether the loop will repeat. + insertHookCall(&*IRB.GetInsertPoint(), CsiLoopBodyExit, + {ExitCsiId, LoopCsiId, LoopExitPropVal}); + } + // Insert after-loop hooks. 
+ for (BasicBlock *BB : ExitBlocks) { + IRB.SetInsertPoint(&*BB->getFirstInsertionPt()); + insertHookCall(&*IRB.GetInsertPoint(), CsiAfterLoop, {LoopCsiId, + LoopPropVal}); + } +} + +void CSIImpl::instrumentCallsite(Instruction *I, DominatorTree *DT) { + if (callsPlaceholderFunction(*I)) + return; + + bool IsInvoke = isa(I); + Function *Called = nullptr; + if (CallInst *CI = dyn_cast(I)) + Called = CI->getCalledFunction(); + else if (InvokeInst *II = dyn_cast(I)) + Called = II->getCalledFunction(); + + bool shouldInstrumentBefore = true; + bool shouldInstrumentAfter = true; + + // Does this call require instrumentation before or after? + if (Called) { + shouldInstrumentBefore = Config->DoesFunctionRequireInstrumentationForPoint( + Called->getName(), InstrumentationPoint::INSTR_BEFORE_CALL); + shouldInstrumentAfter = Config->DoesFunctionRequireInstrumentationForPoint( + Called->getName(), InstrumentationPoint::INSTR_AFTER_CALL); + } + + if (!shouldInstrumentAfter && !shouldInstrumentBefore) + return; + + IRBuilder<> IRB(I); + Value *DefaultID = getDefaultID(IRB); + uint64_t LocalId = CallsiteFED.add(*I, Called ? Called->getName() : ""); + Value *CallsiteId = CallsiteFED.localToGlobalId(LocalId, IRB); + Value *FuncId = nullptr; + GlobalVariable *FuncIdGV = nullptr; + if (Called) { + std::string GVName = CsiFuncIdVariablePrefix + Called->getName().str(); + Type *FuncIdGVTy = IRB.getInt64Ty(); + FuncIdGV = dyn_cast( + M.getOrInsertGlobal(GVName, FuncIdGVTy)); + assert(FuncIdGV); + FuncIdGV->setConstant(false); + if (Options.jitMode && !Called->empty()) + FuncIdGV->setLinkage(Called->getLinkage()); + else + FuncIdGV->setLinkage(GlobalValue::WeakAnyLinkage); + FuncIdGV->setInitializer(IRB.getInt64(CsiCallsiteUnknownTargetId)); + FuncId = IRB.CreateLoad(FuncIdGVTy, FuncIdGV); + } else { + // Unknown targets (i.e. indirect calls) are always unknown. 
+    FuncId = IRB.getInt64(CsiCallsiteUnknownTargetId);
+  }
+  assert(FuncId != NULL);
+  CsiCallProperty Prop;
+  Value *DefaultPropVal = Prop.getValue(IRB);
+  Prop.setIsIndirect(!Called);
+  Value *PropVal = Prop.getValue(IRB);
+  if (shouldInstrumentBefore)
+    insertHookCall(I, CsiBeforeCallsite, {CallsiteId, FuncId, PropVal});
+
+  BasicBlock::iterator Iter(I);
+  if (shouldInstrumentAfter) {
+    if (IsInvoke) {
+      // There are two "after" positions for invokes: the normal block and the
+      // exception block.
+      InvokeInst *II = cast<InvokeInst>(I);
+      insertHookCallInSuccessorBB(II->getNormalDest(), II->getParent(),
+                                  CsiAfterCallsite,
+                                  {CallsiteId, FuncId, PropVal},
+                                  {DefaultID, DefaultID, DefaultPropVal});
+      insertHookCallInSuccessorBB(II->getUnwindDest(), II->getParent(),
+                                  CsiAfterCallsite,
+                                  {CallsiteId, FuncId, PropVal},
+                                  {DefaultID, DefaultID, DefaultPropVal});
+    } else {
+      // Simple call instruction; there is only one "after" position.
+      Iter++;
+      IRB.SetInsertPoint(&*Iter);
+      PropVal = Prop.getValue(IRB);
+      insertHookCall(&*Iter, CsiAfterCallsite, {CallsiteId, FuncId, PropVal});
+    }
+  }
+}
+
+void CSIImpl::interposeCall(Instruction *I) {
+  CallBase *CB = dyn_cast<CallBase>(I);
+  if (!CB)
+    return;
+
+  Function *Called = CB->getCalledFunction();
+
+  // Should we interpose this call?
+  if (Called && Called->getName().size() > 0) {
+    bool shouldInterpose =
+        Config->DoesFunctionRequireInterposition(Called->getName());
+
+    if (shouldInterpose) {
+      Function *interpositionFunction = getInterpositionFunction(Called);
+      assert(interpositionFunction != nullptr);
+      CB->setCalledFunction(interpositionFunction);
+    }
+  }
+}
+
+static void getTaskExits(DetachInst *DI,
+                         SmallVectorImpl<BasicBlock *> &TaskReturns,
+                         SmallVectorImpl<BasicBlock *> &TaskResumes,
+                         SmallVectorImpl<Spindle *> &SharedEHExits,
+                         TaskInfo &TI) {
+  BasicBlock *DetachedBlock = DI->getDetached();
+  Task *T = TI.getTaskFor(DetachedBlock);
+  BasicBlock *ContinueBlock = DI->getContinue();
+
+  // Examine the predecessors of the continue block and save any predecessors in
+  // the task as a task return.
+  for (BasicBlock *Pred : predecessors(ContinueBlock)) {
+    if (T->simplyEncloses(Pred)) {
+      assert(isa<ReattachInst>(Pred->getTerminator()));
+      TaskReturns.push_back(Pred);
+    }
+  }
+
+  // If the detach cannot throw, we're done.
+  if (!DI->hasUnwindDest())
+    return;
+
+  // Detached-rethrow exits can appear in strange places within a task-exiting
+  // spindle. Hence we loop over all blocks in the spindle to find
+  // detached rethrows.
+ for (Spindle *S : depth_first>(T->getEntrySpindle())) { + if (S->isSharedEH()) { + if (llvm::any_of(predecessors(S), + [](const Spindle *Pred) { return !Pred->isSharedEH(); })) + SharedEHExits.push_back(S); + continue; + } + + for (BasicBlock *B : S->blocks()) + if (isDetachedRethrow(B->getTerminator())) + TaskResumes.push_back(B); + } +} + +BasicBlock::iterator +CSIImpl::getFirstInsertionPtInDetachedBlock(BasicBlock *Detached) { + for (Instruction &I : *Detached) + if (IntrinsicInst *II = dyn_cast(&I)) + if (Intrinsic::taskframe_use == II->getIntrinsicID()) + return ++(II->getIterator()); + return Detached->getFirstInsertionPt(); +} + +void CSIImpl::instrumentDetach(DetachInst *DI, unsigned SyncRegNum, + unsigned NumSyncRegs, DominatorTree *DT, + TaskInfo &TI, LoopInfo &LI) { + LLVMContext &Ctx = DI->getContext(); + BasicBlock *TaskEntryBlock = TI.getTaskFor(DI->getParent())->getEntry(); + IRBuilder<> IDBuilder(getEntryBBInsertPt(*TaskEntryBlock)); + bool TapirLoopBody = spawnsTapirLoopBody(DI, LI, TI); + ConstantInt *SyncRegVal = ConstantInt::get(Type::getInt32Ty(Ctx), SyncRegNum); + ConstantInt *DefaultSyncRegVal = ConstantInt::get(Type::getInt32Ty(Ctx), 0); + CsiDetachProperty DetachProp; + DetachProp.setForTapirLoopBody(TapirLoopBody); + // Instrument the detach instruction itself + Value *DetachID; + { + IRBuilder<> IRB(DI); + uint64_t LocalID = DetachFED.add(*DI); + DetachID = DetachFED.localToGlobalId(LocalID, IDBuilder); + insertHookCall(DI, CsiDetach, + {DetachID, SyncRegVal, DetachProp.getValue(IRB)}); + } + + // Find the detached block, continuation, and associated reattaches. + BasicBlock *DetachedBlock = DI->getDetached(); + BasicBlock *ContinueBlock = DI->getContinue(); + Task *T = TI.getTaskFor(DetachedBlock); + SmallVector TaskExits, TaskResumes; + SmallVector SharedEHExits; + getTaskExits(DI, TaskExits, TaskResumes, SharedEHExits, TI); + + // Instrument the entry and exit points of the detached task. 
+ { + // Instrument the entry point of the detached task. + IRBuilder<> IRB(&*getFirstInsertionPtInDetachedBlock(DetachedBlock)); + uint64_t LocalID = TaskFED.add(*DetachedBlock); + Value *TaskID = TaskFED.localToGlobalId(LocalID, IDBuilder); + CsiTaskProperty Prop; + Prop.setIsTapirLoopBody(TapirLoopBody); + Prop.setNumSyncReg(NumSyncRegs); + Instruction *Call = IRB.CreateCall(CsiTaskEntry, {TaskID, DetachID, + Prop.getValue(IRB)}); + setInstrumentationDebugLoc(*DetachedBlock, Call); + + // Instrument the exit points of the detached tasks. + for (BasicBlock *Exit : TaskExits) { + IRBuilder<> IRB(Exit->getTerminator()); + uint64_t LocalID = TaskExitFED.add(*Exit->getTerminator()); + Value *ExitID = TaskExitFED.localToGlobalId(LocalID, IDBuilder); + CsiTaskExitProperty ExitProp; + ExitProp.setIsTapirLoopBody(TapirLoopBody); + insertHookCall( + Exit->getTerminator(), CsiTaskExit, + {ExitID, TaskID, DetachID, SyncRegVal, ExitProp.getValue(IRB)}); + } + // Instrument the EH exits of the detached task. + for (BasicBlock *Exit : TaskResumes) { + IRBuilder<> IRB(Exit->getTerminator()); + uint64_t LocalID = TaskExitFED.add(*Exit->getTerminator()); + Value *ExitID = TaskExitFED.localToGlobalId(LocalID, IDBuilder); + CsiTaskExitProperty ExitProp; + ExitProp.setIsTapirLoopBody(TapirLoopBody); + insertHookCall( + Exit->getTerminator(), CsiTaskExit, + {ExitID, TaskID, DetachID, SyncRegVal, ExitProp.getValue(IRB)}); + } + + Value *DefaultID = getDefaultID(IDBuilder); + for (Spindle *SharedEH : SharedEHExits) { + // Skip shared-eh spindle exits that are placeholder unreachable blocks. 
+ if (isa( + SharedEH->getEntry()->getFirstNonPHIOrDbgOrLifetime())) + continue; + CsiTaskExitProperty ExitProp; + ExitProp.setIsTapirLoopBody(TapirLoopBody); + insertHookCallAtSharedEHSpindleExits( + SharedEH, T, CsiTaskExit, TaskExitFED, + {TaskID, DetachID, SyncRegVal, ExitProp.getValueImpl(Ctx)}, + {DefaultID, DefaultID, DefaultSyncRegVal, + CsiTaskExitProperty::getDefaultValueImpl(Ctx)}); + } + } + + // Instrument the continuation of the detach. + { + if (isCriticalContinueEdge(DI, 1)) + ContinueBlock = SplitCriticalEdge( + DI, 1, + CriticalEdgeSplittingOptions(DT, &LI).setSplitDetachContinue()); + + IRBuilder<> IRB(&*ContinueBlock->getFirstInsertionPt()); + uint64_t LocalID = DetachContinueFED.add(*ContinueBlock); + Value *ContinueID = DetachContinueFED.localToGlobalId(LocalID, IDBuilder); + CsiDetachContinueProperty ContProp; + ContProp.setForTapirLoopBody(TapirLoopBody); + Instruction *Call = + IRB.CreateCall(CsiDetachContinue, {ContinueID, DetachID, SyncRegVal, + ContProp.getValue(IRB)}); + setInstrumentationDebugLoc(*ContinueBlock, Call); + } + // Instrument the unwind of the detach, if it exists. + if (DI->hasUnwindDest()) { + BasicBlock *UnwindBlock = DI->getUnwindDest(); + BasicBlock *PredBlock = DI->getParent(); + if (Value *TF = T->getTaskFrameUsed()) { + // If the detached task uses a taskframe, then we want to insert the + // detach_continue instrumentation for the unwind destination after the + // taskframe.resume. 
+ UnwindBlock = getTaskFrameResumeDest(TF); + assert(UnwindBlock && + "Detach with unwind uses a taskframe with no resume"); + PredBlock = getTaskFrameResume(TF)->getParent(); + } + Value *DefaultID = getDefaultID(IDBuilder); + uint64_t LocalID = DetachContinueFED.add(*UnwindBlock); + Value *ContinueID = DetachContinueFED.localToGlobalId(LocalID, IDBuilder); + CsiDetachContinueProperty ContProp; + Value *DefaultPropVal = ContProp.getValueImpl(Ctx); + ContProp.setIsUnwind(); + ContProp.setForTapirLoopBody(TapirLoopBody); + insertHookCallInSuccessorBB( + UnwindBlock, PredBlock, CsiDetachContinue, + {ContinueID, DetachID, SyncRegVal, ContProp.getValue(Ctx)}, + {DefaultID, DefaultID, DefaultSyncRegVal, DefaultPropVal}); + for (BasicBlock *DRPred : predecessors(UnwindBlock)) + if (isDetachedRethrow(DRPred->getTerminator(), DI->getSyncRegion())) + insertHookCallInSuccessorBB( + UnwindBlock, DRPred, CsiDetachContinue, + {ContinueID, DetachID, SyncRegVal, ContProp.getValue(Ctx)}, + {DefaultID, DefaultID, DefaultSyncRegVal, DefaultPropVal}); + } +} + +void CSIImpl::instrumentSync(SyncInst *SI, unsigned SyncRegNum) { + LLVMContext &Ctx = SI->getContext(); + IRBuilder<> IRB(SI); + Value *DefaultID = getDefaultID(IRB); + // Get the ID of this sync. + uint64_t LocalID = SyncFED.add(*SI); + Value *SyncID = SyncFED.localToGlobalId(LocalID, IRB); + ConstantInt *SyncRegVal = ConstantInt::get(Type::getInt32Ty(Ctx), SyncRegNum); + ConstantInt *DefaultSyncRegVal = ConstantInt::get(Type::getInt32Ty(Ctx), 0); + + // Insert instrumentation before the sync. 
+ insertHookCall(SI, CsiBeforeSync, {SyncID, SyncRegVal}); + BasicBlock *SyncBB = SI->getParent(); + BasicBlock *SyncCont = SI->getSuccessor(0); + BasicBlock *SyncUnwind = nullptr; + if (SyncsWithUnwinds.count(SI)) { + InvokeInst *II = dyn_cast(SyncCont->getTerminator()); + SyncBB = SyncCont; + SyncUnwind = II->getUnwindDest(); + SyncCont = II->getNormalDest(); + } + + insertHookCallInSuccessorBB(SyncCont, SyncBB, CsiAfterSync, + {SyncID, SyncRegVal}, + {DefaultID, DefaultSyncRegVal}); + + // If we have no unwind for the sync, then we're done. + if (!SyncUnwind) + return; + + insertHookCallInSuccessorBB(SyncUnwind, SyncBB, CsiAfterSync, + {SyncID, SyncRegVal}, + {DefaultID, DefaultSyncRegVal}); +} + +void CSIImpl::instrumentAlloca(Instruction *I, TaskInfo &TI) { + IRBuilder<> IRB(I); + bool AllocaInEntryBlock = isEntryBlock(*I->getParent(), TI); + if (AllocaInEntryBlock) + IRB.SetInsertPoint(getEntryBBInsertPt(*I->getParent())); + AllocaInst *AI = cast(I); + + uint64_t LocalId = AllocaFED.add(*I); + Value *CsiId = AllocaFED.localToGlobalId(LocalId, IRB); + + CsiAllocaProperty Prop; + Prop.setIsStatic(AI->isStaticAlloca()); + Value *PropVal = Prop.getValue(IRB); + + // Get size of allocation. 
+ uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType()); + Value *SizeVal = IRB.getInt64(Size); + if (AI->isArrayAllocation()) + SizeVal = IRB.CreateMul(SizeVal, + IRB.CreateZExtOrBitCast(AI->getArraySize(), + IRB.getInt64Ty())); + + BasicBlock::iterator Iter(I); + if (!AllocaInEntryBlock) { + Iter++; + IRB.SetInsertPoint(&*Iter); + } else { + Iter = IRB.GetInsertPoint(); + } + + Type *AddrType = IRB.getInt8PtrTy(); + Value *Addr = IRB.CreatePointerCast(I, AddrType); + insertHookCall(&*Iter, CsiAfterAlloca, {CsiId, Addr, SizeVal, PropVal}); +} + +bool CSIImpl::getAllocFnArgs(const Instruction *I, + SmallVectorImpl &AllocFnArgs, + Type *SizeTy, Type *AddrTy, + const TargetLibraryInfo &TLI) { + const CallBase *CB = dyn_cast(I); + + std::pair SizeArgs = getAllocSizeArgs(CB, &TLI); + // If the first size argument is null, then we failed to get size arguments + // for this call. + if (!SizeArgs.first) + return false; + + Value *AlignmentArg = getAllocAlignment(CB, &TLI); + + // Push the size arguments. + AllocFnArgs.push_back(SizeArgs.first); + // The second size argument is the number of elements allocated (i.e., for + // calloc-like functions). + if (SizeArgs.second) + AllocFnArgs.push_back(SizeArgs.second); + else + // Report number of elements == 1. + AllocFnArgs.push_back(ConstantInt::get(SizeTy, 1)); + + // Push the alignment argument or 0 if there is no alignment argument. + if (AlignmentArg) + AllocFnArgs.push_back(AlignmentArg); + else + AllocFnArgs.push_back(ConstantInt::get(SizeTy, 0)); + + // Return the old pointer argument for realloc-like functions or nullptr for + // other allocation functions. 
+ if (Value *Reallocated = getReallocatedOperand(CB)) + AllocFnArgs.push_back(Reallocated); + else + AllocFnArgs.push_back(Constant::getNullValue(AddrTy)); + + return true; +} + +void CSIImpl::instrumentAllocFn(Instruction *I, DominatorTree *DT, + const TargetLibraryInfo *TLI) { + bool IsInvoke = isa(I); + Function *Called = nullptr; + if (CallInst *CI = dyn_cast(I)) + Called = CI->getCalledFunction(); + else if (InvokeInst *II = dyn_cast(I)) + Called = II->getCalledFunction(); + + assert(Called && "Could not get called function for allocation fn."); + + IRBuilder<> IRB(I); + Value *DefaultID = getDefaultID(IRB); + uint64_t LocalId = AllocFnFED.add(*I); + Value *AllocFnId = AllocFnFED.localToGlobalId(LocalId, IRB); + + SmallVector AllocFnArgs; + getAllocFnArgs(I, AllocFnArgs, IntptrTy, IRB.getInt8PtrTy(), *TLI); + SmallVector DefaultAllocFnArgs({ + /* Allocated size */ Constant::getNullValue(IntptrTy), + /* Number of elements */ Constant::getNullValue(IntptrTy), + /* Alignment */ Constant::getNullValue(IntptrTy), + /* Old pointer */ Constant::getNullValue(IRB.getInt8PtrTy()), + }); + + CsiAllocFnProperty Prop; + Value *DefaultPropVal = Prop.getValue(IRB); + LibFunc AllocLibF; + TLI->getLibFunc(*Called, AllocLibF); + Prop.setAllocFnTy(static_cast(getAllocFnTy(AllocLibF))); + AllocFnArgs.push_back(Prop.getValue(IRB)); + DefaultAllocFnArgs.push_back(DefaultPropVal); + + BasicBlock::iterator Iter(I); + if (IsInvoke) { + // There are two "after" positions for invokes: the normal block and the + // exception block. + InvokeInst *II = cast(I); + + BasicBlock *NormalBB = II->getNormalDest(); + unsigned SuccNum = GetSuccessorNumber(II->getParent(), NormalBB); + if (isCriticalEdge(II, SuccNum)) + NormalBB = + SplitCriticalEdge(II, SuccNum, CriticalEdgeSplittingOptions(DT)); + // Insert hook into normal destination. 
+ { + IRB.SetInsertPoint(&*NormalBB->getFirstInsertionPt()); + SmallVector AfterAllocFnArgs; + AfterAllocFnArgs.push_back(AllocFnId); + AfterAllocFnArgs.push_back(IRB.CreatePointerCast(I, IRB.getInt8PtrTy())); + AfterAllocFnArgs.append(AllocFnArgs.begin(), AllocFnArgs.end()); + insertHookCall(&*IRB.GetInsertPoint(), CsiAfterAllocFn, AfterAllocFnArgs); + } + // Insert hook into unwind destination. + { + // The return value of the allocation function is not valid in the unwind + // destination. + SmallVector AfterAllocFnArgs, DefaultAfterAllocFnArgs; + AfterAllocFnArgs.push_back(AllocFnId); + AfterAllocFnArgs.push_back(Constant::getNullValue(IRB.getInt8PtrTy())); + AfterAllocFnArgs.append(AllocFnArgs.begin(), AllocFnArgs.end()); + DefaultAfterAllocFnArgs.push_back(DefaultID); + DefaultAfterAllocFnArgs.push_back( + Constant::getNullValue(IRB.getInt8PtrTy())); + DefaultAfterAllocFnArgs.append(DefaultAllocFnArgs.begin(), + DefaultAllocFnArgs.end()); + insertHookCallInSuccessorBB(II->getUnwindDest(), II->getParent(), + CsiAfterAllocFn, AfterAllocFnArgs, + DefaultAfterAllocFnArgs); + } + } else { + // Simple call instruction; there is only one "after" position. + Iter++; + IRB.SetInsertPoint(&*Iter); + SmallVector AfterAllocFnArgs; + AfterAllocFnArgs.push_back(AllocFnId); + AfterAllocFnArgs.push_back(IRB.CreatePointerCast(I, IRB.getInt8PtrTy())); + AfterAllocFnArgs.append(AllocFnArgs.begin(), AllocFnArgs.end()); + insertHookCall(&*Iter, CsiAfterAllocFn, AfterAllocFnArgs); + } +} + +void CSIImpl::instrumentFree(Instruction *I, const TargetLibraryInfo *TLI) { + // It appears that frees (and deletes) never throw. 
+ assert(isa(I) && "Free call is not a call instruction"); + + CallInst *FC = cast(I); + Function *Called = FC->getCalledFunction(); + assert(Called && "Could not get called function for free."); + + IRBuilder<> IRB(I); + uint64_t LocalId = FreeFED.add(*I); + Value *FreeId = FreeFED.localToGlobalId(LocalId, IRB); + + // All currently supported free functions free the first argument. + Value *Addr = FC->getArgOperand(0); + CsiFreeProperty Prop; + LibFunc FreeLibF; + TLI->getLibFunc(*Called, FreeLibF); + Prop.setFreeTy(static_cast(getFreeTy(FreeLibF))); + + insertHookCall(I, CsiBeforeFree, {FreeId, Addr, Prop.getValue(IRB)}); + BasicBlock::iterator Iter(I); + Iter++; + insertHookCall(&*Iter, CsiAfterFree, {FreeId, Addr, Prop.getValue(IRB)}); +} + +CallInst *CSIImpl::insertHookCall(Instruction *I, FunctionCallee HookFunction, + ArrayRef HookArgs) { + IRBuilder<> IRB(I); + CallInst *Call = IRB.CreateCall(HookFunction, HookArgs); + setInstrumentationDebugLoc(I, (Instruction *)Call); + return Call; +} + +bool CSIImpl::updateArgPHIs(BasicBlock *Succ, BasicBlock *BB, + FunctionCallee HookFunction, + ArrayRef HookArgs, + ArrayRef DefaultArgs) { + // If we've already created a PHI node in this block for the hook arguments, + // just add the incoming arguments to the PHIs. + auto Key = std::make_pair(Succ, cast(HookFunction.getCallee())); + if (ArgPHIs.count(Key)) { + unsigned HookArgNum = 0; + for (PHINode *ArgPHI : ArgPHIs[Key]) { + ArgPHI->setIncomingValue(ArgPHI->getBasicBlockIndex(BB), + HookArgs[HookArgNum]); + ++HookArgNum; + } + return true; + } + + // Create PHI nodes in this block for each hook argument. 
+ IRBuilder<> IRB(&Succ->front()); + unsigned HookArgNum = 0; + for (Value *Arg : HookArgs) { + PHINode *ArgPHI = IRB.CreatePHI(Arg->getType(), 2); + for (BasicBlock *Pred : predecessors(Succ)) { + if (Pred == BB) + ArgPHI->addIncoming(Arg, BB); + else + ArgPHI->addIncoming(DefaultArgs[HookArgNum], Pred); + } + ArgPHIs[Key].push_back(ArgPHI); + ++HookArgNum; + } + return false; +} + +CallInst *CSIImpl::insertHookCallInSuccessorBB(BasicBlock *Succ, BasicBlock *BB, + FunctionCallee HookFunction, + ArrayRef HookArgs, + ArrayRef DefaultArgs) { + assert(HookFunction && "No hook function given."); + // If this successor block has a unique predecessor, just insert the hook call + // as normal. + if (Succ->getUniquePredecessor()) { + assert(Succ->getUniquePredecessor() == BB && + "BB is not unique predecessor of successor block"); + return insertHookCall(&*Succ->getFirstInsertionPt(), HookFunction, + HookArgs); + } + + if (updateArgPHIs(Succ, BB, HookFunction, HookArgs, DefaultArgs)) + return nullptr; + + auto Key = std::make_pair(Succ, cast(HookFunction.getCallee())); + SmallVector SuccessorHookArgs; + for (PHINode *ArgPHI : ArgPHIs[Key]) + SuccessorHookArgs.push_back(ArgPHI); + + IRBuilder<> IRB(&*Succ->getFirstInsertionPt()); + // Insert the hook call, using the PHI as the CSI ID. + CallInst *Call = IRB.CreateCall(HookFunction, SuccessorHookArgs); + setInstrumentationDebugLoc(*Succ, (Instruction *)Call); + + return Call; +} + +void CSIImpl::insertHookCallAtSharedEHSpindleExits( + Spindle *SharedEHSpindle, Task *T, FunctionCallee HookFunction, + FrontEndDataTable &FED, ArrayRef HookArgs, + ArrayRef DefaultArgs) { + // Get the set of shared EH spindles to examine. Store them in post order, so + // they can be evaluated in reverse post order. + SmallVector WorkList; + for (Spindle *S : post_order>(SharedEHSpindle)) + WorkList.push_back(S); + + // Traverse the shared-EH spindles in reverse post order, updating the + // hook-argument PHI's along the way. 
+ SmallPtrSet Visited; + for (Spindle *S : llvm::reverse(WorkList)) { + bool NoNewPHINode = true; + // If this spindle is the first shared-EH spindle in the traversal, use the + // given hook arguments to update the PHI node. + if (S == SharedEHSpindle) { + for (Spindle::SpindleEdge &InEdge : S->in_edges()) { + Spindle *SPred = InEdge.first; + BasicBlock *Pred = InEdge.second; + if (T->contains(SPred)) + NoNewPHINode &= + updateArgPHIs(S->getEntry(), Pred, HookFunction, HookArgs, + DefaultArgs); + } + } else { + // Otherwise update the PHI node based on the predecessor shared-eh + // spindles in this RPO traversal. + for (Spindle::SpindleEdge &InEdge : S->in_edges()) { + Spindle *SPred = InEdge.first; + BasicBlock *Pred = InEdge.second; + if (Visited.count(SPred)) { + auto Key = std::make_pair(SPred->getEntry(), + cast(HookFunction.getCallee())); + SmallVector NewHookArgs( + ArgPHIs[Key].begin(), ArgPHIs[Key].end()); + NoNewPHINode &= + updateArgPHIs(S->getEntry(), Pred, HookFunction, NewHookArgs, + DefaultArgs); + } + } + } + Visited.insert(S); + + if (NoNewPHINode) + continue; + + // Detached-rethrow exits can appear in strange places within a task-exiting + // spindle. Hence we loop over all blocks in the spindle to find detached + // rethrows. 
+ auto Key = std::make_pair(S->getEntry(), + cast(HookFunction.getCallee())); + for (BasicBlock *B : S->blocks()) { + if (isDetachedRethrow(B->getTerminator())) { + IRBuilder<> IRB(B->getTerminator()); + uint64_t LocalID = FED.add(*B->getTerminator()); + Value *HookID = FED.localToGlobalId(LocalID, IRB); + SmallVector Args({HookID}); + Args.append(ArgPHIs[Key].begin(), ArgPHIs[Key].end()); + Instruction *Call = IRB.CreateCall(HookFunction, Args); + setInstrumentationDebugLoc(*B, Call); + } + } + } +} + +void CSIImpl::initializeFEDTables() { + FunctionFED = FrontEndDataTable(M, CsiFunctionBaseIdName, + "__csi_unit_fed_table_function", + "__csi_unit_function_name_", + /*UseExistingBaseId=*/false); + FunctionExitFED = FrontEndDataTable(M, CsiFunctionExitBaseIdName, + "__csi_unit_fed_table_function_exit", + "__csi_unit_function_name_"); + LoopFED = FrontEndDataTable(M, CsiLoopBaseIdName, + "__csi_unit_fed_table_loop"); + LoopExitFED = FrontEndDataTable(M, CsiLoopExitBaseIdName, + "__csi_unit_fed_table_loop"); + BasicBlockFED = FrontEndDataTable(M, CsiBasicBlockBaseIdName, + "__csi_unit_fed_table_basic_block"); + CallsiteFED = FrontEndDataTable(M, CsiCallsiteBaseIdName, + "__csi_unit_fed_table_callsite", + "__csi_unit_function_name_"); + LoadFED = FrontEndDataTable(M, CsiLoadBaseIdName, + "__csi_unit_fed_table_load"); + StoreFED = FrontEndDataTable(M, CsiStoreBaseIdName, + "__csi_unit_fed_table_store"); + AllocaFED = FrontEndDataTable(M, CsiAllocaBaseIdName, + "__csi_unit_fed_table_alloca", + "__csi_unit_variable_name_"); + DetachFED = FrontEndDataTable(M, CsiDetachBaseIdName, + "__csi_unit_fed_table_detach"); + TaskFED = FrontEndDataTable(M, CsiTaskBaseIdName, + "__csi_unit_fed_table_task"); + TaskExitFED = FrontEndDataTable(M, CsiTaskExitBaseIdName, + "__csi_unit_fed_table_task_exit"); + DetachContinueFED = FrontEndDataTable(M, CsiDetachContinueBaseIdName, + "__csi_unit_fed_table_detach_continue"); + SyncFED = FrontEndDataTable(M, CsiSyncBaseIdName, + 
"__csi_unit_fed_table_sync"); + AllocFnFED = FrontEndDataTable(M, CsiAllocFnBaseIdName, + "__csi_unit_fed_table_allocfn", + "__csi_unit_variable_name_"); + FreeFED = FrontEndDataTable(M, CsiFreeBaseIdName, + "__csi_unit_fed_free"); +} + +void CSIImpl::initializeSizeTables() { + BBSize = SizeTable(M, CsiBasicBlockBaseIdName); +} + +uint64_t CSIImpl::getLocalFunctionID(Function &F) { + uint64_t LocalId = FunctionFED.add(F); + FuncOffsetMap[F.getName()] = LocalId; + return LocalId; +} + +void CSIImpl::generateInitCallsiteToFunction() { + LLVMContext &C = M.getContext(); + BasicBlock *EntryBB = BasicBlock::Create(C, "", InitCallsiteToFunction); + IRBuilder<> IRB(ReturnInst::Create(C, EntryBB)); + + GlobalVariable *Base = FunctionFED.baseId(); + Type *BaseTy = IRB.getInt64Ty(); + LoadInst *LI = IRB.CreateLoad(BaseTy, Base); + // Traverse the map of function name -> function local id. Generate + // a store of each function's global ID to the corresponding weak + // global variable. + for (const auto &it : FuncOffsetMap) { + std::string GVName = CsiFuncIdVariablePrefix + it.first.str(); + GlobalVariable *GV = nullptr; + if ((GV = M.getGlobalVariable(GVName)) == nullptr) { + GV = new GlobalVariable(M, IRB.getInt64Ty(), false, + (Options.jitMode ? 
GlobalValue::ExternalLinkage : + GlobalValue::WeakAnyLinkage), + IRB.getInt64(CsiCallsiteUnknownTargetId), GVName); + } + assert(GV); + IRB.CreateStore(IRB.CreateAdd(LI, IRB.getInt64(it.second)), GV); + } +} + +void CSIImpl::initializeCsi() { + IntptrTy = DL.getIntPtrType(M.getContext()); + + initializeFEDTables(); + initializeSizeTables(); + if (Options.InstrumentFuncEntryExit) + initializeFuncHooks(); + if (Options.InstrumentMemoryAccesses) + initializeLoadStoreHooks(); + if (Options.InstrumentLoops) + initializeLoopHooks(); + if (Options.InstrumentBasicBlocks) + initializeBasicBlockHooks(); + if (Options.InstrumentCalls) + initializeCallsiteHooks(); + if (Options.InstrumentMemIntrinsics) + initializeMemIntrinsicsHooks(); + if (Options.InstrumentTapir) + initializeTapirHooks(); + if (Options.InstrumentAllocas) + initializeAllocaHooks(); + if (Options.InstrumentAllocFns) + initializeAllocFnHooks(); + + FunctionType *FnType = + FunctionType::get(Type::getVoidTy(M.getContext()), {}, false); + InitCallsiteToFunction = cast(M.getOrInsertFunction( + CsiInitCallsiteToFunctionName, + FnType) + .getCallee()); + assert(InitCallsiteToFunction); + + InitCallsiteToFunction->setLinkage(GlobalValue::InternalLinkage); + + /* + The runtime declares this as a __thread var --- need to change this decl + generation or the tool won't compile DisableInstrGV = new GlobalVariable(M, + IntegerType::get(M.getContext(), 1), false, GlobalValue::ExternalLinkage, + nullptr, CsiDisableInstrumentationName, nullptr, + GlobalValue::GeneralDynamicTLSModel, 0, + true); + */ +} + +// Create a struct type to match the unit_fed_entry_t type in csirt.c. 
+StructType *CSIImpl::getUnitFedTableType(LLVMContext &C, + PointerType *EntryPointerType) { + return StructType::get(IntegerType::get(C, 64), Type::getInt8PtrTy(C, 0), + EntryPointerType); +} + +Constant *CSIImpl::fedTableToUnitFedTable(Module &M, + StructType *UnitFedTableType, + FrontEndDataTable &FedTable) { + Constant *NumEntries = + ConstantInt::get(IntegerType::get(M.getContext(), 64), FedTable.size()); + Constant *BaseIdPtr = ConstantExpr::getPointerCast( + FedTable.baseId(), Type::getInt8PtrTy(M.getContext(), 0)); + Constant *InsertedTable = FedTable.insertIntoModule(M); + return ConstantStruct::get(UnitFedTableType, NumEntries, BaseIdPtr, + InsertedTable); +} + +void CSIImpl::collectUnitFEDTables() { + LLVMContext &C = M.getContext(); + StructType *UnitFedTableType = + getUnitFedTableType(C, FrontEndDataTable::getPointerType(C)); + + // The order of the FED tables here must match the enum in csirt.c and the + // instrumentation_counts_t in csi.h. + UnitFedTables.push_back( + fedTableToUnitFedTable(M, UnitFedTableType, FunctionFED)); + UnitFedTables.push_back( + fedTableToUnitFedTable(M, UnitFedTableType, FunctionExitFED)); + UnitFedTables.push_back( + fedTableToUnitFedTable(M, UnitFedTableType, LoopFED)); + UnitFedTables.push_back( + fedTableToUnitFedTable(M, UnitFedTableType, LoopExitFED)); + UnitFedTables.push_back( + fedTableToUnitFedTable(M, UnitFedTableType, BasicBlockFED)); + UnitFedTables.push_back( + fedTableToUnitFedTable(M, UnitFedTableType, CallsiteFED)); + UnitFedTables.push_back(fedTableToUnitFedTable(M, UnitFedTableType, LoadFED)); + UnitFedTables.push_back( + fedTableToUnitFedTable(M, UnitFedTableType, StoreFED)); + UnitFedTables.push_back( + fedTableToUnitFedTable(M, UnitFedTableType, DetachFED)); + UnitFedTables.push_back(fedTableToUnitFedTable(M, UnitFedTableType, TaskFED)); + UnitFedTables.push_back( + fedTableToUnitFedTable(M, UnitFedTableType, TaskExitFED)); + UnitFedTables.push_back( + fedTableToUnitFedTable(M, UnitFedTableType, 
DetachContinueFED)); + UnitFedTables.push_back(fedTableToUnitFedTable(M, UnitFedTableType, SyncFED)); + UnitFedTables.push_back( + fedTableToUnitFedTable(M, UnitFedTableType, AllocaFED)); + UnitFedTables.push_back( + fedTableToUnitFedTable(M, UnitFedTableType, AllocFnFED)); + UnitFedTables.push_back(fedTableToUnitFedTable(M, UnitFedTableType, FreeFED)); +} + +// Create a struct type to match the unit_obj_entry_t type in csirt.c. +StructType *CSIImpl::getUnitSizeTableType(LLVMContext &C, + PointerType *EntryPointerType) { + return StructType::get(IntegerType::get(C, 64), EntryPointerType); +} + +Constant *CSIImpl::sizeTableToUnitSizeTable(Module &M, + StructType *UnitSizeTableType, + SizeTable &SzTable) { + Constant *NumEntries = + ConstantInt::get(IntegerType::get(M.getContext(), 64), SzTable.size()); + // Constant *BaseIdPtr = + // ConstantExpr::getPointerCast(FedTable.baseId(), + // Type::getInt8PtrTy(M.getContext(), 0)); + Constant *InsertedTable = SzTable.insertIntoModule(M); + return ConstantStruct::get(UnitSizeTableType, NumEntries, InsertedTable); +} + +void CSIImpl::collectUnitSizeTables() { + LLVMContext &C = M.getContext(); + StructType *UnitSizeTableType = + getUnitSizeTableType(C, SizeTable::getPointerType(C)); + + UnitSizeTables.push_back( + sizeTableToUnitSizeTable(M, UnitSizeTableType, BBSize)); +} + +CallInst *CSIImpl::createRTUnitInitCall(IRBuilder<> &IRB) { + LLVMContext &C = M.getContext(); + + StructType *UnitFedTableType = + getUnitFedTableType(C, FrontEndDataTable::getPointerType(C)); + StructType *UnitSizeTableType = + getUnitSizeTableType(C, SizeTable::getPointerType(C)); + + // Lookup __csirt_unit_init + SmallVector InitArgTypes({IRB.getInt8PtrTy(), + PointerType::get(UnitFedTableType, 0), + PointerType::get(UnitSizeTableType, 0), + InitCallsiteToFunction->getType()}); + FunctionType *InitFunctionTy = + FunctionType::get(IRB.getVoidTy(), InitArgTypes, false); + RTUnitInit = M.getOrInsertFunction(CsiRtUnitInitName, InitFunctionTy); + 
assert(isa(RTUnitInit.getCallee()) && + "Failed to get or insert __csirt_unit_init function"); + + ArrayType *UnitFedTableArrayType = + ArrayType::get(UnitFedTableType, UnitFedTables.size()); + Constant *FEDTable = ConstantArray::get(UnitFedTableArrayType, UnitFedTables); + GlobalVariable *FEDGV = new GlobalVariable( + M, UnitFedTableArrayType, false, GlobalValue::InternalLinkage, FEDTable, + CsiUnitFedTableArrayName); + ArrayType *UnitSizeTableArrayType = + ArrayType::get(UnitSizeTableType, UnitSizeTables.size()); + Constant *SzTable = + ConstantArray::get(UnitSizeTableArrayType, UnitSizeTables); + GlobalVariable *SizeGV = new GlobalVariable( + M, UnitSizeTableArrayType, false, GlobalValue::InternalLinkage, SzTable, + CsiUnitSizeTableArrayName); + + Constant *Zero = ConstantInt::get(IRB.getInt32Ty(), 0); + Value *GepArgs[] = {Zero, Zero}; + + // Insert call to __csirt_unit_init + return IRB.CreateCall( + RTUnitInit, + {IRB.CreateGlobalStringPtr(M.getName(), "__csi_module_name"), + ConstantExpr::getGetElementPtr(FEDGV->getValueType(), FEDGV, GepArgs), + ConstantExpr::getGetElementPtr(SizeGV->getValueType(), SizeGV, GepArgs), + InitCallsiteToFunction}); +} + +void CSIImpl::finalizeCsi() { + // Insert __csi_func_id_ weak symbols for all defined functions and + // generate the runtime code that stores to all of them. + generateInitCallsiteToFunction(); + + Function *Ctor = Function::Create( + FunctionType::get(Type::getVoidTy(M.getContext()), false), + GlobalValue::InternalLinkage, CsiRtUnitCtorName, &M); + BasicBlock *CtorBB = BasicBlock::Create(M.getContext(), "", Ctor); + IRBuilder<> IRB(ReturnInst::Create(M.getContext(), CtorBB)); + CallInst *Call = createRTUnitInitCall(IRB); + // TODO: Add version-check to the cunstructor? See + // ModuleUtils::createSanitizerCtorAndInitFunctions for example. + + // Add the ctor to llvm.global_ctors via appendToGlobalCtors() if either + // llvm.global_ctors does not exist or it exists with an initializer. 
One of + // these two conditions should always hold for modules compiled normally, but + // appendToGlobalCtors can crash if a tool, such as bugpoint, removes the + // initializer from llvm.global_ctors. This change facilitates using bugpoint + // to debug crashes involving CSI. + if (GlobalVariable *GVCtor = M.getNamedGlobal("llvm.global_ctors")) { + if (GVCtor->hasInitializer()) + appendToGlobalCtors(M, Ctor, CsiUnitCtorPriority); + } else { + appendToGlobalCtors(M, Ctor, CsiUnitCtorPriority); + } + + CallGraphNode *CNCtor = CG->getOrInsertFunction(Ctor); + CallGraphNode *CNFunc = + CG->getOrInsertFunction(cast(RTUnitInit.getCallee())); + CNCtor->addCalledFunction(Call, CNFunc); +} + +namespace { +// Custom DiagnosticInfo for linking a tool bitcode file. +class CSILinkDiagnosticInfo : public DiagnosticInfo { + const Module *SrcM; + const Twine &Msg; + +public: + CSILinkDiagnosticInfo(DiagnosticSeverity Severity, const Module *SrcM, + const Twine &Msg) + : DiagnosticInfo(DK_Lowering, Severity), SrcM(SrcM), Msg(Msg) {} + void print(DiagnosticPrinter &DP) const override { + DP << "linking module '" << SrcM->getModuleIdentifier() << "': " << Msg; + } +}; + +// Custom DiagnosticHandler to handle diagnostics arising when linking a tool +// bitcode file. 
+class CSIDiagnosticHandler final : public DiagnosticHandler { + const Module *SrcM; + DiagnosticHandler *OrigHandler; + +public: + CSIDiagnosticHandler(const Module *SrcM, DiagnosticHandler *OrigHandler) + : SrcM(SrcM), OrigHandler(OrigHandler) {} + + bool handleDiagnostics(const DiagnosticInfo &DI) override { + if (DI.getKind() != DK_Linker) + return OrigHandler->handleDiagnostics(DI); + + std::string MsgStorage; + { + raw_string_ostream Stream(MsgStorage); + DiagnosticPrinterRawOStream DP(Stream); + DI.print(DP); + } + return OrigHandler->handleDiagnostics( + CSILinkDiagnosticInfo(DI.getSeverity(), SrcM, MsgStorage)); + } +}; +} // namespace + +static GlobalVariable *copyGlobalArray(const char *Array, Module &M) { + // Get the current set of static global constructors. + if (GlobalVariable *GVA = M.getNamedGlobal(Array)) { + if (Constant *Init = GVA->getInitializer()) { + // Copy the existing global constructors into a new variable. + GlobalVariable *NGV = new GlobalVariable( + Init->getType(), GVA->isConstant(), GVA->getLinkage(), Init, "", + GVA->getThreadLocalMode()); + GVA->getParent()->insertGlobalVariable(GVA->getIterator(), NGV); + return NGV; + } + } + return nullptr; +} + +// Replace the modified global array list with the copy of the old version. +static void replaceGlobalArray(const char *Array, Module &M, + GlobalVariable *GVACopy) { + // Get the current version of the global array. + GlobalVariable *GVA = M.getNamedGlobal(Array); + GVACopy->takeName(GVA); + + // Nuke the old list, replacing any uses with the new one. + if (!GVA->use_empty()) { + Constant *V = GVACopy; + if (V->getType() != GVA->getType()) + V = ConstantExpr::getBitCast(V, GVA->getType()); + GVA->replaceAllUsesWith(V); + } + GVA->eraseFromParent(); +} + +// Restore the global array to its copy of its previous value. 
+static void restoreGlobalArray(const char *Array, Module &M, + GlobalVariable *GVACopy, bool GVAModified) { + if (GVACopy) { + if (GVAModified) { + // Replace the new global array with the old copy. + replaceGlobalArray(Array, M, GVACopy); + } else { + // The bitcode file doesn't add to the global array, so just delete the + // copy. + assert(GVACopy->use_empty()); + GVACopy->eraseFromParent(); + } + } else { // No global array was copied. + if (GVAModified) { + // Create a zero-initialized version of the global array. + GlobalVariable *NewGV = M.getNamedGlobal(Array); + ConstantArray *NewCA = cast(NewGV->getInitializer()); + Constant *CARepl = ConstantArray::get( + ArrayType::get(NewCA->getType()->getElementType(), 0), {}); + GlobalVariable *GVRepl = new GlobalVariable( + CARepl->getType(), NewGV->isConstant(), NewGV->getLinkage(), CARepl, + "", NewGV->getThreadLocalMode()); + NewGV->getParent()->insertGlobalVariable(NewGV->getIterator(), GVRepl); + + // Replace the global array with the zero-initialized version. + replaceGlobalArray(Array, M, GVRepl); + } else { + // Nothing to do. + } + } +} + +void CSIImpl::linkInToolFromBitcode(const std::string &BitcodePath) { + if (BitcodePath != "") { + LLVMContext &C = M.getContext(); + LLVM_DEBUG(dbgs() << "Using external bitcode file for CSI: " + << BitcodePath << "\n"); + SMDiagnostic SMD; + + std::unique_ptr ToolModule = parseIRFile(BitcodePath, SMD, C); + if (!ToolModule) { + C.emitError("CSI: Failed to parse bitcode file: " + BitcodePath); + return; + } + + // Get the original DiagnosticHandler for this context. + std::unique_ptr OrigDiagHandler = + C.getDiagnosticHandler(); + + // Setup a CSIDiagnosticHandler for this context, to handle + // diagnostics that arise from linking ToolModule. + C.setDiagnosticHandler(std::make_unique( + ToolModule.get(), OrigDiagHandler.get())); + + // Get list of functions in ToolModule. 
+ for (Function &TF : *ToolModule) + FunctionsInBitcode.insert(std::string(TF.getName())); + + GlobalVariable *GVCtorCopy = copyGlobalArray("llvm.global_ctors", M); + GlobalVariable *GVDtorCopy = copyGlobalArray("llvm.global_dtors", M); + bool BitcodeAddsCtors = false, BitcodeAddsDtors = false; + + // Link the external module into the current module, copying over global + // values. + bool Fail = Linker::linkModules( + M, std::move(ToolModule), Linker::Flags::LinkOnlyNeeded, + [&](Module &M, const StringSet<> &GVS) { + for (StringRef GVName : GVS.keys()) { + LLVM_DEBUG(dbgs() << "Linking global value " << GVName << "\n"); + if (GVName == "llvm.global_ctors") { + BitcodeAddsCtors = true; + continue; + } else if (GVName == "llvm.global_dtors") { + BitcodeAddsDtors = true; + continue; + } + // Record this GlobalValue as linked from the bitcode. + LinkedFromBitcode.insert(M.getNamedValue(GVName)); + if (Function *Fn = M.getFunction(GVName)) { + if (!Fn->isDeclaration() && !Fn->hasComdat()) { + // We set the function's linkage as available_externally, so + // that subsequent optimizations can remove these definitions + // from the module. We don't want this module redefining any of + // these symbols, even if they aren't inlined, because the + // OpenCilk runtime library will provide those definitions + // later. + Fn->setLinkage(Function::AvailableExternallyLinkage); + } + } else if (GlobalVariable *GV = M.getGlobalVariable(GVName)) { + if (!GV->isDeclaration() && !GV->hasComdat()) { + GV->setLinkage(Function::AvailableExternallyLinkage); + } + } + } + }); + if (Fail) + C.emitError("CSI: Failed to link bitcode file: " + Twine(BitcodePath)); + + // Restore the original DiagnosticHandler for this context. 
+ C.setDiagnosticHandler(std::move(OrigDiagHandler)); + + restoreGlobalArray("llvm.global_ctors", M, GVCtorCopy, BitcodeAddsCtors); + restoreGlobalArray("llvm.global_dtors", M, GVDtorCopy, BitcodeAddsDtors); + + LinkedBitcode = true; + } +} + +void CSIImpl::loadConfiguration() { + if (ClConfigurationFilename != "") + Config = InstrumentationConfig::ReadFromConfigurationFile( + ClConfigurationFilename); + else + Config = InstrumentationConfig::GetDefault(); + + Config->SetConfigMode(ClConfigurationMode); +} + +Value *CSIImpl::lookupUnderlyingObject(Value *Addr) const { + return getUnderlyingObject(Addr, 0); + // if (!UnderlyingObject.count(Addr)) + // UnderlyingObject[Addr] = getUnderlyingObject(Addr, 0); + + // return UnderlyingObject[Addr]; +} + +bool CSIImpl::shouldNotInstrumentFunction(Function &F) { + Module &M = *F.getParent(); + // Don't instrument standard library calls. +#ifdef WIN32 + if (F.hasName() && F.getName().find("_") == 0) { + return true; + } +#endif + + if (F.hasName() && F.getName().find("__csi") != std::string::npos) + return true; + + // Never instrument the CSI ctor. + if (F.hasName() && F.getName() == CsiRtUnitCtorName) + return true; + + // Don't instrument anything in the startup section or the __StaticInit + // section (MacOSX). + if (F.getSection() == ".text.startup" || + F.getSection().find("__StaticInit") != std::string::npos) + return true; + + // Don't instrument functions that will run before or + // simultaneously with CSI ctors. 
+ GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); + if (GV == nullptr) + return false; + if (!GV->hasInitializer() || GV->getInitializer()->isNullValue()) + return false; + + ConstantArray *CA = cast(GV->getInitializer()); + for (Use &OP : CA->operands()) { + if (isa(OP)) + continue; + ConstantStruct *CS = cast(OP); + + if (Function *CF = dyn_cast(CS->getOperand(1))) { + uint64_t Priority = + dyn_cast(CS->getOperand(0))->getLimitedValue(); + if (Priority <= CsiUnitCtorPriority && CF->getName() == F.getName()) { + // Do not instrument F. + return true; + } + } + } + // false means do instrument it. + return false; +} + +bool CSIImpl::isVtableAccess(const Instruction *I) { + if (const MDNode *Tag = I->getMetadata(LLVMContext::MD_tbaa)) + return Tag->isTBAAVtableAccess(); + return false; +} + +bool CSIImpl::addrPointsToConstantData(const Value *Addr) { + // If this is a GEP, just analyze its pointer operand. + if (const GetElementPtrInst *GEP = dyn_cast(Addr)) + Addr = GEP->getPointerOperand(); + + if (const GlobalVariable *GV = dyn_cast(Addr)) { + if (GV->isConstant()) { + return true; + } + } else if (const LoadInst *L = dyn_cast(Addr)) { + if (isVtableAccess(L)) { + return true; + } + } + return false; +} + +bool CSIImpl::isAtomic(const Instruction *I) { + if (const LoadInst *LI = dyn_cast(I)) + return LI->isAtomic() && LI->getSyncScopeID() != SyncScope::SingleThread; + if (const StoreInst *SI = dyn_cast(I)) + return SI->isAtomic() && SI->getSyncScopeID() != SyncScope::SingleThread; + if (isa(I)) + return true; + if (isa(I)) + return true; + if (isa(I)) + return true; + return false; +} + +bool CSIImpl::isThreadLocalObject(const Value *Obj) { + if (const IntrinsicInst *II = dyn_cast(Obj)) + return Intrinsic::threadlocal_address == II->getIntrinsicID(); + if (const GlobalValue *GV = dyn_cast(Obj)) + return GV->isThreadLocal(); + return false; +} + +void CSIImpl::computeLoadAndStoreProperties( + SmallVectorImpl> + &LoadAndStoreProperties, + 
SmallVectorImpl &BBLoadsAndStores) { + SmallSet WriteTargets; + + for (SmallVectorImpl::reverse_iterator + It = BBLoadsAndStores.rbegin(), + E = BBLoadsAndStores.rend(); + It != E; ++It) { + Instruction *I = *It; + if (StoreInst *Store = dyn_cast(I)) { + Value *Addr = Store->getPointerOperand(); + WriteTargets.insert(Addr); + CsiLoadStoreProperty Prop; + // Update alignment property data + Prop.setAlignment(MaybeAlign(Store->getAlign())); + // Set vtable-access property + Prop.setIsVtableAccess(isVtableAccess(Store)); + // Set constant-data-access property + Prop.setIsConstant(addrPointsToConstantData(Addr)); + Value *Obj = lookupUnderlyingObject(Addr); + // Set is-on-stack property + Prop.setIsOnStack(isa(Obj)); + // Set may-be-captured property + Prop.setMayBeCaptured(isa(Obj) || + PointerMayBeCaptured(Addr, true, true)); + // Set is-thread-local property + Prop.setIsThreadLocal(isThreadLocalObject(Obj)); + LoadAndStoreProperties.push_back(std::make_pair(I, Prop)); + } else { + LoadInst *Load = cast(I); + Value *Addr = Load->getPointerOperand(); + CsiLoadStoreProperty Prop; + // Update alignment property data + Prop.setAlignment(MaybeAlign(Load->getAlign())); + // Set vtable-access property + Prop.setIsVtableAccess(isVtableAccess(Load)); + // Set constant-data-access-property + Prop.setIsConstant(addrPointsToConstantData(Addr)); + Value *Obj = lookupUnderlyingObject(Addr); + // Set is-on-stack property + Prop.setIsOnStack(isa(Obj)); + // Set may-be-captured property + Prop.setMayBeCaptured(isa(Obj) || + PointerMayBeCaptured(Addr, true, true)); + // Set is-thread-local property + Prop.setIsThreadLocal(isThreadLocalObject(Obj)); + // Set load-read-before-write-in-bb property + bool HasBeenSeen = WriteTargets.count(Addr) > 0; + Prop.setLoadReadBeforeWriteInBB(HasBeenSeen); + LoadAndStoreProperties.push_back(std::make_pair(I, Prop)); + } + } + BBLoadsAndStores.clear(); +} + +// Update the attributes on the instrumented function that might be invalidated +// by the 
inserted instrumentation. +void CSIImpl::updateInstrumentedFnAttrs(Function &F) { + F.removeFnAttr(Attribute::ReadOnly); + F.removeFnAttr(Attribute::ReadNone); + MemoryEffects CurrentME = F.getMemoryEffects(); + if (MemoryEffects::unknown() != CurrentME) { + F.setMemoryEffects( + CurrentME | + MemoryEffects(MemoryEffects::Location::Other, ModRefInfo::ModRef) | + MemoryEffects(MemoryEffects::Location::InaccessibleMem, + ModRefInfo::ModRef)); + } +} + +// Return true if BB is an entry block to a function or task, false otherwise. +bool CSIImpl::isEntryBlock(const BasicBlock &BB, const TaskInfo &TI) { + return &BB == TI.getTaskFor(&BB)->getEntry(); +} + +// Check whether function-entry instrumentation can be inserted after +// instruction \p I. +static bool skipInstructionInEntryBB(const Instruction &I) { + if (isa(I)) + return true; + + if (isa(I)) + return true; + + if (const IntrinsicInst *II = dyn_cast(&I)) { + // Skip simple intrinsics + switch(II->getIntrinsicID()) { + case Intrinsic::annotation: + case Intrinsic::assume: + case Intrinsic::sideeffect: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: + case Intrinsic::is_constant: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + case Intrinsic::experimental_gc_result: + case Intrinsic::experimental_gc_relocate: + case Intrinsic::experimental_noalias_scope_decl: + case Intrinsic::syncregion_start: + case Intrinsic::taskframe_create: + case Intrinsic::taskframe_use: + return true; + default: + return false; + } + } + + return false; +} + +// Scan the entry basic block \p BB to find the first point to insert +// instrumentation. +Instruction *CSIImpl::getEntryBBInsertPt(BasicBlock &BB) { + // If a previous insertion point was already found for this entry block, + // return it. 
+ if (EntryBBInsertPt.count(&BB)) + return EntryBBInsertPt[&BB]; + + BasicBlock::iterator BI(BB.getFirstInsertionPt()); + BasicBlock::const_iterator BE(BB.end()); + + // Scan the basic block for the first instruction we should not skip. + while (BI != BE) { + if (!skipInstructionInEntryBB(*BI)) { + EntryBBInsertPt.insert(std::make_pair(&BB, &*BI)); + return &*BI; + } + ++BI; + } + + // We reached the end of the basic block; return the terminator. + EntryBBInsertPt.insert(std::make_pair(&BB, BB.getTerminator())); + return BB.getTerminator(); +} + +void CSIImpl::instrumentFunction(Function &F) { + // This is required to prevent instrumenting the call to + // __csi_module_init from within the module constructor. + + if (F.empty() || shouldNotInstrumentFunction(F) || + LinkedFromBitcode.count(&F)) + return; + + if (Options.CallsMayThrow) + // Promote calls to invokes to insert CSI instrumentation in + // exception-handling code. + setupCalls(F); + + const TargetLibraryInfo *TLI = &GetTLI(F); + + DominatorTree *DT = &GetDomTree(F); + LoopInfo &LI = GetLoopInfo(F); + + // If we do not assume that calls terminate blocks, or if we're not + // instrumenting basic blocks, then we're done. 
+ if (Options.InstrumentBasicBlocks && Options.CallsTerminateBlocks) + splitBlocksAtCalls(F, DT, &LI); + + if (Options.InstrumentLoops) + // Simplify loops to prepare for loop instrumentation + for (Loop *L : LI) + simplifyLoop(L, DT, &LI, nullptr, nullptr, nullptr, + /* PreserveLCSSA */ false); + + // Canonicalize the CFG for CSI instrumentation + setupBlocks(F, TLI, DT, &LI); + + LLVM_DEBUG(dbgs() << "Canonicalized function:\n" << F); + + SmallVector, 8> + LoadAndStoreProperties; + SmallVector AllocationFnCalls; + SmallVector FreeCalls; + SmallVector MemIntrinsics; + SmallVector Callsites; + SmallVector BasicBlocks; + SmallVector AtomicAccesses; + SmallVector Detaches; + SmallVector Syncs; + SmallVector Allocas; + SmallVector AllCalls; + bool MaySpawn = false; + SmallPtrSet BBsToIgnore; + + DenseMap SRCounters; + DenseMap SyncRegNums; + + TaskInfo &TI = GetTaskInfo(F); + ScalarEvolution *SE = nullptr; + if (GetScalarEvolution) + SE = &(*GetScalarEvolution)(F); + + // Compile lists of all instrumentation points before anything is modified. 
+ for (BasicBlock &BB : F) { + // Ignore Tapir placeholder basic blocks + if (&F.getEntryBlock() != &BB && isTapirPlaceholderSuccessor(&BB)) + continue; + if (!DT->isReachableFromEntry(&BB)) + continue; + SmallVector BBLoadsAndStores; + for (Instruction &I : BB) { + if (isAtomic(&I)) + AtomicAccesses.push_back(&I); + else if (isa(I) || isa(I)) { + BBLoadsAndStores.push_back(&I); + } else if (DetachInst *DI = dyn_cast(&I)) { + MaySpawn = true; + Detaches.push_back(DI); + } else if (SyncInst *SI = dyn_cast(&I)) { + Syncs.push_back(SI); + if (isSyncUnwind(SI->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime(), + /*SyncRegion=*/nullptr, /*CheckForInvoke=*/true)) { + SyncsWithUnwinds.insert(SI); + BBsToIgnore.insert(SI->getSuccessor(0)); + } + } else if (CallBase *CB = dyn_cast(&I)) { + if (const IntrinsicInst *II = dyn_cast(CB)) { + if (Intrinsic::syncregion_start == II->getIntrinsicID()) { + // Identify this sync region with a counter value, where all sync + // regions within a function or task are numbered from 0. + if (TI.getTaskFor(&BB)) { + BasicBlock *TEntry = TI.getTaskFor(&BB)->getEntry(); + // Create a new counter if need be. + if (!SRCounters.count(TEntry)) + SRCounters[TEntry] = 0; + SyncRegNums[&I] = SRCounters[TEntry]++; + } + } + } + + // Record this function call as either an allocation function, a call to + // free (or delete), a memory intrinsic, or an ordinary real function + // call. 
+ if (isAllocFn(&I, TLI)) + AllocationFnCalls.push_back(&I); + else if (isFreeFn(CB, TLI)) + FreeCalls.push_back(&I); + else if (isa(I)) + MemIntrinsics.push_back(&I); + else if (!callsPlaceholderFunction(I)) + Callsites.push_back(&I); + + AllCalls.push_back(&I); + + computeLoadAndStoreProperties(LoadAndStoreProperties, BBLoadsAndStores); + } else if (isa(I)) { + Allocas.push_back(&I); + } + } + computeLoadAndStoreProperties(LoadAndStoreProperties, BBLoadsAndStores); + if (!BBsToIgnore.count(&BB)) + BasicBlocks.push_back(&BB); + } + + uint64_t LocalId = getLocalFunctionID(F); + IRBuilder<> IRB(getEntryBBInsertPt(F.getEntryBlock())); + Value *FuncId = FunctionFED.localToGlobalId(LocalId, IRB); + + // Instrument basic blocks. Note that we do this before other instrumentation + // so that we put this at the beginning of the basic block, and then the + // function entry call goes before the call to basic block entry. + if (Options.InstrumentBasicBlocks) + for (BasicBlock *BB : BasicBlocks) + instrumentBasicBlock(*BB, TI); + + // Instrument Tapir constructs. + if (Options.InstrumentTapir) { + if (Config->DoesFunctionRequireInstrumentationForPoint( + F.getName(), InstrumentationPoint::INSTR_TAPIR_DETACH)) { + for (DetachInst *DI : Detaches) + instrumentDetach(DI, SyncRegNums[DI->getSyncRegion()], + SRCounters[DI->getDetached()], DT, TI, LI); + } + if (Config->DoesFunctionRequireInstrumentationForPoint( + F.getName(), InstrumentationPoint::INSTR_TAPIR_SYNC)) { + for (SyncInst *SI : Syncs) + instrumentSync(SI, SyncRegNums[SI->getSyncRegion()]); + } + } + + // Instrument allocas early, because they may require instrumentation inserted + // at an unusual place. 
+ if (Options.InstrumentAllocas) + for (Instruction *I : Allocas) + instrumentAlloca(I, TI); + + if (Options.InstrumentLoops) + // Recursively instrument all loops + for (Loop *L : LI) + instrumentLoop(*L, TI, SE); + + // Do this work in a separate loop after copying the iterators so that we + // aren't modifying the list as we're iterating. + if (Options.InstrumentMemoryAccesses) + for (std::pair p : + LoadAndStoreProperties) + instrumentLoadOrStore(p.first, p.second); + + // Instrument atomic memory accesses in any case (they can be used to + // implement synchronization). + if (Options.InstrumentAtomics) + for (Instruction *I : AtomicAccesses) + instrumentAtomic(I); + + if (Options.InstrumentMemIntrinsics) + for (Instruction *I : MemIntrinsics) + instrumentMemIntrinsic(I); + + if (Options.InstrumentCalls) + for (Instruction *I : Callsites) + instrumentCallsite(I, DT); + + if (Options.InstrumentAllocFns) { + for (Instruction *I : AllocationFnCalls) + instrumentAllocFn(I, DT, TLI); + for (Instruction *I : FreeCalls) + instrumentFree(I, TLI); + } + + if (Options.Interpose && Config->DoesAnyFunctionRequireInterposition()) { + for (Instruction *I : AllCalls) + interposeCall(I); + } + + // Instrument function entry/exit points. 
+ if (Options.InstrumentFuncEntryExit) { + IRBuilder<> IRB(cast(FuncId)->getNextNode()); + if (Config->DoesFunctionRequireInstrumentationForPoint( + F.getName(), InstrumentationPoint::INSTR_FUNCTION_ENTRY)) { + CsiFuncProperty FuncEntryProp; + FuncEntryProp.setMaySpawn(MaySpawn); + if (MaySpawn) + FuncEntryProp.setNumSyncReg(SRCounters[TI.getRootTask()->getEntry()]); + Value *PropVal = FuncEntryProp.getValue(IRB); + insertHookCall(&*IRB.GetInsertPoint(), CsiFuncEntry, {FuncId, PropVal}); + } + if (Config->DoesFunctionRequireInstrumentationForPoint( + F.getName(), InstrumentationPoint::INSTR_FUNCTION_EXIT)) { + EscapeEnumerator EE(F, "csi.cleanup", false); + while (IRBuilder<> *AtExit = EE.Next()) { + uint64_t ExitLocalId = FunctionExitFED.add(*AtExit->GetInsertPoint()); + Value *ExitCsiId = + FunctionExitFED.localToGlobalId(ExitLocalId, *AtExit); + CsiFuncExitProperty FuncExitProp; + FuncExitProp.setMaySpawn(MaySpawn); + FuncExitProp.setEHReturn(isa(AtExit->GetInsertPoint())); + Value *PropVal = FuncExitProp.getValue(*AtExit); + insertHookCall(&*AtExit->GetInsertPoint(), CsiFuncExit, + {ExitCsiId, FuncId, PropVal}); + } + } + } + + updateInstrumentedFnAttrs(F); +} + +Function *CSIImpl::getInterpositionFunction(Function *F) { + if (InterpositionFunctions.find(F) != InterpositionFunctions.end()) + return InterpositionFunctions.lookup(F); + + std::string InterposedName = "__csi_interpose_" + F->getName().str(); + Function *InterpositionFunction = cast( + M.getOrInsertFunction(InterposedName, F->getFunctionType()).getCallee()); + + InterpositionFunctions.insert({F, InterpositionFunction}); + + return InterpositionFunction; +} + +void ComprehensiveStaticInstrumentationLegacyPass::getAnalysisUsage( + AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); +} + +bool ComprehensiveStaticInstrumentationLegacyPass::runOnModule(Module &M) { + if (skipModule(M)) + 
return false; + + CallGraph *CG = &getAnalysis().getCallGraph(); + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis().getTLI(F); + }; + auto GetDomTree = [this](Function &F) -> DominatorTree & { + return this->getAnalysis(F).getDomTree(); + }; + auto GetLoopInfo = [this](Function &F) -> LoopInfo & { + return this->getAnalysis(F).getLoopInfo(); + }; + auto GetTTI = [this](Function &F) -> TargetTransformInfo & { + return this->getAnalysis().getTTI(F); + }; + auto GetSE = [this](Function &F) -> ScalarEvolution & { + return this->getAnalysis(F).getSE(); + }; + auto GetTaskInfo = [this](Function &F) -> TaskInfo & { + return this->getAnalysis(F).getTaskInfo(); + }; + + bool res = CSIImpl(M, CG, GetDomTree, GetLoopInfo, GetTaskInfo, GetTLI, GetSE, + GetTTI, Options) + .run(); + + verifyModule(M, &llvm::errs()); + + numPassRuns++; + + return res; +} + +CSISetupPass::CSISetupPass() : Options(OverrideFromCL(CSIOptions())) {} + +CSISetupPass::CSISetupPass(const CSIOptions &Options) : Options(Options) {} + +PreservedAnalyses CSISetupPass::run(Module &M, ModuleAnalysisManager &AM) { + if (!CSISetupImpl(M, Options).run()) + return PreservedAnalyses::all(); + + return PreservedAnalyses::none(); +} + +ComprehensiveStaticInstrumentationPass::ComprehensiveStaticInstrumentationPass() + : Options(OverrideFromCL(CSIOptions())) {} + +ComprehensiveStaticInstrumentationPass::ComprehensiveStaticInstrumentationPass( + const CSIOptions &Options) + : Options(Options) {} + +PreservedAnalyses +ComprehensiveStaticInstrumentationPass::run(Module &M, + ModuleAnalysisManager &AM) { + auto &FAM = AM.getResult(M).getManager(); + + auto &CG = AM.getResult(M); + auto GetDT = [&FAM](Function &F) -> DominatorTree & { + return FAM.getResult(F); + }; + auto GetLI = [&FAM](Function &F) -> LoopInfo & { + return FAM.getResult(F); + }; + auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & { + return FAM.getResult(F); + }; + auto GetSE = [&FAM](Function &F) -> 
ScalarEvolution & { + return FAM.getResult(F); + }; + auto GetTI = [&FAM](Function &F) -> TaskInfo & { + return FAM.getResult(F); + }; + auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult(F); + }; + + // Disable additional conversion of calls to invokes. + Options.CallsMayThrow = false; + + if (!CSIImpl(M, &CG, GetDT, GetLI, GetTI, GetTLI, GetSE, GetTTI, Options) + .run()) + return PreservedAnalyses::all(); + + return PreservedAnalyses::none(); +} diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 75adcabc0d34ea..fd1c1f7630113d 100644 --- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -734,9 +734,12 @@ static BasicBlock *getInstrBB(CFGMST &MST, Edge &E, // Some IndirectBr critical edges cannot be split by the previous // SplitIndirectBrCriticalEdges call. Bail out. + // Similarly bail out due to critical edges that cannot be split after detach + // instructions. const unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); - BasicBlock *InstrBB = - isa(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum); + BasicBlock *InstrBB = (isa(TI) || isa(TI)) + ? 
nullptr + : SplitCriticalEdge(TI, SuccNum); if (!InstrBB) return nullptr; diff --git a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp index 806afc8fcdf7cb..55ebbafbed7f3d 100644 --- a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -84,4 +84,3 @@ Comdat *llvm::getOrCreateFunctionComdat(Function &F, Triple &T) { F.setComdat(C); return C; } - diff --git a/llvm/lib/Transforms/Instrumentation/SurgicalInstrumentationConfig.cpp b/llvm/lib/Transforms/Instrumentation/SurgicalInstrumentationConfig.cpp new file mode 100644 index 00000000000000..67857f26c8ec31 --- /dev/null +++ b/llvm/lib/Transforms/Instrumentation/SurgicalInstrumentationConfig.cpp @@ -0,0 +1,109 @@ +//===-- SurgicalInstrumentationConfig.cpp -- Surgical CSI -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is part of CSI, a framework that provides comprehensive static +// instrumentation. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Instrumentation/SurgicalInstrumentationConfig.h" + +namespace llvm { +InstrumentationPoint +ParseInstrumentationPoint(const StringRef &instrPointString) { + if (SurgicalInstrumentationPoints.find(instrPointString) == + SurgicalInstrumentationPoints.end()) { + return InstrumentationPoint::INSTR_INVALID_POINT; + } else + return SurgicalInstrumentationPoints[instrPointString]; +} + +std::unique_ptr +llvm::InstrumentationConfig::GetDefault() { + return std::unique_ptr( + new DefaultInstrumentationConfig()); +} + +std::unique_ptr +InstrumentationConfig::ReadFromConfigurationFile(const std::string &filename) { + auto file = MemoryBuffer::getFile(filename); + + if (!file) { + llvm::report_fatal_error( + Twine("Instrumentation configuration file could not be opened: ") + + Twine(file.getError().message())); + } + + StringRef contents = file.get()->getBuffer(); + SmallVector lines; + + contents.split(lines, '\n', -1, false); + + StringMap functions; + StringSet<> interposedFunctions; + + bool interposeMode = false; + + // One instruction per line. + for (auto &line : lines) { + auto trimmedLine = line.trim(); + if (trimmedLine.size() == 0 || + trimmedLine[0] == '#') // Skip comments or empty lines. + continue; + + if (trimmedLine == "INTERPOSE") { + interposeMode = true; + continue; + } else if (trimmedLine == "INSTRUMENT") { + interposeMode = false; + continue; + } + + SmallVector tokens; + trimmedLine.split(tokens, ',', -1, false); + + if (interposeMode) { + interposedFunctions.insert(tokens[0]); + } else { + if (tokens.size() > 0) { + InstrumentationPoint points = InstrumentationPoint::INSTR_INVALID_POINT; + if (tokens.size() > + 1) // This function specifies specific instrumentation points. 
+ { + for (size_t i = 1; i < tokens.size(); ++i) { + auto instrPoint = ParseInstrumentationPoint(tokens[i].trim()); + + points |= instrPoint; + } + } + + auto trimmed = tokens[0].trim(); + if (trimmed != "") + functions[trimmed] = points; + } + } + } + + // If the configuration file turned out to be empty, + // instrument everything. + if (functions.size() == 0 && interposedFunctions.size() == 0) + return GetDefault(); + + for (auto &function : functions) { + if (interposedFunctions.find(function.getKey()) != interposedFunctions.end()) { + llvm::errs() << "warning: function for which interpositioning was " + "requested is also listed for instrumentation. The " + "function will only be interposed"; + } + } + + return std::unique_ptr( + new InstrumentationConfig(functions, interposedFunctions)); +} + +} // namespace llvm diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index ce35eefb63fa25..b0a9e85be2aa2d 100644 --- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -47,6 +47,7 @@ #include "llvm/Transforms/Utils/EscapeEnumerator.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/Transforms/Utils/TapirUtils.h" using namespace llvm; @@ -576,6 +577,9 @@ bool ThreadSanitizer::sanitizeFunction(Function &F, IRB.getInt32(0)); IRB.CreateCall(TsanFuncEntry, ReturnAddress); + if (ClHandleCxxExceptions && !F.doesNotThrow()) + promoteCallsInTasksToInvokes(F, "tsan_cleanup"); + EscapeEnumerator EE(F, "tsan_cleanup", ClHandleCxxExceptions); while (IRBuilder<> *AtExit = EE.Next()) { InstrumentationIRBuilder::ensureDebugInfo(*AtExit, F); diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp index 67e8e82e408f64..30b3e7bdf01378 100644 --- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -1546,6 
+1546,13 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n"); continue; } + // Inst is known to be a call instruction here. + if (cast(Inst).isStrandPure() && + Inst.getParent() != InVal.first->getParent()) { + // TODO: Teach this pass about spindles. + LLVM_DEBUG(dbgs() << "Skipping due to strand pure block crossing\n"); + continue; + } if (!Inst.use_empty()) Inst.replaceAllUsesWith(InVal.first); salvageKnowledge(&Inst, &AC); diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index 1ede4e7932af5b..71540bccff5390 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -40,6 +40,7 @@ #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" @@ -72,6 +73,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Transforms/Utils/TapirUtils.h" #include "llvm/Transforms/Utils/VNCoercion.h" #include #include @@ -761,9 +763,10 @@ PreservedAnalyses GVNPass::run(Function &F, FunctionAnalysisManager &AM) { auto *MemDep = isMemDepEnabled() ? &AM.getResult(F) : nullptr; auto *LI = AM.getCachedResult(F); + auto *TI = AM.getCachedResult(F); auto *MSSA = AM.getCachedResult(F); auto &ORE = AM.getResult(F); - bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE, + bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE, TI, MSSA ? 
&MSSA->getMSSA() : nullptr); if (!Changed) return PreservedAnalyses::all(); @@ -774,6 +777,8 @@ PreservedAnalyses GVNPass::run(Function &F, FunctionAnalysisManager &AM) { PA.preserve(); if (LI) PA.preserve(); + if (TI) + PA.preserve(); return PA; } @@ -1588,8 +1593,12 @@ bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock, if (IsValueFullyAvailableInBlock(Pred, FullyAvailableBlocks)) { continue; } + if (isa(Pred->getTerminator())) { + continue; + } - if (Pred->getTerminator()->getNumSuccessors() != 1) { + if (Pred->getTerminator()->getNumSuccessors() != 1 && + !isa(Pred->getTerminator())) { if (isa(Pred->getTerminator())) { LLVM_DEBUG( dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '" @@ -1877,6 +1886,21 @@ bool GVNPass::processNonLocalLoad(LoadInst *Load) { } bool Changed = false; + + // If we depend on a detach instruction, reject. + for (unsigned i = 0, e = NumDeps; i != e; ++i) { + MemDepResult DepInfo = Deps[i].getResult(); + if (!(DepInfo.getInst())) + continue; + if (isa(DepInfo.getInst()) || + isa(DepInfo.getInst())) { + LLVM_DEBUG(dbgs() << "GVN: Cannot process " << *Load + << " due to dependency on" << *(DepInfo.getInst()) + << "\n"); + return Changed; + } + } + // If this load follows a GEP, see if we can PRE the indices before analyzing. if (GetElementPtrInst *GEP = dyn_cast(Load->getOperand(0))) { @@ -2701,7 +2725,8 @@ bool GVNPass::processInstruction(Instruction *I) { bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, const TargetLibraryInfo &RunTLI, AAResults &RunAA, MemoryDependenceResults *RunMD, LoopInfo *LI, - OptimizationRemarkEmitter *RunORE, MemorySSA *MSSA) { + OptimizationRemarkEmitter *RunORE, TaskInfo *TI, + MemorySSA *MSSA) { AC = &RunAC; DT = &RunDT; VN.setDomTree(DT); @@ -2761,6 +2786,12 @@ bool GVNPass::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, // iteration. DeadBlocks.clear(); + if (TI && Changed) + // Recompute task info. 
+ // FIXME: Figure out a way to update task info that is less computationally + // wasteful. + TI->recalculate(F, *DT); + if (MSSA && VerifyMemorySSA) MSSA->verifyMemorySSA(); @@ -2924,6 +2955,8 @@ bool GVNPass::performScalarPRE(Instruction *CurInst) { if (InvalidBlockRPONumbers) assignBlockRPONumber(*CurrentBlock->getParent()); + SmallVector, 8> Reattaches; + SmallVector, 8> Detaches; SmallVector, 8> predMap; for (BasicBlock *P : predecessors(CurrentBlock)) { // We're not interested in PRE where blocks with predecessors that are @@ -2943,15 +2976,27 @@ bool GVNPass::performScalarPRE(Instruction *CurInst) { uint32_t TValNo = VN.phiTranslate(P, CurrentBlock, ValNo, *this); Value *predV = findLeader(P, TValNo); if (!predV) { - predMap.push_back(std::make_pair(static_cast(nullptr), P)); - PREPred = P; - ++NumWithout; + if (!isa(P->getTerminator())) { + predMap.push_back(std::make_pair(static_cast(nullptr), P)); + PREPred = P; + ++NumWithout; + } + // Record any detach and reattach predecessors. + if (DetachInst *DI = dyn_cast(P->getTerminator())) + Detaches.push_back(std::make_pair(static_cast(nullptr), DI)); + if (ReattachInst *RI = dyn_cast(P->getTerminator())) + Reattaches.push_back(std::make_pair(static_cast(nullptr), RI)); } else if (predV == CurInst) { /* CurInst dominates this predecessor. */ NumWithout = 2; break; } else { predMap.push_back(std::make_pair(predV, P)); + // Record any detach and reattach predecessors. + if (DetachInst *DI = dyn_cast(P->getTerminator())) + Detaches.push_back(std::make_pair(predV, DI)); + if (ReattachInst *RI = dyn_cast(P->getTerminator())) + Reattaches.push_back(std::make_pair(predV, RI)); ++NumWith; } } @@ -2961,6 +3006,23 @@ bool GVNPass::performScalarPRE(Instruction *CurInst) { if (NumWithout > 1 || NumWith == 0) return false; + for (auto RV : Reattaches) { + ReattachInst *RI = RV.second; + bool DetachFound = false; + for (auto DV : Detaches) { + DetachInst *DI = DV.second; + // Get the detach edge from DI. 
+ BasicBlockEdge DetachEdge(DI->getParent(), DI->getDetached()); + if (DT->dominates(DetachEdge, RI->getParent())) { + DetachFound = true; + if (RV.first && (RV.first != DV.first)) + return false; + } + } + assert(DetachFound && + "Reattach predecessor found with no detach predecessor"); + } + // We may have a case where all predecessors have the instruction, // and we just need to insert a phi node. Otherwise, perform // insertion. @@ -2984,7 +3046,8 @@ bool GVNPass::performScalarPRE(Instruction *CurInst) { // the edge to be split and perform the PRE the next time we iterate // on the function. unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock); - if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) { + if (isCriticalEdge(PREPred->getTerminator(), SuccNum) && + !isa(PREPred->getTerminator())) { toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum)); return false; } @@ -2997,6 +3060,22 @@ bool GVNPass::performScalarPRE(Instruction *CurInst) { #endif PREInstr->deleteValue(); return false; + } else if (isa(PREPred->getTerminator())) { + for (auto RV : Reattaches) { + ReattachInst *RI = RV.second; + for (auto DV : Detaches) { + DetachInst *DI = DV.second; + // Get the detach edge from DI. + BasicBlockEdge DetachEdge(DI->getParent(), DI->getDetached()); + if (DT->dominates(DetachEdge, RI->getParent())) { + if (DI->getParent() == PREPred) { + assert(nullptr == DV.first && + "Detach predecessor already had a value."); + predMap.push_back(std::make_pair(PREInstr, RI->getParent())); + } + } + } + } } } @@ -3219,6 +3298,12 @@ void GVNPass::addDeadBlock(BasicBlock *BB) { if (llvm::is_contained(successors(P), B) && isCriticalEdge(P->getTerminator(), B)) { + + // Don't bother splitting critical edges to a detach-continue block, + // since both the detach and reattach predecessors must be dead. 
+ if (isDetachContinueEdge(P->getTerminator(), B)) + continue; + if (BasicBlock *S = splitCriticalEdges(P, B)) DeadBlocks.insert(P = S); } @@ -3302,6 +3387,7 @@ class llvm::gvn::GVNLegacyPass : public FunctionPass { auto *LIWP = getAnalysisIfAvailable(); + auto *TIWP = getAnalysisIfAvailable(); auto *MSSAWP = getAnalysisIfAvailable(); return Impl.runImpl( F, getAnalysis().getAssumptionCache(F), @@ -3313,6 +3399,7 @@ class llvm::gvn::GVNLegacyPass : public FunctionPass { : nullptr, LIWP ? &LIWP->getLoopInfo() : nullptr, &getAnalysis().getORE(), + TIWP ? &TIWP->getTaskInfo() : nullptr, MSSAWP ? &MSSAWP->getMSSA() : nullptr); } @@ -3329,6 +3416,7 @@ class llvm::gvn::GVNLegacyPass : public FunctionPass { AU.addPreserved(); AU.addPreserved(); AU.addRequired(); + AU.addPreserved(); AU.addPreserved(); } @@ -3343,6 +3431,7 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TaskInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) diff --git a/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/llvm/lib/Transforms/Scalar/GVNHoist.cpp index b564f00eb9d166..e9bd67d3977537 100644 --- a/llvm/lib/Transforms/Scalar/GVNHoist.cpp +++ b/llvm/lib/Transforms/Scalar/GVNHoist.cpp @@ -47,6 +47,7 @@ #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Argument.h" #include "llvm/IR/BasicBlock.h" diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 40475d9563b2c1..c4632dca9e37f7 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ 
b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -38,6 +38,7 @@ #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -75,6 +76,7 @@ #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" +#include "llvm/Transforms/Utils/TapirUtils.h" #include #include #include @@ -133,9 +135,11 @@ class IndVarSimplify { TargetLibraryInfo *TLI; const TargetTransformInfo *TTI; std::unique_ptr MSSAU; + TaskInfo *TI; SmallVector DeadInsts; bool WidenIndVars; + bool TapirLoopsOnly; bool handleFloatingPointIV(Loop *L, PHINode *PH); bool rewriteNonIntegerIVs(Loop *L); @@ -162,9 +166,10 @@ class IndVarSimplify { public: IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, const DataLayout &DL, TargetLibraryInfo *TLI, - TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars) - : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI), - WidenIndVars(WidenIndVars) { + TargetTransformInfo *TTI, MemorySSA *MSSA, TaskInfo *TI, + bool WidenIndVars, bool TapirLoopsOnly) + : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI), TI(TI), + WidenIndVars(WidenIndVars), TapirLoopsOnly(TapirLoopsOnly) { if (MSSA) MSSAU = std::make_unique(MSSA); } @@ -698,9 +703,35 @@ static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) { return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V; } +/// Helper method to check if the given IV has the widest induction type. 
+static bool isWidestInductionType(Loop *L, PHINode *SimpleIV) { + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + uint64_t IVWidth = SimpleIV->getType()->getPrimitiveSizeInBits(); + for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) { + PHINode *Phi = cast(I); + if (Phi == SimpleIV) + continue; + + // Skip PHI nodes that are not of integer type. + if (!Phi->getType()->isIntegerTy()) + continue; + + // Skip PHI nodes that are not loop counters. + int Idx = Phi->getBasicBlockIndex(L->getLoopLatch()); + if (Idx < 0) + continue; + + // Check if Phi has a larger valid width than SimpleIV. + uint64_t PhiWidth = Phi->getType()->getPrimitiveSizeInBits(); + if (IVWidth < PhiWidth && DL.isLegalInteger(PhiWidth)) + return false; + } + return true; +} + /// linearFunctionTestReplace policy. Return true unless we can show that the /// current exit test is already sufficiently canonical. -static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) { +static bool needsLFTR(Loop *L, BasicBlock *ExitingBB, TaskInfo *TI) { assert(L->getLoopLatch() && "Must be in simplified form"); // Avoid converting a constant or loop invariant test back to a runtime @@ -744,7 +775,24 @@ static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) { // Do LFTR if the exit condition's IV is *not* a simple counter. Value *IncV = Phi->getIncomingValue(Idx); - return Phi != getLoopPhiForCounter(IncV, L); + if (Phi != getLoopPhiForCounter(IncV, L)) + return true; + + // Tapir loops are particularly picky about having canonical induction + // variables, so check if LFTR needs to create one. + if (getTaskIfTapirLoop(L, TI)) { + // Check that the simple IV has the widest induction type. + if (!isWidestInductionType(L, Phi)) + return true; + + // Check that the simple IV starts at 0. 
+ if (BasicBlock *Preheader = L->getLoopPreheader()) + if (Constant *Start = + dyn_cast(Phi->getIncomingValueForBlock(Preheader))) + return !(Start->isZeroValue()); + } + + return false; } /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils @@ -1878,6 +1926,31 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) { return Changed; } +static bool ensureZeroStartIV(Loop *L, const DataLayout &DL, + ScalarEvolution *SE, DominatorTree *DT) { + BasicBlock *LatchBlock = L->getLoopLatch(); + + const SCEV *ExitCount = SE->getExitCount(L, LatchBlock); + if (isa(ExitCount)) + return false; + + PHINode *IndVar = FindLoopCounter(L, LatchBlock, ExitCount, SE, DT); + if (!IndVar) + return false; + + Instruction * const IncVar = + cast(IndVar->getIncomingValueForBlock(LatchBlock)); + + const SCEVAddRecExpr *AR = cast(SE->getSCEV(IncVar)); + + if (!AR->getStart()->isZero()) { + SCEVExpander ARRewriter(*SE, DL, "indvars"); + ARRewriter.expandCodeFor(AR, AR->getType(), + &L->getHeader()->front()); + } + return true; +} + //===----------------------------------------------------------------------===// // IndVarSimplify driver. Manage several subpasses of IV simplification. //===----------------------------------------------------------------------===// @@ -1899,11 +1972,19 @@ bool IndVarSimplify::run(Loop *L) { if (!L->isLoopSimplifyForm()) return false; + bool IsTapirLoop = (nullptr != getTaskIfTapirLoop(L, TI)); + if (TapirLoopsOnly && !IsTapirLoop) + return false; bool Changed = false; // If there are any floating-point recurrences, attempt to // transform them to use integer recurrences. Changed |= rewriteNonIntegerIVs(L); + // See if we need to create a canonical IV that starts at 0. Right now we + // only check for a Tapir loop, but this check might be generalized. + if (IsTapirLoop) + Changed |= ensureZeroStartIV(L, DL, SE, DT); + // Create a rewriter object which we'll use to transform the code with. 
SCEVExpander Rewriter(*SE, DL, "indvars"); #ifndef NDEBUG @@ -1973,7 +2054,7 @@ bool IndVarSimplify::run(Loop *L) { if (LI->getLoopFor(ExitingBB) != L) continue; - if (!needsLFTR(L, ExitingBB)) + if (!needsLFTR(L, ExitingBB, TI)) continue; const SCEV *ExitCount = SE->getExitCount(L, ExitingBB); @@ -1993,7 +2074,8 @@ bool IndVarSimplify::run(Loop *L) { // Avoid high cost expansions. Note: This heuristic is questionable in // that our definition of "high cost" is not exactly principled. - if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget, + if (!IsTapirLoop && + Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget, TTI, PreHeader->getTerminator())) continue; @@ -2060,7 +2142,27 @@ PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, const DataLayout &DL = F->getParent()->getDataLayout(); IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA, - WidenIndVars && AllowIVWidening); + &AR.TI, WidenIndVars && AllowIVWidening, + /*TapirLoopsOnly=*/false); + if (!IVS.run(&L)) + return PreservedAnalyses::all(); + + auto PA = getLoopPassPreservedAnalyses(); + PA.preserveSet(); + if (AR.MSSA) + PA.preserve(); + return PA; +} + +PreservedAnalyses TapirIndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &) { + Function *F = L.getHeader()->getParent(); + const DataLayout &DL = F->getParent()->getDataLayout(); + + IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA, + &AR.TI, WidenIndVars && AllowIVWidening, + /*TapirLoopsOnly=*/true); if (!IVS.run(&L)) return PreservedAnalyses::all(); diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 5b8f1b00dc0343..832b06c71649c0 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -68,6 +68,7 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" 
#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Transforms/Utils/TapirUtils.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include #include @@ -334,6 +335,8 @@ bool JumpThreadingPass::runImpl(Function &F_, FunctionAnalysisManager *FAM_, if (!ThreadAcrossLoopHeaders) findLoopHeaders(*F); + findTapirTasks(*F, DT); + bool EverChanged = false; bool Changed; do { @@ -363,6 +366,7 @@ bool JumpThreadingPass::runImpl(Function &F_, FunctionAnalysisManager *FAM_, << '\n'); LoopHeaders.erase(&BB); LVI->eraseBlock(&BB); + TapirTasks.erase(&BB); DeleteDeadBlock(&BB, DTU.get()); Changed = ChangedSinceLastAnalysisUpdate = true; continue; @@ -392,6 +396,7 @@ bool JumpThreadingPass::runImpl(Function &F_, FunctionAnalysisManager *FAM_, } while (Changed); LoopHeaders.clear(); + TapirTasks.clear(); return EverChanged; } @@ -495,6 +500,11 @@ static unsigned getJumpThreadDuplicationCost(const TargetTransformInfo *TTI, if (CI->cannotDuplicate() || CI->isConvergent()) return ~0U; + // Bail if we discover a taskframe.end intrinsic. + // TODO: Handle taskframe.end like a guard. + if (isTapirIntrinsic(Intrinsic::taskframe_end, &*I)) + return ~0U; + if (TTI->getInstructionCost(&*I, TargetTransformInfo::TCK_SizeAndLatency) == TargetTransformInfo::TCC_Free) continue; @@ -539,6 +549,32 @@ void JumpThreadingPass::findLoopHeaders(Function &F) { LoopHeaders.insert(Edge.second); } +/// findTapirTasks - We must be careful when threading the continuation of a +/// Tapir task, in order to make sure that reattaches always go to the +/// continuation of their associated detaches. To ensure this we first record +/// all the associations between detaches and reattaches. +void JumpThreadingPass::findTapirTasks(Function &F, DominatorTree &DT) { + for (const BasicBlock &BB : F) { + if (const DetachInst *DI = dyn_cast(BB.getTerminator())) { + // Scan the predecessors of the detach continuation for reattaches that + // pair with this detach. 
+ const BasicBlock *Detached = DI->getDetached(); + for (const BasicBlock *PredBB : predecessors(DI->getContinue())) + if (isa(PredBB->getTerminator()) && + DT.dominates(Detached, PredBB)) + TapirTasks[&BB].insert(PredBB); + + if (DI->hasUnwindDest()) + // Scan the predecessors of the detach unwind for detached-rethrows that + // pair with this detach. + for (const BasicBlock *PredBB : predecessors(DI->getUnwindDest())) + if (isDetachedRethrow(PredBB->getTerminator()) && + DT.dominates(Detached, PredBB)) + TapirTasks[&BB].insert(PredBB); + } + } +} + /// getKnownConstant - Helper method to determine if we can thread over a /// terminator with the given value as its condition, and if so what value to /// use for that. What kind of value this is depends on whether we want an @@ -1342,7 +1378,8 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { } } - if (!PredAvailable) { + if (!PredAvailable || + isa(PredBB->getTerminator())) { OneUnavailablePred = PredBB; continue; } @@ -1385,6 +1422,9 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { // unconditional branch, we know that it isn't a critical edge. if (PredsScanned.size() == AvailablePreds.size()+1 && OneUnavailablePred->getTerminator()->getNumSuccessors() == 1) { + // If the predecessor is a reattach, we can't split the edge + if (isa(OneUnavailablePred->getTerminator())) + return false; UnavailablePred = OneUnavailablePred; } else if (PredsScanned.size() != AvailablePreds.size()) { // Otherwise, we had multiple unavailable predecessors or we had a critical @@ -1398,7 +1438,8 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { // Add all the unavailable predecessors to the PredsToSplit list. for (BasicBlock *P : predecessors(LoadBB)) { // If the predecessor is an indirect goto, we can't split the edge. 
- if (isa(P->getTerminator())) + if (isa(P->getTerminator()) || + isa(P->getTerminator())) return false; if (!AvailablePredSet.count(P)) @@ -1630,6 +1671,43 @@ bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB, PredToDestList.emplace_back(Pred, DestBB); } + // For Tapir, remove any edges from detaches, reattaches, or detached-rethrows + // if we are trying to thread only a subset of the the associated detaches, + // reattaches, and detached-rethrows among the predecesors. + erase_if( + PredToDestList, + [&](const std::pair &PredToDest) { + // Bail if the predecessor is not terminated by a detach. + if (isa(PredToDest.first->getTerminator())) { + // If we are threading through a detach-continue or detach-unwind, + // check that all associated reattaches and detached-rethrows are also + // predecessors in PredToDestList. + for (const BasicBlock *TaskPred : TapirTasks[PredToDest.first]) { + if (isa(TaskPred->getTerminator()) || + isDetachedRethrow(TaskPred->getTerminator())) { + return none_of( + PredToDestList, + [&](const std::pair &PredToDest) { + return TaskPred == PredToDest.first; + }); + } + } + } else if (isa(PredToDest.first->getTerminator()) || + isDetachedRethrow(PredToDest.first->getTerminator())) { + // If we have a reattach or detached-rethrow predecessor, check that + // the associated detach is also a predecessor in PredToDestList. + const BasicBlock *ReattachPred = PredToDest.first; + return none_of( + PredToDestList, + [&](const std::pair &PredToDest) { + return isa(PredToDest.first->getTerminator()) && + TapirTasks.count(PredToDest.first) && + TapirTasks[PredToDest.first].contains(ReattachPred); + }); + } + return false; + }); + // If all edges were unthreadable, we fail. 
if (PredToDestList.empty()) return false; @@ -1902,7 +1980,7 @@ bool JumpThreadingPass::maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) { const Instruction *TI = SinglePred->getTerminator(); if (TI->isExceptionalTerminator() || TI->getNumSuccessors() != 1 || - SinglePred == BB || hasAddressTakenAndUsed(BB)) + isa(TI) || SinglePred == BB || hasAddressTakenAndUsed(BB)) return false; // If SinglePred was a loop header, BB becomes one. @@ -2175,6 +2253,14 @@ bool JumpThreadingPass::maybethreadThroughTwoBasicBlocks(BasicBlock *BB, return false; } + // Similarly, disregard cases where PredPredBB is terminated by a Tapir + // instruction. + if (isa(PredPredBB->getTerminator()) || + isa(PredPredBB->getTerminator()) || + isDetachedRethrow(PredPredBB->getTerminator()) || + isTaskFrameResume(PredPredBB->getTerminator())) + return false; + BasicBlock *SuccBB = CondBr->getSuccessor(PredPredBB == ZeroPred); // If threading to the same block as we come from, we would infinite loop. diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index f8fab03f151d28..7bef3b4c5f929d 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -56,6 +56,7 @@ #include "llvm/Analysis/MustExecute.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -83,6 +84,7 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Transforms/Utils/TapirUtils.h" #include #include using namespace llvm; @@ -158,13 +160,13 @@ static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop, static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, 
MemorySSAUpdater &MSSAU, ScalarEvolution *SE, - OptimizationRemarkEmitter *ORE); + const TaskInfo *TI, OptimizationRemarkEmitter *ORE); static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, const Loop *CurLoop, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU, OptimizationRemarkEmitter *ORE); static bool isSafeToExecuteUnconditionally( Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI, - const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, + const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, const TaskInfo *TI, OptimizationRemarkEmitter *ORE, const Instruction *CtxI, AssumptionCache *AC, bool AllowSpeculation); static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU, @@ -201,7 +203,8 @@ struct LoopInvariantCodeMotion { bool runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, ScalarEvolution *SE, MemorySSA *MSSA, - OptimizationRemarkEmitter *ORE, bool LoopNestMode = false); + TaskInfo *TI, OptimizationRemarkEmitter *ORE, + bool LoopNestMode = false); LoopInvariantCodeMotion(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, @@ -249,7 +252,8 @@ struct LegacyLICMPass : public LoopPass { &getAnalysis().getAssumptionCache(*F), &getAnalysis().getTLI(*F), &getAnalysis().getTTI(*F), - SE ? &SE->getSE() : nullptr, MSSA, &ORE); + SE ? 
&SE->getSE() : nullptr, MSSA, + &getAnalysis().getTaskInfo(), &ORE); } /// This transformation requires natural loop information & requires that @@ -258,6 +262,8 @@ struct LegacyLICMPass : public LoopPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addPreserved(); AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); AU.addRequired(); AU.addRequired(); AU.addPreserved(); @@ -288,7 +294,7 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM, LoopInvariantCodeMotion LICM(Opts.MssaOptCap, Opts.MssaNoAccForPromotionCap, Opts.AllowSpeculation); if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, &AR.AC, &AR.TLI, &AR.TTI, - &AR.SE, AR.MSSA, &ORE)) + &AR.SE, AR.MSSA, &AR.TI, &ORE)) return PreservedAnalyses::all(); auto PA = getLoopPassPreservedAnalyses(); @@ -323,8 +329,9 @@ PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM, Opts.AllowSpeculation); Loop &OutermostLoop = LN.getOutermostLoop(); - bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, &AR.AC, - &AR.TLI, &AR.TTI, &AR.SE, AR.MSSA, &ORE, true); + bool Changed = + LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, &AR.AC, &AR.TLI, + &AR.TTI, &AR.SE, AR.MSSA, &AR.TI, &ORE, true); if (!Changed) return PreservedAnalyses::all(); @@ -388,13 +395,11 @@ llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags( /// Hoist expressions out of the specified loop. Note, alias info for inner /// loop is not preserved so it is not a good idea to run LICM multiple /// times on one loop. 
-bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, - DominatorTree *DT, AssumptionCache *AC, - TargetLibraryInfo *TLI, - TargetTransformInfo *TTI, - ScalarEvolution *SE, MemorySSA *MSSA, - OptimizationRemarkEmitter *ORE, - bool LoopNestMode) { +bool LoopInvariantCodeMotion::runOnLoop( + Loop *L, AAResults *AA, LoopInfo *LI, DominatorTree *DT, + AssumptionCache *AC, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, + ScalarEvolution *SE, MemorySSA *MSSA, TaskInfo *TI, + OptimizationRemarkEmitter *ORE, bool LoopNestMode) { bool Changed = false; assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form."); @@ -442,16 +447,16 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, // us to sink instructions in one pass, without iteration. After sinking // instructions, we perform another pass to hoist them out of the loop. if (L->hasDedicatedExits()) - Changed |= - LoopNestMode - ? sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI, DT, - TLI, TTI, L, MSSAU, &SafetyInfo, Flags, ORE) - : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L, - MSSAU, &SafetyInfo, Flags, ORE); + Changed |= LoopNestMode + ? 
sinkRegionForLoopNest(DT->getNode(L->getHeader()), AA, LI, + DT, TLI, TTI, L, MSSAU, &SafetyInfo, + Flags, TI, ORE) + : sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, + TTI, L, MSSAU, &SafetyInfo, Flags, TI, ORE); Flags.setIsSink(false); if (Preheader) Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, AC, TLI, L, - MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode, + MSSAU, SE, &SafetyInfo, Flags, TI, ORE, LoopNestMode, LicmAllowSpeculation); // Now that all loop invariants have been removed from the loop, promote any @@ -494,7 +499,7 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, collectPromotionCandidates(MSSA, AA, L)) { LocalPromoted |= promoteLoopAccessesToScalars( PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI, - DT, AC, TLI, TTI, L, MSSAU, &SafetyInfo, ORE, + DT, AC, TLI, TTI, L, MSSAU, &SafetyInfo, TI, ORE, LicmAllowSpeculation, HasReadsOutsideSet); } Promoted |= LocalPromoted; @@ -525,6 +530,12 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI, if (Changed && SE) SE->forgetLoopDispositions(); + + if (Changed && TI) + // Recompute task info. + // FIXME: Figure out a way to update task info that is less computationally + // wasteful. + TI->recalculate(*DT->getRoot()->getParent(), *DT); return Changed; } @@ -537,12 +548,12 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, DominatorTree *DT, TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop, MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo, - SinkAndHoistLICMFlags &Flags, + SinkAndHoistLICMFlags &Flags, TaskInfo *TI, OptimizationRemarkEmitter *ORE, Loop *OutermostLoop) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && - CurLoop != nullptr && SafetyInfo != nullptr && + CurLoop != nullptr && SafetyInfo != nullptr && TI != nullptr && "Unexpected input to sinkRegion."); // We want to visit children before parents. 
We will enqueue all the parents @@ -584,7 +595,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, isNotUsedOrFoldableInLoop(I, LoopNestMode ? OutermostLoop : CurLoop, SafetyInfo, TTI, FoldableInLoop, LoopNestMode) && - canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE)) { + canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, TI, Flags, ORE)) { if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) { if (!FoldableInLoop) { ++II; @@ -606,7 +617,7 @@ bool llvm::sinkRegionForLoopNest(DomTreeNode *N, AAResults *AA, LoopInfo *LI, TargetTransformInfo *TTI, Loop *CurLoop, MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo, - SinkAndHoistLICMFlags &Flags, + SinkAndHoistLICMFlags &Flags, TaskInfo *TI, OptimizationRemarkEmitter *ORE) { bool Changed = false; @@ -616,7 +627,7 @@ bool llvm::sinkRegionForLoopNest(DomTreeNode *N, AAResults *AA, LoopInfo *LI, while (!Worklist.empty()) { Loop *L = Worklist.pop_back_val(); Changed |= sinkRegion(DT->getNode(L->getHeader()), AA, LI, DT, TLI, TTI, L, - MSSAU, SafetyInfo, Flags, ORE, CurLoop); + MSSAU, SafetyInfo, Flags, TI, ORE, CurLoop); } return Changed; } @@ -859,12 +870,12 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, TargetLibraryInfo *TLI, Loop *CurLoop, MemorySSAUpdater &MSSAU, ScalarEvolution *SE, ICFLoopSafetyInfo *SafetyInfo, - SinkAndHoistLICMFlags &Flags, + SinkAndHoistLICMFlags &Flags, TaskInfo *TI, OptimizationRemarkEmitter *ORE, bool LoopNestMode, bool AllowSpeculation) { // Verify inputs. assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr && - CurLoop != nullptr && SafetyInfo != nullptr && + CurLoop != nullptr && SafetyInfo != nullptr && TI != nullptr && "Unexpected input to hoistRegion."); ControlFlowHoister CFH(LI, DT, CurLoop, MSSAU); @@ -895,12 +906,12 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, // and we have accurately duplicated the control flow from the loop header // to that block. 
if (CurLoop->hasLoopInvariantOperands(&I) && - canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE) && + canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, TI, Flags, ORE) && isSafeToExecuteUnconditionally( - I, DT, TLI, CurLoop, SafetyInfo, ORE, + I, DT, TLI, CurLoop, SafetyInfo, TI, ORE, Preheader->getTerminator(), AC, AllowSpeculation)) { hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, - MSSAU, SE, ORE); + MSSAU, SE, TI, ORE); HoistedInstructions.push_back(&I); Changed = true; continue; @@ -926,7 +937,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, eraseInstruction(I, *SafetyInfo, MSSAU); hoist(*ReciprocalDivisor, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), - SafetyInfo, MSSAU, SE, ORE); + SafetyInfo, MSSAU, SE, TI, ORE); HoistedInstructions.push_back(ReciprocalDivisor); Changed = true; continue; @@ -938,14 +949,14 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, match(&I, m_Intrinsic()); }; auto MustExecuteWithoutWritesBefore = [&](Instruction &I) { - return SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop) && + return SafetyInfo->isGuaranteedToExecute(I, DT, TI, CurLoop) && SafetyInfo->doesNotWriteMemoryBefore(I, CurLoop); }; if ((IsInvariantStart(I) || isGuard(&I)) && CurLoop->hasLoopInvariantOperands(&I) && MustExecuteWithoutWritesBefore(I)) { hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, - MSSAU, SE, ORE); + MSSAU, SE, TI, ORE); HoistedInstructions.push_back(&I); Changed = true; continue; @@ -959,7 +970,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI, PN->setIncomingBlock( i, CFH.getOrCreateHoistedBlock(PN->getIncomingBlock(i))); hoist(*PN, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo, - MSSAU, SE, ORE); + MSSAU, SE, TI, ORE); assert(DT->dominates(PN, BB) && "Conditional PHIs not expected"); Changed = true; continue; @@ -1153,7 +1164,7 @@ static MemoryAccess *getClobberingMemoryAccess(MemorySSA &MSSA, bool 
llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, Loop *CurLoop, MemorySSAUpdater &MSSAU, - bool TargetExecutesOncePerLoop, + bool TargetExecutesOncePerLoop, TaskInfo *TI, SinkAndHoistLICMFlags &Flags, OptimizationRemarkEmitter *ORE) { // If we don't understand the instruction, bail early. @@ -1235,7 +1246,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, // A readonly argmemonly function only reads from memory pointed to by // it's arguments with arbitrary offsets. If we can prove there are no // writes to this memory in the loop, we can hoist or sink. - if (Behavior.onlyAccessesArgPointees()) { + if (Behavior.onlyAccessesArgPointees() || CI->isStrandPure()) { // TODO: expand to writeable arguments for (Value *Op : CI->args()) if (Op->getType()->isPointerTy() && @@ -1554,6 +1565,11 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, CurLoop->getUniqueExitBlocks(ExitBlocks); SmallPtrSet ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end()); + + // Get the Tapir task exits for the current loop, in order to check for users + // contained in those task exits. + SmallPtrSet CurLoopTaskExits; + CurLoop->getTaskExits(CurLoopTaskExits); #endif BasicBlock *ExitBB = PN->getParent(); assert(ExitBlockSet.count(ExitBB) && "Expect the PHI is in an exit block."); @@ -1594,8 +1610,15 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT, SmallSetVector PredBBs(pred_begin(ExitBB), pred_end(ExitBB)); while (!PredBBs.empty()) { BasicBlock *PredBB = *PredBBs.begin(); - assert(CurLoop->contains(PredBB) && + assert((CurLoop->contains(PredBB) || CurLoopTaskExits.count(PredBB)) && "Expect all predecessors are in the loop"); + // Don't split loop-exit predecessor blocks terminated by a detach or + // detached.rethrow. 
+ if (isa(PredBB->getTerminator()) || + isDetachedRethrow(PredBB->getTerminator())) { + PredBBs.remove(PredBB); + continue; + } if (PN->getBasicBlockIndex(PredBB) >= 0) { BasicBlock *NewPred = SplitBlockPredecessors( ExitBB, PredBB, ".split.loop.exit", DT, LI, MSSAU, true); @@ -1623,6 +1646,11 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, bool Changed = false; LLVM_DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n"); + // Get the Tapir task exits for the current loop, in order to check for users + // contained in those task exits. + SmallPtrSet CurLoopTaskExits; + CurLoop->getTaskExits(CurLoopTaskExits); + // Iterate over users to be ready for actual sinking. Replace users via // unreachable blocks with undef and make all user PHIs trivially replaceable. SmallPtrSet VisitedUsers; @@ -1631,7 +1659,8 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, Use &U = UI.getUse(); ++UI; - if (VisitedUsers.count(User) || CurLoop->contains(User)) + if (VisitedUsers.count(User) || CurLoop->contains(User) || + CurLoopTaskExits.count(User->getParent())) continue; if (!DT->isReachableFromEntry(User->getParent())) { @@ -1727,7 +1756,7 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT, static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU, ScalarEvolution *SE, - OptimizationRemarkEmitter *ORE) { + const TaskInfo *TI, OptimizationRemarkEmitter *ORE) { LLVM_DEBUG(dbgs() << "LICM hoisting to " << Dest->getNameOrAsOperand() << ": " << I << "\n"); ORE->emit([&]() { @@ -1747,7 +1776,7 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, // The check on hasMetadataOtherThanDebugLoc is to prevent us from burning // time in isGuaranteedToExecute if we don't actually have anything to // drop. It is a compile time optimization, not required for correctness. 
- !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop)) + !SafetyInfo->isGuaranteedToExecute(I, DT, TI, CurLoop)) I.dropUBImplyingAttrsAndMetadata(); if (isa(I)) @@ -1771,15 +1800,41 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, /// or if it is a trapping instruction and is guaranteed to execute. static bool isSafeToExecuteUnconditionally( Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI, - const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, + const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo, const TaskInfo *TI, OptimizationRemarkEmitter *ORE, const Instruction *CtxI, AssumptionCache *AC, bool AllowSpeculation) { + if (CtxI) { + // Check for a load from a thread_local variable in a different spindle as + // CtxI. Loads from such variables are not safe to execute unconditionally + // outside of parallel loops. + if (LoadInst *LI = dyn_cast(&Inst)) { + if (GlobalValue *GV = dyn_cast( + getUnderlyingObject(LI->getPointerOperand()))) { + if (GV->isThreadLocal() && TI->getSpindleFor(Inst.getParent()) != + TI->getSpindleFor(CtxI->getParent())) + return false; + } + } + } + if (AllowSpeculation && isSafeToSpeculativelyExecute(&Inst, CtxI, AC, DT, TLI)) return true; + if (CtxI) { + // Check for a call to a strand-pure function. Such a call is safe to + // execute unconditionally if CtxI and Inst belong to the same spindle. 
+ if (const CallBase *CB = dyn_cast(&Inst)) { + const Function *Callee = CB->getCalledFunction(); + if (Callee && Callee->isStrandPure()) + if (TI->getSpindleFor(Inst.getParent()) != + TI->getSpindleFor(CtxI->getParent())) + return false; + } + } + bool GuaranteedToExecute = - SafetyInfo->isGuaranteedToExecute(Inst, DT, CurLoop); + SafetyInfo->isGuaranteedToExecute(Inst, DT, TI, CurLoop); if (!GuaranteedToExecute) { auto *LI = dyn_cast(&Inst); @@ -1974,12 +2029,12 @@ bool llvm::promoteLoopAccessesToScalars( SmallVectorImpl &MSSAInsertPts, PredIteratorCache &PIC, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, const TargetLibraryInfo *TLI, TargetTransformInfo *TTI, Loop *CurLoop, - MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo, + MemorySSAUpdater &MSSAU, ICFLoopSafetyInfo *SafetyInfo, TaskInfo *TI, OptimizationRemarkEmitter *ORE, bool AllowSpeculation, bool HasReadsOutsideSet) { // Verify inputs. assert(LI != nullptr && DT != nullptr && CurLoop != nullptr && - SafetyInfo != nullptr && + SafetyInfo != nullptr && TI != nullptr && "Unexpected Input to promoteLoopAccessesToScalars"); LLVM_DEBUG({ @@ -2039,6 +2094,16 @@ bool llvm::promoteLoopAccessesToScalars( StoreSafetyUnknown, } StoreSafety = StoreSafetyUnknown; + // We cannot speculate loads to values that are stored in a detached + // context within the loop. Precompute whether or not there is a + // detach within this loop. + bool DetachWithinLoop = + isa(CurLoop->getHeader()->getTerminator()) || + llvm::any_of(CurLoop->getBlocks(), + [](const BasicBlock *BB) { + return isa(BB->getTerminator()); + }); + SmallVector LoopUses; // We start with an alignment of one and try to find instructions that allow @@ -2096,7 +2161,7 @@ bool llvm::promoteLoopAccessesToScalars( // alignment as well. 
if (!DereferenceableInPH || (InstAlignment > Alignment)) if (isSafeToExecuteUnconditionally( - *Load, DT, TLI, CurLoop, SafetyInfo, ORE, + *Load, DT, TLI, CurLoop, SafetyInfo, TI, ORE, Preheader->getTerminator(), AC, AllowSpeculation)) { DereferenceableInPH = true; Alignment = std::max(Alignment, InstAlignment); @@ -2109,6 +2174,20 @@ bool llvm::promoteLoopAccessesToScalars( if (!Store->isUnordered()) return false; + // We conservatively avoid promoting stores that are detached + // within the loop. Technically it can be legal to move these + // stores -- the program already contains a determinacy race + // -- but to preserve the serial execution, we have to avoid + // moving stores that are loaded. For now, we simply avoid + // moving these stores. + if (DetachWithinLoop && + CurLoop->contains(TI->getTaskFor(Store->getParent())->getEntry())) + return false; + + // Note that we only check GuaranteedToExecute inside the store case + // so that we do not introduce stores where they did not exist before + // (which would break the LLVM concurrency model). + SawUnorderedAtomic |= Store->isAtomic(); SawNotAtomic |= !Store->isAtomic(); @@ -2119,7 +2198,7 @@ bool llvm::promoteLoopAccessesToScalars( // raise the alignment on the promoted store. 
Align InstAlignment = Store->getAlign(); bool GuaranteedToExecute = - SafetyInfo->isGuaranteedToExecute(*UI, DT, CurLoop); + SafetyInfo->isGuaranteedToExecute(*UI, DT, TI, CurLoop); StoreIsGuanteedToExecute |= GuaranteedToExecute; if (GuaranteedToExecute) { DereferenceableInPH = true; diff --git a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp index c041e3621a16bd..84a80033fd3a81 100644 --- a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/PatternMatch.h" @@ -435,7 +436,7 @@ breakBackedgeIfNotTaken(Loop *L, DominatorTree &DT, ScalarEvolution &SE, /// instructions out of the loop. static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT, ScalarEvolution &SE, LoopInfo &LI, - MemorySSA *MSSA, + TaskInfo &TI, MemorySSA *MSSA, OptimizationRemarkEmitter &ORE) { assert(L->isLCSSAForm(DT) && "Expected LCSSA!"); @@ -468,7 +469,7 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT, L->getHeader()) << "Loop deleted because it never executes"; }); - deleteDeadLoop(L, &DT, &SE, &LI, MSSA); + deleteDeadLoop(L, &DT, &SE, &LI, &TI, MSSA); ++NumDeleted; return LoopDeletionResult::Deleted; } @@ -508,7 +509,7 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT, L->getHeader()) << "Loop deleted because it is invariant"; }); - deleteDeadLoop(L, &DT, &SE, &LI, MSSA); + deleteDeadLoop(L, &DT, &SE, &LI, &TI, MSSA); ++NumDeleted; return LoopDeletionResult::Deleted; @@ -525,7 +526,7 @@ PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM, // pass. Function analyses need to be preserved across loop transformations // but ORE cannot be preserved (see comment before the pass definition). 
OptimizationRemarkEmitter ORE(L.getHeader()->getParent()); - auto Result = deleteLoopIfDead(&L, AR.DT, AR.SE, AR.LI, AR.MSSA, ORE); + auto Result = deleteLoopIfDead(&L, AR.DT, AR.SE, AR.LI, AR.TI, AR.MSSA, ORE); // If we can prove the backedge isn't taken, just break it and be done. This // leaves the loop structure in place which means it can handle dispatching diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp index 27196e46ca5666..d6bafd5595ddaf 100644 --- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp +++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp @@ -39,6 +39,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 8572a442e784ae..6bbd32f861aee9 100644 --- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -58,6 +58,7 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -271,6 +272,10 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM, if (!LIR.runOnLoop(&L)) return PreservedAnalyses::all(); + // FIXME: Recalculating TaskInfo for the whole function is wasteful. + // Optimize this routine in the future. 
+ AR.TI.recalculate(*AR.DT.getRoot()->getParent(), AR.DT); + auto PA = getLoopPassPreservedAnalyses(); if (AR.MSSA) PA.preserve(); diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp index 179ccde8d03552..87b89c522fec80 100644 --- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -37,6 +37,7 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/DataLayout.h" diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp index 2c8a3351281bd3..108b7f7ac8c99b 100644 --- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp @@ -12,6 +12,7 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Support/TimeProfiler.h" @@ -235,6 +236,7 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F, AM.getResult(F), AM.getResult(F), AM.getResult(F), + AM.getResult(F), BFI, BPI, MSSA}; @@ -358,6 +360,7 @@ PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F, PA.preserve(); if (UseMemorySSA) PA.preserve(); + PA.preserve(); return PA; } diff --git a/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/llvm/lib/Transforms/Scalar/LoopRotation.cpp index eee8550587064f..6479dde52c8d21 100644 --- a/llvm/lib/Transforms/Scalar/LoopRotation.cpp +++ b/llvm/lib/Transforms/Scalar/LoopRotation.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include 
"llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" @@ -74,9 +75,10 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM, std::optional MSSAU; if (AR.MSSA) MSSAU = MemorySSAUpdater(AR.MSSA); - bool Changed = LoopRotation(&L, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, - MSSAU ? &*MSSAU : nullptr, SQ, false, Threshold, - false, PrepareForLTO || PrepareForLTOOption); + bool Changed = + LoopRotation(&L, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, + MSSAU ? &*MSSAU : nullptr, &AR.TI, SQ, false, Threshold, + false, PrepareForLTO || PrepareForLTOOption); if (!Changed) return PreservedAnalyses::all(); @@ -131,6 +133,7 @@ class LoopRotateLegacyPass : public LoopPass { auto *AC = &getAnalysis().getAssumptionCache(F); auto &DT = getAnalysis().getDomTree(); auto &SE = getAnalysis().getSE(); + auto &TI = getAnalysis().getTaskInfo(); const SimplifyQuery SQ = getBestSimplifyQuery(*this, F); std::optional MSSAU; // Not requiring MemorySSA and getting it only if available will split @@ -145,8 +148,8 @@ class LoopRotateLegacyPass : public LoopPass { ? DefaultRotationThreshold : MaxHeaderSize; - return LoopRotation(L, LI, TTI, AC, &DT, &SE, MSSAU ? &*MSSAU : nullptr, SQ, - false, Threshold, false, + return LoopRotation(L, LI, TTI, AC, &DT, &SE, MSSAU ? 
&*MSSAU : nullptr, + &TI, SQ, false, Threshold, false, PrepareForLTO || PrepareForLTOOption); } }; diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp index 8d59fdff9236f8..146bff43e28fa5 100644 --- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/InitializePasses.h" @@ -729,6 +730,11 @@ PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, LoopAnalysisManager &AM, if (DeleteCurrentLoop) LPMU.markLoopAsDeleted(L, "loop-simplifycfg"); + // Recompute task info. + // FIXME: Figure out a way to update task info that is less computationally + // wasteful. + AR.TI.recalculate(*AR.DT.getRoot()->getParent(), AR.DT); + auto PA = getLoopPassPreservedAnalyses(); if (AR.MSSA) PA.preserve(); @@ -750,6 +756,8 @@ class LoopSimplifyCFGLegacyPass : public LoopPass { DominatorTree &DT = getAnalysis().getDomTree(); LoopInfo &LI = getAnalysis().getLoopInfo(); ScalarEvolution &SE = getAnalysis().getSE(); + auto *TIWP = getAnalysisIfAvailable(); + TaskInfo *TI = TIWP ? &TIWP->getTaskInfo() : nullptr; auto *MSSAA = getAnalysisIfAvailable(); std::optional MSSAU; if (MSSAA) @@ -761,6 +769,11 @@ class LoopSimplifyCFGLegacyPass : public LoopPass { DeleteCurrentLoop); if (DeleteCurrentLoop) LPM.markLoopAsDeleted(*L); + if (TI && Changed) + // Recompute task info. + // FIXME: Figure out a way to update task info that is less + // computationally wasteful. 
+ TI->recalculate(*DT.getRoot()->getParent(), DT); return Changed; } diff --git a/llvm/lib/Transforms/Scalar/LoopSink.cpp b/llvm/lib/Transforms/Scalar/LoopSink.cpp index 597c159682c5c0..3ad85a6fe4499b 100644 --- a/llvm/lib/Transforms/Scalar/LoopSink.cpp +++ b/llvm/lib/Transforms/Scalar/LoopSink.cpp @@ -40,6 +40,7 @@ #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/InitializePasses.h" @@ -282,7 +283,8 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI, DominatorTree &DT, BlockFrequencyInfo &BFI, MemorySSA &MSSA, - ScalarEvolution *SE) { + ScalarEvolution *SE, + TaskInfo *TI) { BasicBlock *Preheader = L.getLoopPreheader(); assert(Preheader && "Expected loop to have preheader"); @@ -325,7 +327,7 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI, // No need to check for instruction's operands are loop invariant. assert(L.hasLoopInvariantOperands(&I) && "Insts in a loop's preheader should have loop invariant operands!"); - if (!canSinkOrHoistInst(I, &AA, &DT, &L, MSSAU, false, LICMFlags)) + if (!canSinkOrHoistInst(I, &AA, &DT, &L, MSSAU, false, TI, LICMFlags)) continue; if (sinkInstruction(L, I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI, &MSSAU)) { @@ -351,6 +353,7 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) { AAResults &AA = FAM.getResult(F); DominatorTree &DT = FAM.getResult(F); + TaskInfo &TI = FAM.getResult(F); BlockFrequencyInfo &BFI = FAM.getResult(F); MemorySSA &MSSA = FAM.getResult(F).getMSSA(); @@ -373,7 +376,7 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) { // loops in SCEV and we don't preserve (or request) SCEV at all making that // unnecessary. 
Changed |= sinkLoopInvariantInstructions(L, AA, LI, DT, BFI, MSSA, - /*ScalarEvolution*/ nullptr); + /*ScalarEvolution*/ nullptr, &TI); } while (!PreorderLoops.empty()); if (!Changed) @@ -412,11 +415,13 @@ struct LegacyLoopSinkPass : public LoopPass { AAResults &AA = getAnalysis().getAAResults(); MemorySSA &MSSA = getAnalysis().getMSSA(); auto *SE = getAnalysisIfAvailable(); + auto *TI = getAnalysisIfAvailable(); bool Changed = sinkLoopInvariantInstructions( *L, AA, getAnalysis().getLoopInfo(), getAnalysis().getDomTree(), getAnalysis().getBFI(), - MSSA, SE ? &SE->getSE() : nullptr); + MSSA, SE ? &SE->getSE() : nullptr, + TI ? &TI->getTaskInfo() : nullptr); if (VerifyMemorySSA) MSSA.verifyMemorySSA(); diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index a4369b83e732fc..5c023b2e5237dc 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -76,6 +76,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ScalarEvolutionNormalization.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -6066,6 +6067,7 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addPreserved(); AU.addRequired(); + AU.addPreserved(); AU.addPreserved(); } @@ -7050,11 +7052,19 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { *L->getHeader()->getParent()); auto &TLI = getAnalysis().getTLI( *L->getHeader()->getParent()); + auto *TIWP = getAnalysisIfAvailable(); + auto *TI = TIWP ? 
&TIWP->getTaskInfo() : nullptr; auto *MSSAAnalysis = getAnalysisIfAvailable(); MemorySSA *MSSA = nullptr; if (MSSAAnalysis) MSSA = &MSSAAnalysis->getMSSA(); - return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, TLI, MSSA); + bool Changed = ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, TLI, MSSA); + if (TI && Changed) + // Recompute task info. + // FIXME: Figure out a way to update task info that is less computationally + // wasteful. + TI->recalculate(*DT.getRoot()->getParent(), DT); + return Changed; } PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM, @@ -7064,6 +7074,11 @@ PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM, AR.DT, AR.LI, AR.TTI, AR.AC, AR.TLI, AR.MSSA)) return PreservedAnalyses::all(); + // Recompute task info. + // FIXME: Figure out a way to update task info that is less computationally + // wasteful. + AR.TI.recalculate(*AR.DT.getRoot()->getParent(), AR.DT); + auto PA = getLoopPassPreservedAnalyses(); if (AR.MSSA) PA.preserve(); diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 335b489d3cb25b..3fdef3e2f2e8dc 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -30,6 +30,7 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -1119,20 +1120,19 @@ bool llvm::computeUnrollCount( return ExplicitUnroll; } -static LoopUnrollResult -tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, - const TargetTransformInfo &TTI, AssumptionCache &AC, - OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, - ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel, - bool OnlyFullUnroll, bool OnlyWhenForced, bool 
ForgetAllSCEV, - std::optional ProvidedCount, - std::optional ProvidedThreshold, - std::optional ProvidedAllowPartial, - std::optional ProvidedRuntime, - std::optional ProvidedUpperBound, - std::optional ProvidedAllowPeeling, - std::optional ProvidedAllowProfileBasedPeeling, - std::optional ProvidedFullUnrollMaxCount) { +static LoopUnrollResult tryToUnrollLoop( + Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, + const TargetTransformInfo &TTI, AssumptionCache &AC, TaskInfo *TI, + OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, + ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel, + bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV, + std::optional ProvidedCount, + std::optional ProvidedThreshold, + std::optional ProvidedAllowPartial, + std::optional ProvidedRuntime, std::optional ProvidedUpperBound, + std::optional ProvidedAllowPeeling, + std::optional ProvidedAllowProfileBasedPeeling, + std::optional ProvidedFullUnrollMaxCount) { LLVM_DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName() << "] Loop %" @@ -1328,7 +1328,7 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, L, {UP.Count, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount, UP.UnrollRemainder, ForgetAllSCEV}, - LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop); + LI, &SE, &DT, &AC, TI, &TTI, &ORE, PreserveLCSSA, &RemainderLoop); if (UnrollResult == LoopUnrollResult::Unmodified) return LoopUnrollResult::Unmodified; @@ -1416,6 +1416,7 @@ class LoopUnroll : public LoopPass { auto &DT = getAnalysis().getDomTree(); LoopInfo *LI = &getAnalysis().getLoopInfo(); + TaskInfo *TI = &getAnalysis().getTaskInfo(); ScalarEvolution &SE = getAnalysis().getSE(); const TargetTransformInfo &TTI = getAnalysis().getTTI(F); @@ -1427,7 +1428,8 @@ class LoopUnroll : public LoopPass { bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); LoopUnrollResult Result = tryToUnrollLoop( - L, DT, LI, SE, TTI, AC, ORE, nullptr, 
nullptr, PreserveLCSSA, OptLevel, + L, DT, LI, SE, TTI, AC, TI, ORE, nullptr, nullptr, PreserveLCSSA, + OptLevel, /*OnlyFullUnroll*/ false, OnlyWhenForced, ForgetAllSCEV, ProvidedCount, ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound, ProvidedAllowPeeling, @@ -1497,7 +1499,7 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, std::string LoopName = std::string(L.getName()); bool Changed = - tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE, + tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, &AR.TI, ORE, /*BFI*/ nullptr, /*PSI*/ nullptr, /*PreserveLCSSA*/ true, OptLevel, /*OnlyFullUnroll*/ true, OnlyWhenForced, ForgetSCEV, /*Count*/ std::nullopt, @@ -1510,7 +1512,7 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM, if (!Changed) return PreservedAnalyses::all(); - // The parent must not be damaged by unrolling! + // The parent must not be damaged by unrolling! #ifndef NDEBUG if (ParentL) ParentL->verifyLoop(); @@ -1574,6 +1576,7 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, auto &TTI = AM.getResult(F); auto &DT = AM.getResult(F); auto &AC = AM.getResult(F); + auto &TI = AM.getResult(F); auto &ORE = AM.getResult(F); LoopAnalysisManager *LAM = nullptr; @@ -1599,6 +1602,13 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, Changed |= formLCSSARecursively(*L, DT, &LI, &SE); } + if (Changed) + // Update TaskInfo manually using the updated DT. + // + // FIXME: Recalculating TaskInfo for the whole function is wasteful. + // Optimize this routine in the future. + TI.recalculate(*DT.getRoot()->getParent(), DT); + // Add the loop nests in the reverse order of LoopInfo. See method // declaration. SmallPriorityWorklist Worklist; @@ -1624,7 +1634,7 @@ PreservedAnalyses LoopUnrollPass::run(Function &F, // The API here is quite complex to call and we allow to select some // flavors of unrolling during construction time (by setting UnrollOpts). 
LoopUnrollResult Result = tryToUnrollLoop( - &L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI, + &L, DT, &LI, SE, TTI, AC, &TI, ORE, BFI, PSI, /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, /*OnlyFullUnroll*/ false, UnrollOpts.OnlyWhenForced, UnrollOpts.ForgetSCEV, /*Count*/ std::nullopt, diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp index 13e06c79d0d7eb..91506c4f00f629 100644 --- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp +++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp @@ -577,7 +577,7 @@ PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM, const Function *F = L.getHeader()->getParent(); OptimizationRemarkEmitter ORE(F); - LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, nullptr); + LoopAccessInfoManager LAIs(*SE, *AA, *DT, LAR.LI, LAR.TI, nullptr); if (!LoopVersioningLICM(AA, SE, &ORE, LAIs, LAR.LI, &L).run(DT)) return PreservedAnalyses::all(); return getLoopPassPreservedAnalyses(); diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 983a75e1d708dc..1ca242fe99bab2 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -42,6 +42,7 @@ #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/PtrUseVisitor.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constant.h" @@ -113,6 +114,8 @@ STATISTIC( "Number of stores rewritten into predicated loads to allow promotion"); STATISTIC(NumDeleted, "Number of instructions deleted"); STATISTIC(NumVectorized, "Number of vectorized aggregates"); +STATISTIC(NumNotParallelPromotable, "Number of alloca's not promotable due to " + "Tapir instructions"); /// Hidden option to experiment with completely strict handling of inbounds /// GEPs. 
@@ -2653,8 +2656,11 @@ class llvm::sroa::AllocaSliceRewriter Value *rewriteIntegerLoad(LoadInst &LI) { assert(IntTy && "We cannot insert an integer to the alloca"); assert(!LI.isVolatile()); - Value *V = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, - NewAI.getAlign(), "load"); + LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), &NewAI, + NewAI.getAlign(), "load"); + if (LI.isAtomic()) + NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); + Value *V = NewLI; V = convertValue(DL, IRB, V, IntTy); assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset"); uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset; @@ -2836,6 +2842,9 @@ class llvm::sroa::AllocaSliceRewriter if (AATags) Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + if (SI.isAtomic()) + Store->setAtomic(SI.getOrdering(), SI.getSyncScopeID()); + migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI, Store, Store->getPointerOperand(), Store->getValueOperand(), DL); @@ -4345,6 +4354,8 @@ bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { // a direct store) as needing to be resplit because it is no longer // promotable. 
if (AllocaInst *OtherAI = dyn_cast(StoreBasePtr)) { + assert(TI->isAllocaParallelPromotable(OtherAI) && + "Alloca must be promotable"); ResplitPromotableAllocas.insert(OtherAI); Worklist.insert(OtherAI); } else if (AllocaInst *OtherAI = dyn_cast( @@ -4468,6 +4479,8 @@ bool SROAPass::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { if (!SplitLoads) { if (AllocaInst *OtherAI = dyn_cast(LoadBasePtr)) { assert(OtherAI != &AI && "We can't re-split our own alloca!"); + assert(TI->isAllocaParallelPromotable(OtherAI) && + "Alloca must be promotable"); ResplitPromotableAllocas.insert(OtherAI); Worklist.insert(OtherAI); } else if (AllocaInst *OtherAI = dyn_cast( @@ -4663,6 +4676,11 @@ AllocaInst *SROAPass::rewritePartition(AllocaInst &AI, AllocaSlices &AS, NewSelectsToRewrite.emplace_back(std::make_pair(Sel, *Ops)); } + // Check if any detaches block promotion. + if (!TI->isAllocaParallelPromotable(NewAI)) + ++NumNotParallelPromotable; + Promotable &= TI->isAllocaParallelPromotable(NewAI); + if (Promotable) { for (Use *U : AS.getDeadUsesIfPromotable()) { auto *OldInst = dyn_cast(U->get()); @@ -4673,6 +4691,8 @@ AllocaInst *SROAPass::rewritePartition(AllocaInst &AI, AllocaSlices &AS, } if (PHIUsers.empty() && SelectUsers.empty()) { // Promote the alloca. 
+ assert(TI->isAllocaParallelPromotable(NewAI) && + "Alloca must be promotable"); PromotableAllocas.push_back(NewAI); } else { // If we have either PHIs or Selects to speculate, add them to those @@ -5053,7 +5073,7 @@ bool SROAPass::promoteAllocas(Function &F) { LLVM_DEBUG(dbgs() << "Not promoting allocas with mem2reg!\n"); } else { LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n"); - PromoteMemToReg(PromotableAllocas, DTU->getDomTree(), AC); + PromoteMemToReg(PromotableAllocas, DTU->getDomTree(), AC, TI); } PromotableAllocas.clear(); @@ -5061,22 +5081,35 @@ bool SROAPass::promoteAllocas(Function &F) { } PreservedAnalyses SROAPass::runImpl(Function &F, DomTreeUpdater &RunDTU, - AssumptionCache &RunAC) { + AssumptionCache &RunAC, TaskInfo &RunTI) { LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n"); C = &F.getContext(); DTU = &RunDTU; AC = &RunAC; + TI = &RunTI; + + // Scan the function to get its entry block and all entry blocks of detached + // CFG's. We can perform this scan for entry blocks once for the function, + // because this pass preserves the CFG. 
+ SmallVector EntryBlocks; + for (Task *T : depth_first(TI->getRootTask())) { + EntryBlocks.push_back(T->getEntry()); + if (Value *TaskFrame = T->getTaskFrameUsed()) + EntryBlocks.push_back(cast(TaskFrame)->getParent()); + } const DataLayout &DL = F.getParent()->getDataLayout(); - BasicBlock &EntryBB = F.getEntryBlock(); - for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end()); - I != E; ++I) { - if (AllocaInst *AI = dyn_cast(I)) { - if (DL.getTypeAllocSize(AI->getAllocatedType()).isScalable() && - isAllocaPromotable(AI)) - PromotableAllocas.push_back(AI); - else - Worklist.insert(AI); + for (BasicBlock *BB : EntryBlocks) { + BasicBlock &EntryBB = *BB; + for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end()); + I != E; ++I) { + if (AllocaInst *AI = dyn_cast(I)) { + if (DL.getTypeAllocSize(AI->getAllocatedType()).isScalable() && + isAllocaPromotable(AI) && TI->isAllocaParallelPromotable(AI)) + PromotableAllocas.push_back(AI); + else + Worklist.insert(AI); + } } } @@ -5104,6 +5137,13 @@ PreservedAnalyses SROAPass::runImpl(Function &F, DomTreeUpdater &RunDTU, llvm::erase_if(PromotableAllocas, IsInSet); DeletedAllocas.clear(); } + + // Preserve TaskInfo by manually updating it based on the updated DT. + if (IterationCFGChanged && TI) { + // FIXME: Recalculating TaskInfo for the whole function is wasteful. + // Optimize this routine in the future. 
+ TI->recalculate(F, DTU->getDomTree()); + } } Changed |= promoteAllocas(F); @@ -5128,19 +5168,21 @@ PreservedAnalyses SROAPass::runImpl(Function &F, DomTreeUpdater &RunDTU, if (!CFGChanged) PA.preserveSet(); PA.preserve(); + PA.preserve(); return PA; } PreservedAnalyses SROAPass::runImpl(Function &F, DominatorTree &RunDT, - AssumptionCache &RunAC) { + AssumptionCache &RunAC, TaskInfo &RunTI) { DomTreeUpdater DTU(RunDT, DomTreeUpdater::UpdateStrategy::Lazy); - return runImpl(F, DTU, RunAC); + return runImpl(F, DTU, RunAC, RunTI); } PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) { DominatorTree &DT = AM.getResult(F); AssumptionCache &AC = AM.getResult(F); - return runImpl(F, DT, AC); + TaskInfo &TI = AM.getResult(F); + return runImpl(F, DT, AC, TI); } void SROAPass::printPipeline( @@ -5175,15 +5217,18 @@ class llvm::sroa::SROALegacyPass : public FunctionPass { auto PA = Impl.runImpl( F, getAnalysis().getDomTree(), - getAnalysis().getAssumptionCache(F)); + getAnalysis().getAssumptionCache(F), + getAnalysis().getTaskInfo()); return !PA.areAllPreserved(); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); } StringRef getPassName() const override { return "SROA"; } @@ -5200,5 +5245,6 @@ INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TaskInfoWrapperPass) INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates", false, false) diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 633d077e64927e..b9809e89e7847e 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -30,6 +30,7 @@ #include 
"llvm/Analysis/MustExecute.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" @@ -42,6 +43,7 @@ #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/ProfDataUtils.h" #include "llvm/IR/Use.h" @@ -60,6 +62,7 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/TapirUtils.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include #include @@ -1198,6 +1201,13 @@ static BasicBlock *buildClonedLoopBlocks( if (!SkipBlock(LoopBB)) CloneBlock(LoopBB); + // Clone any task-exit blocks in the loop as well. + SmallPtrSet TaskExitBlocks; + L.getTaskExits(TaskExitBlocks); + for (auto *LoopBB : TaskExitBlocks) + if (!SkipBlock(LoopBB)) + CloneBlock(LoopBB); + // Split all the loop exit edges so that when we clone the exit blocks, if // any of the exit blocks are *also* a preheader for some other loop, we // don't create multiple predecessors entering the loop header. @@ -2127,7 +2137,7 @@ static void unswitchNontrivialInvariants( IVConditionInfo &PartialIVInfo, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, function_ref)> UnswitchCB, - ScalarEvolution *SE, MemorySSAUpdater *MSSAU, + ScalarEvolution *SE, TaskInfo *TaskI, MemorySSAUpdater *MSSAU, function_ref DestroyLoopCB, bool InsertFreeze, bool InjectedCondition) { auto *ParentBB = TI.getParent(); @@ -2199,7 +2209,7 @@ static void unswitchNontrivialInvariants( // Compute the parent loop now before we start hacking on things. Loop *ParentL = L.getParentLoop(); // Get blocks in RPO order for MSSA update, before changing the CFG. 
- LoopBlocksRPO LBRPO(&L); + LoopBlocksRPO LBRPO(&L, /*IncludeTaskExits*/ true); if (MSSAU) LBRPO.perform(&LI); @@ -2284,7 +2294,7 @@ static void unswitchNontrivialInvariants( // guaranteed no reach implicit null check after following this branch. ICFLoopSafetyInfo SafetyInfo; SafetyInfo.computeLoopSafetyInfo(&L); - if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L)) + if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, TaskI, &L)) TI.setMetadata(LLVMContext::MD_make_implicit, nullptr); } } @@ -3209,13 +3219,36 @@ static bool collectUnswitchCandidatesWithInjections( return Found; } +static bool +checkTapirSyncRegionInLoop(const Loop &L, + const SmallPtrSetImpl &TaskExits, + const Instruction &I) { + for (const User *Usr : I.users()) + if (const Instruction *UsrI = dyn_cast(Usr)) { + const BasicBlock *Parent = UsrI->getParent(); + if (!L.contains(Parent) && !TaskExits.contains(Parent)) + return false; + } + return true; +} + static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI) { if (!L.isSafeToClone()) return false; + SmallPtrSet TaskExits; + L.getTaskExits(TaskExits); for (auto *BB : L.blocks()) for (auto &I : *BB) { - if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB)) + if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB)) { + if (isTapirIntrinsic(Intrinsic::syncregion_start, &I)) { + if (!checkTapirSyncRegionInLoop(L, TaskExits, I)) + return false; + // All uses of this syncregion.start are inside of the loop, so it's + // safe for unswitching. + continue; + } return false; + } if (auto *CB = dyn_cast(&I)) { assert(!CB->cannotDuplicate() && "Checked by L.isSafeToClone()."); if (CB->isConvergent()) @@ -3229,7 +3262,7 @@ static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI) { // loops "out of thin air". If we ever discover important use cases for doing // this, we can add support to loop unswitch, but it is a lot of complexity // for what seems little or no real world benefit. 
- LoopBlocksRPO RPOT(&L); + LoopBlocksRPO RPOT(&L, /*IncludeTaskExits*/ true); RPOT.perform(&LI); if (containsIrreducibleCFG(RPOT, LI)) return false; @@ -3410,14 +3443,14 @@ static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate( // of the loop. Insert a freeze to prevent this case. // 3. The branch condition may be poison or undef static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT, - AssumptionCache &AC) { + AssumptionCache &AC, TaskInfo *TaskI) { assert(isa(TI) || isa(TI)); if (!FreezeLoopUnswitchCond) return false; ICFLoopSafetyInfo SafetyInfo; SafetyInfo.computeLoopSafetyInfo(&L); - if (SafetyInfo.isGuaranteedToExecute(TI, &DT, &L)) + if (SafetyInfo.isGuaranteedToExecute(TI, &DT, TaskI, &L)) return false; Value *Cond; @@ -3433,7 +3466,7 @@ static bool unswitchBestCondition( Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, function_ref)> UnswitchCB, - ScalarEvolution *SE, MemorySSAUpdater *MSSAU, + ScalarEvolution *SE, TaskInfo *TaskI, MemorySSAUpdater *MSSAU, function_ref DestroyLoopCB) { // Collect all invariant conditions within this loop (as opposed to an inner // loop which would be handled when visiting that inner loop). 
@@ -3491,14 +3524,14 @@ static bool unswitchBestCondition( if (isGuard(Best.TI)) Best.TI = turnGuardIntoBranch(cast(Best.TI), L, DT, LI, MSSAU); - InsertFreeze = shouldInsertFreeze(L, *Best.TI, DT, AC); + InsertFreeze = shouldInsertFreeze(L, *Best.TI, DT, AC, TaskI); } LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Cost << ") terminator: " << *Best.TI << "\n"); unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT, - LI, AC, UnswitchCB, SE, MSSAU, DestroyLoopCB, - InsertFreeze, InjectedCondition); + LI, AC, UnswitchCB, SE, TaskI, MSSAU, + DestroyLoopCB, InsertFreeze, InjectedCondition); return true; } @@ -3528,7 +3561,7 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, AAResults &AA, TargetTransformInfo &TTI, bool Trivial, bool NonTrivial, function_ref)> UnswitchCB, - ScalarEvolution *SE, MemorySSAUpdater *MSSAU, + ScalarEvolution *SE, TaskInfo *TaskI, MemorySSAUpdater *MSSAU, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, function_ref DestroyLoopCB) { assert(L.isRecursivelyLCSSAForm(DT, LI) && @@ -3612,8 +3645,8 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, // Try to unswitch the best invariant condition. We prefer this full unswitch to // a partial unswitch when possible below the threshold. - if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU, - DestroyLoopCB)) + if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, TaskI, + MSSAU, DestroyLoopCB)) return true; // No other opportunities to unswitch. @@ -3686,8 +3719,8 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, AR.MSSA->verifyMemorySSA(); } if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial, - UnswitchCB, &AR.SE, MSSAU ? &*MSSAU : nullptr, PSI, AR.BFI, - DestroyLoopCB)) + UnswitchCB, &AR.SE, &AR.TI, MSSAU ? 
&*MSSAU : nullptr, PSI, + AR.BFI, DestroyLoopCB)) return PreservedAnalyses::all(); if (AR.MSSA && VerifyMemorySSA) @@ -3697,6 +3730,11 @@ PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, // in asserts builds. assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast)); + // Recompute task info. + // FIXME: Figure out a way to update task info that is less computationally + // wasteful. + AR.TI.recalculate(F, AR.DT); + auto PA = getLoopPassPreservedAnalyses(); if (AR.MSSA) PA.preserve(); @@ -3756,6 +3794,7 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { auto &TTI = getAnalysis().getTTI(F); MemorySSA *MSSA = &getAnalysis().getMSSA(); MemorySSAUpdater MSSAU(MSSA); + auto &TI = getAnalysis().getTaskInfo(); auto *SEWP = getAnalysisIfAvailable(); auto *SE = SEWP ? &SEWP->getSE() : nullptr; @@ -3788,7 +3827,7 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { MSSA->verifyMemorySSA(); bool Changed = unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE, - &MSSAU, nullptr, nullptr, DestroyLoopCB); + &TI, &MSSAU, nullptr, nullptr, DestroyLoopCB); if (VerifyMemorySSA) MSSA->verifyMemorySSA(); @@ -3797,6 +3836,12 @@ bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { // in asserts builds. assert(DT.verify(DominatorTree::VerificationLevel::Fast)); + if (Changed) + // Recompute task info. + // FIXME: Figure out a way to update task info that is less computationally + // wasteful. 
+ TI.recalculate(F, DT); + return Changed; } diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 7017f6adf3a2bb..c4284a26d45268 100644 --- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -39,6 +39,9 @@ #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/TapirUtils.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Utils/Local.h" @@ -217,6 +220,73 @@ static bool tailMergeBlocksWithSimilarFunctionTerminators(Function &F, return Changed; } +static bool removeUselessSyncs(Function &F, DomTreeUpdater *DTU) { + bool Changed = false; + // Scan all the blocks in the function + check: + for (BasicBlock &BB : make_early_inc_range(F)) { + if (DTU && DTU->isBBPendingDeletion(&BB)) + continue; + if (SyncInst *Sync = dyn_cast(BB.getTerminator())) { + // Walk the CFG backwards to try to find a reaching detach instruction. + bool ReachingDetach = false; + SmallPtrSet Visited; + SmallVector WorkList; + WorkList.push_back(&BB); + while (!WorkList.empty()) { + BasicBlock *PBB = WorkList.pop_back_val(); + if (!Visited.insert(PBB).second) + continue; + + for (pred_iterator PI = pred_begin(PBB), PE = pred_end(PBB); + PI != PE; ++PI) { + BasicBlock *Pred = *PI; + Instruction *PT = Pred->getTerminator(); + // Stop the traversal at the entry block of a detached CFG. + if (DetachInst *DI = dyn_cast(PT)) { + if (DI->getDetached() == PBB) + continue; + else if (DI->getSyncRegion() == Sync->getSyncRegion()) + // This detach reaches the sync through the continuation edge. + ReachingDetach = true; + } + if (ReachingDetach) + break; + + // Ignore predecessors via a reattach, which belong to child detached + // contexts. 
+ if (isa(PT) || isDetachedRethrow(PT)) + continue; + + // For a predecessor terminated by a sync instruction, check the sync + // region it belongs to. If the sync belongs to the same sync region, + // ignore the predecessor. + if (SyncInst *SI = dyn_cast(PT)) + if (SI->getSyncRegion() == Sync->getSyncRegion()) + continue; + + WorkList.push_back(Pred); + } + } + + // If no detach reaches this sync, then this sync can be removed. + if (!ReachingDetach) { + BasicBlock* Succ = Sync->getSuccessor(0); + const Value *SyncReg = Sync->getSyncRegion(); + Instruction *MaybeSyncUnwind = Succ->getFirstNonPHIOrDbgOrLifetime(); + ReplaceInstWithInst(Sync, BranchInst::Create(Succ)); + Changed = true; + bool Recheck = false; + if (isSyncUnwind(MaybeSyncUnwind, SyncReg)) + Recheck |= removeDeadSyncUnwind(cast(MaybeSyncUnwind), DTU); + Recheck |= MergeBlockIntoPredecessor(Succ, DTU); + if (Recheck) goto check; + } + } + } + return Changed; +} + /// Call SimplifyCFG on all the blocks in the function, /// iterating until no more changes are made. static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, @@ -271,6 +341,7 @@ static bool simplifyFunctionCFGImpl(Function &F, const TargetTransformInfo &TTI, EverChanged |= tailMergeBlocksWithSimilarFunctionTerminators(F, DT ? &DTU : nullptr); EverChanged |= iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options); + EverChanged |= removeUselessSyncs(F, DT ? &DTU : nullptr); // If neither pass changed anything, we're done. if (!EverChanged) return false; @@ -286,6 +357,7 @@ static bool simplifyFunctionCFGImpl(Function &F, const TargetTransformInfo &TTI, do { EverChanged = iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options); EverChanged |= removeUnreachableBlocks(F, DT ? &DTU : nullptr); + EverChanged |= removeUselessSyncs(F, DT ? 
&DTU : nullptr); } while (EverChanged); return true; diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp index 2031e70bee1dbb..09d0c272175fad 100644 --- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -72,6 +72,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -79,6 +80,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/TapirUtils.h" using namespace llvm; #define DEBUG_TYPE "tailcallelim" @@ -424,6 +426,9 @@ class TailRecursionEliminator { // The instruction doing the accumulating. Instruction *AccumulatorRecursionInstr = nullptr; + // Map from sync region to return blocks to sync for that sync region. + DenseMap> ReturnBlocksToSync; + TailRecursionEliminator(Function &F, const TargetTransformInfo *TTI, AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) @@ -437,6 +442,8 @@ class TailRecursionEliminator { bool eliminateCall(CallInst *CI); + void InsertSyncsIntoReturnBlocks(); + void cleanupAndFinalize(); bool processBlock(BasicBlock &BB); @@ -509,10 +516,17 @@ void TailRecursionEliminator::createTailRecurseLoopHeader(CallInst *CI) { // Move all fixed sized allocas from HeaderBB to NewEntry. for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(), NEBI = NewEntry->begin(); - OEBI != E;) - if (AllocaInst *AI = dyn_cast(OEBI++)) + OEBI != E;) { + auto I = OEBI++; + if (AllocaInst *AI = dyn_cast(I)) { if (isa(AI->getArraySize())) AI->moveBefore(&*NEBI); + } else if (IntrinsicInst *II = dyn_cast(I)) { + // Also move syncregions to NewEntry. 
+ if (Intrinsic::syncregion_start == II->getIntrinsicID()) + II->moveBefore(&*NEBI); + } + } // Now that we have created a new block, which jumps to the entry // block, insert a PHI node for each argument of the function. @@ -801,6 +815,104 @@ void TailRecursionEliminator::cleanupAndFinalize() { } } +static void +getReturnBlocksToSync(BasicBlock *Entry, SyncInst *Sync, + SmallPtrSetImpl &ReturnBlocksToSync) { + // Walk the CFG from the entry block, stopping traversal at any sync within + // the same region. Record all blocks found that are terminated by a return + // instruction. + Value *SyncRegion = Sync->getSyncRegion(); + SmallVector WorkList; + SmallPtrSet Visited; + WorkList.push_back(Entry); + while (!WorkList.empty()) { + BasicBlock *BB = WorkList.pop_back_val(); + if (!Visited.insert(BB).second) + continue; + + // Skip paths that are synced within the same region. + if (SyncInst *SI = dyn_cast(BB->getTerminator())) + if (SI->getSyncRegion() == SyncRegion) + continue; + + // If we find a return, we must add a sync before it if we eliminate a + // recursive tail call. + if (isa(BB->getTerminator())) + ReturnBlocksToSync.insert(BB); + + // Queue up successors to search. + for (BasicBlock *Succ : successors(BB)) + if (Succ != Sync->getParent()) + WorkList.push_back(Succ); + } +} + +static bool hasPrecedingSync(SyncInst *SI) { + // TODO: Save the results from previous calls to hasPrecedingSync, in order to + // speed up multiple calls to this routine for different sync instructions. + SmallPtrSet Visited; + SmallVector Worklist; + Worklist.push_back(SI); + while (!Worklist.empty()) { + Instruction *I = Worklist.pop_back_val(); + if (!Visited.insert(I->getParent()).second) + continue; + + // Scan the basic block in reverse for a taskframe.end. If found, skip the + // search to the corresponding taskframe.create(). 
+ BasicBlock::iterator Iter(I); + BasicBlock::const_iterator BBStart(I->getParent()->begin()); + bool FoundPred = false; + while (Iter != BBStart) { + Instruction *I = &*Iter; + if (isTapirIntrinsic(Intrinsic::taskframe_end, I)) { + CallInst *TFEnd = cast(I); + Instruction *TaskFrame = cast(TFEnd->getArgOperand(0)); + if (TaskFrame->getParent() == I->getParent()) { + Iter = TaskFrame->getIterator(); + continue; + } + Worklist.push_back(TaskFrame); + FoundPred = true; + break; + } + Iter--; + } + + // If this block contains a taskframe.end whose taskframe.create exists in + // another block, then we're done with this block. + if (FoundPred) + continue; + + // Add predecessors of this block to the search, based on their terminators. + for (BasicBlock *Pred : predecessors(I->getParent())) { + Instruction *TI = Pred->getTerminator(); + // If we find a sync, then the searchis done. + if (isa(TI)) + return true; + + // Skip predecessors terminated by reattaches or detached.rethrows. This + // block will also have a detach as its predecessor, where we'll continue + // the search. + if (isa(TI) || isDetachedRethrow(TI)) + continue; + + // If we find a taskframe.resume, jump the search to the corresponding + // taskframe.create. + if (isTaskFrameResume(TI)) { + CallBase *CB = dyn_cast(TI); + Instruction *TaskFrame = cast(CB->getArgOperand(0)); + Worklist.push_back(TaskFrame); + continue; + } + // Otherwise, add the terminator to the worklist. + Worklist.push_back(TI); + } + } + // We finished the search and did not find a preceding sync. + return false; +} + bool TailRecursionEliminator::processBlock(BasicBlock &BB) { Instruction *TI = BB.getTerminator(); @@ -839,11 +951,163 @@ bool TailRecursionEliminator::processBlock(BasicBlock &BB) { if (CI) return eliminateCall(CI); + } else if (SyncInst *SI = dyn_cast(TI)) { + + BasicBlock *Succ = SI->getSuccessor(0); + // If the successor is terminated by a sync.unwind (which will necessarily + // be an invoke), skip TRE. 
+ if (isSyncUnwind(Succ->getTerminator())) + return false; + + // Try to find a return instruction in the block following a sync. + Instruction *NextI = Succ->getFirstNonPHIOrDbgOrSyncUnwind(true); + Instruction *TapirRuntimeToRemove = nullptr; + if (isTapirIntrinsic(Intrinsic::tapir_runtime_end, NextI)) { + TapirRuntimeToRemove = + cast(cast(NextI)->getArgOperand(0)); + NextI = &*(++NextI->getIterator()); + } + ReturnInst *Ret = dyn_cast(NextI); + + BasicBlock *BrSucc = nullptr; + if (!Ret) { + // After the sync, there might be a block with a sync.unwind instruction + // and an unconditional branch to a block containing just a return. Check + // for this structure. + if (BranchInst *BI = dyn_cast(NextI)) { + if (BI->isConditional()) + return false; + + BrSucc = BI->getSuccessor(0); + Ret = dyn_cast(BrSucc->getFirstNonPHIOrDbg(true)); + } + } + if (!Ret) + return false; + + CallInst *CI = findTRECandidate(&BB); + + if (!CI) + return false; + + // Check that all instructions between the candidate tail call and the sync + // can be moved above the call. In particular, we disallow accumulator + // recursion elimination for tail calls before a sync. + BasicBlock::iterator BBI(CI); + for (++BBI; &*BBI != SI; ++BBI) + if (!canMoveAboveCall(&*BBI, CI, AA)) + break; + if (&*BBI != SI) + return false; + + // Get the sync region for this sync. + Value *SyncRegion = SI->getSyncRegion(); + BasicBlock *OldEntryBlock = &BB.getParent()->getEntryBlock(); + + // Check that the sync region begins in the entry block of the function. + if (cast(SyncRegion)->getParent() != OldEntryBlock) { + LLVM_DEBUG(dbgs() << "Cannot eliminate tail call " << *CI + << ": sync region does not start in entry block."); + return false; + } + + // Check for preceding syncs, since TRE would cause those syncs to + // synchronize any computations that this sync currently syncs. + if (hasPrecedingSync(SI)) + return false; + + // Get returns reachable from newly created loop. 
+ getReturnBlocksToSync(OldEntryBlock, SI, ReturnBlocksToSync[SyncRegion]); + + // If we found a tapir.runtime.end intrinsic between the sync and return, + // remove it. + if (TapirRuntimeToRemove) { + SmallVector ToErase; + for (User *U : TapirRuntimeToRemove->users()) { + if (Instruction *I = dyn_cast(U)) { + if (!isTapirIntrinsic(Intrinsic::tapir_runtime_end, I)) + return false; + ToErase.push_back(I); + } + } + LLVM_DEBUG(dbgs() << "ERASING: " << *TapirRuntimeToRemove << "\n"); + for (Instruction *I : ToErase) + I->eraseFromParent(); + TapirRuntimeToRemove->eraseFromParent(); + } + + // If we found a sync.unwind and unconditional branch between the sync and + // return, first fold the return into this unconditional branch. + if (BrSucc) { + LLVM_DEBUG(dbgs() << "FOLDING: " << *BrSucc + << "INTO UNCOND BRANCH PRED: " << *Succ); + FoldReturnIntoUncondBranch(Ret, BrSucc, Succ, &DTU); + } + + // Fold the return into the sync. + LLVM_DEBUG(dbgs() << "FOLDING: " << *Succ << "INTO SYNC PRED: " << BB); + FoldReturnIntoUncondBranch(Ret, Succ, &BB, &DTU); + ++NumRetDuped; + + // If all predecessors of Succ have been eliminated by + // FoldReturnIntoUncondBranch, delete it. It is important to empty it, + // because the ret instruction in there is still using a value which + // eliminateCall will attempt to remove. This block can only contain + // instructions that can't have uses, therefore it is safe to remove. + if (pred_empty(Succ)) + DTU.deleteBB(Succ); + + bool EliminatedCall = eliminateCall(CI); + + // If a recursive tail was eliminated, fix up the syncs and sync region in + // the CFG. + if (EliminatedCall) { + // We defer the restoration of syncs at relevant return blocks until after + // all blocks are processed. This approach simplifies the logic for + // eliminating multiple tail calls that are only separated from the return + // by a sync, since the CFG won't be perturbed unnecessarily. + } else { + // Restore the sync that was eliminated. 
+ BasicBlock *RetBlock = Ret->getParent(); + BasicBlock *NewRetBlock = SplitBlock(RetBlock, Ret, &DTU); + ReplaceInstWithInst(RetBlock->getTerminator(), + SyncInst::Create(NewRetBlock, SyncRegion)); + // The earlier call to FoldReturnIntoUncondBranch did not remove the + // sync.unwind, so there's nothing to do to restore the sync.unwind. + } + + return EliminatedCall; } return false; } +void TailRecursionEliminator::InsertSyncsIntoReturnBlocks() { + Function *SyncUnwindFn = + Intrinsic::getDeclaration(F.getParent(), Intrinsic::sync_unwind); + BasicBlock &NewEntry = F.getEntryBlock(); + + for (auto ReturnsToSync : ReturnBlocksToSync) { + Value *SyncRegion = ReturnsToSync.first; + SmallPtrSetImpl &ReturnBlocks = ReturnsToSync.second; + + // Move the sync region start to the new entry block. + cast(SyncRegion)->moveBefore(&*(NewEntry.begin())); + + // Insert syncs before relevant return blocks. + for (BasicBlock *RetBlock : ReturnBlocks) { + BasicBlock *NewRetBlock = + SplitBlock(RetBlock, RetBlock->getTerminator(), &DTU); + ReplaceInstWithInst(RetBlock->getTerminator(), + SyncInst::Create(NewRetBlock, SyncRegion)); + + if (!F.doesNotThrow()) + CallInst::Create(SyncUnwindFn, {SyncRegion}, "", + NewRetBlock->getTerminator()); + } + } +} + bool TailRecursionEliminator::eliminate(Function &F, const TargetTransformInfo *TTI, AliasAnalysis *AA, @@ -869,6 +1133,9 @@ bool TailRecursionEliminator::eliminate(Function &F, for (BasicBlock &BB : F) MadeChange |= TRE.processBlock(BB); + if (!TRE.ReturnBlocksToSync.empty()) + TRE.InsertSyncsIntoReturnBlocks(); + TRE.cleanupAndFinalize(); return MadeChange; diff --git a/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp b/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp index e53019768e8819..ab75cef5a1031e 100644 --- a/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp +++ b/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp @@ -80,6 +80,20 @@ static void warnAboutLeftoverTransformations(Loop *L, "requested transformation; 
the transformation might be disabled or " "specified as part of an unsupported transformation ordering"); } + + if (hasLoopStripmineTransformation(L) == TM_ForcedByUser) { + LLVM_DEBUG(dbgs() << "Leftover loop-stripmine transformation\n"); + ORE->emit( + DiagnosticInfoOptimizationFailure(DEBUG_TYPE, + "FailedRequestedStripmining", + L->getStartLoc(), L->getHeader()) + << "loop not stripmined: the optimizer was unable to perform the " + "requested transformation; the transformation might be disabled or " + "specified as part of an unsupported transformation ordering"); + } + + // This pass doesn't check whether LoopSpawning has been performed, because + // LoopSpawning runs as part of Tapir lowering, after this pass has run. } static void warnAboutLeftoverTransformations(Function *F, LoopInfo *LI, diff --git a/llvm/lib/Transforms/Tapir/CMakeLists.txt b/llvm/lib/Transforms/Tapir/CMakeLists.txt new file mode 100644 index 00000000000000..b7b4cbe9af5fbd --- /dev/null +++ b/llvm/lib/Transforms/Tapir/CMakeLists.txt @@ -0,0 +1,41 @@ +add_llvm_component_library(LLVMTapirOpts + CilkABI.cpp + CilkRTSCilkFor.cpp + DRFScopedNoAliasAA.cpp + LambdaABI.cpp + LoopSpawningTI.cpp + LoopStripMine.cpp + LoopStripMinePass.cpp + LoweringUtils.cpp + OMPTaskABI.cpp + OpenCilkABI.cpp + Outline.cpp + QthreadsABI.cpp + SerialABI.cpp + SerializeSmallTasks.cpp + Tapir.cpp + TapirToTarget.cpp + TapirLoopInfo.cpp + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/Tapir + + DEPENDS + intrinsics_gen + LLVMLinker + + COMPONENT_NAME + TapirOpts + + LINK_COMPONENTS + Analysis + Core + IRReader + Linker + MC + Scalar + Support + TransformUtils + Vectorize + ) diff --git a/llvm/lib/Transforms/Tapir/CilkABI.cpp b/llvm/lib/Transforms/Tapir/CilkABI.cpp new file mode 100644 index 00000000000000..6d640e2ee22314 --- /dev/null +++ b/llvm/lib/Transforms/Tapir/CilkABI.cpp @@ -0,0 +1,1796 @@ +//===- CilkABI.cpp - Lower Tapir into Cilk runtime system 
calls -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Cilk ABI to converts Tapir instructions to calls +// into the Cilk runtime system. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Tapir/CilkABI.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/ModRef.h" +#include "llvm/Support/Timer.h" +#include "llvm/TargetParser/Triple.h" +#include "llvm/Transforms/Tapir/CilkRTSCilkFor.h" +#include "llvm/Transforms/Tapir/Outline.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/EscapeEnumerator.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" +#include "llvm/Transforms/Utils/TapirUtils.h" +#include "llvm/Transforms/Utils/ValueMapper.h" + +using namespace llvm; + +#define DEBUG_TYPE "cilkabi" + +extern cl::opt DebugABICalls; + +static cl::opt fastCilk( + "fast-cilk", cl::init(false), cl::Hidden, + cl::desc("Attempt faster Cilk call implementation")); + +static cl::opt ArgStruct( + "cilk-use-arg-struct", cl::init(false), cl::Hidden, + cl::desc("Use a struct to store arguments for detached tasks")); + +static const char TimerGroupName[] = DEBUG_TYPE; +static const char TimerGroupDescription[] = "CilkABI"; 
+ +enum { + __CILKRTS_ABI_VERSION = 1 +}; + +enum { + CILK_FRAME_STOLEN = 0x01, + CILK_FRAME_UNSYNCHED = 0x02, + CILK_FRAME_DETACHED = 0x04, + CILK_FRAME_EXCEPTION_PROBED = 0x08, + CILK_FRAME_EXCEPTING = 0x10, + CILK_FRAME_LAST = 0x80, + CILK_FRAME_EXITING = 0x0100, + CILK_FRAME_SUSPENDED = 0x8000, + CILK_FRAME_UNWINDING = 0x10000 +}; + +#define CILK_FRAME_VERSION (__CILKRTS_ABI_VERSION << 24) +#define CILK_FRAME_VERSION_MASK 0xFF000000 +#define CILK_FRAME_FLAGS_MASK 0x00FFFFFF +#define CILK_FRAME_VERSION_VALUE(_flags) (((_flags) & CILK_FRAME_VERSION_MASK) >> 24) +#define CILK_FRAME_MBZ (~ (CILK_FRAME_STOLEN | \ + CILK_FRAME_UNSYNCHED | \ + CILK_FRAME_DETACHED | \ + CILK_FRAME_EXCEPTION_PROBED | \ + CILK_FRAME_EXCEPTING | \ + CILK_FRAME_LAST | \ + CILK_FRAME_EXITING | \ + CILK_FRAME_SUSPENDED | \ + CILK_FRAME_UNWINDING | \ + CILK_FRAME_VERSION_MASK)) + +#define CILKRTS_FUNC(name) Get__cilkrts_##name() + +TapirTarget::ArgStructMode CilkABI::getArgStructMode() const { + if (ArgStruct) + return ArgStructMode::Dynamic; + return ArgStructMode::None; +} + +void CilkABI::addHelperAttributes(Function &Helper) { + // Use a fast calling convention for the helper. + Helper.setCallingConv(CallingConv::Fast); + // Inlining the helper function is not legal. + Helper.removeFnAttr(Attribute::AlwaysInline); + Helper.addFnAttr(Attribute::NoInline); + // If the helper uses an argument structure, then it is not a write-only + // function. + if (getArgStructMode() != ArgStructMode::None) { + Helper.removeFnAttr(Attribute::WriteOnly); + Helper.setMemoryEffects( + MemoryEffects(MemoryEffects::Location::Other, ModRefInfo::ModRef)); + } + // Note that the address of the helper is unimportant. + Helper.setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + // The helper is internal to this module. 
  Helper.setLinkage(GlobalValue::InternalLinkage);
}

CilkABI::CilkABI(Module &M) : TapirTarget(M) {}

/// Declare (or look up) the Cilk runtime structure types --
/// __cilkrts_pedigree, __cilkrts_stack_frame, and __cilkrts_worker -- in this
/// module, filling in each type's body only if it is still opaque so that
/// preexisting definitions in the module are reused as-is.
void CilkABI::prepareModule() {
  LLVMContext &C = M.getContext();
  Type *VoidPtrTy = Type::getInt8PtrTy(C);
  Type *Int64Ty = Type::getInt64Ty(C);
  Type *Int32Ty = Type::getInt32Ty(C);
  Type *Int16Ty = Type::getInt16Ty(C);

  // Get or create local definitions of Cilk RTS structure types.
  PedigreeTy = StructType::lookupOrCreate(C, "struct.__cilkrts_pedigree");
  StackFrameTy = StructType::lookupOrCreate(C, "struct.__cilkrts_stack_frame");
  WorkerTy = StructType::lookupOrCreate(C, "struct.__cilkrts_worker");

  if (PedigreeTy->isOpaque())
    PedigreeTy->setBody(Int64Ty, PointerType::getUnqual(PedigreeTy));
  if (StackFrameTy->isOpaque()) {
    // The pedigree union in the runtime header is modeled here as a
    // single-member struct wrapping a pedigree.
    Type *PedigreeUnionTy = StructType::get(PedigreeTy);
    StackFrameTy->setBody(Int32Ty,                              // flags
                          Int32Ty,                              // size
                          PointerType::getUnqual(StackFrameTy), // call_parent
                          PointerType::getUnqual(WorkerTy),     // worker
                          VoidPtrTy,                            // except_data
                          ArrayType::get(VoidPtrTy, 5),         // ctx
                          Int32Ty,                              // mxcsr
                          Int16Ty,                              // fpcsr
                          Int16Ty,                              // reserved
                          // union { spawn_helper_pedigree, parent_pedigree }
                          PedigreeUnionTy
                          );
  }
  PointerType *StackFramePtrTy = PointerType::getUnqual(StackFrameTy);
  if (WorkerTy->isOpaque())
    WorkerTy->setBody(PointerType::getUnqual(StackFramePtrTy), // tail
                      PointerType::getUnqual(StackFramePtrTy), // head
                      PointerType::getUnqual(StackFramePtrTy), // exc
                      PointerType::getUnqual(StackFramePtrTy), // protected_tail
                      PointerType::getUnqual(StackFramePtrTy), // ltq_limit
                      Int32Ty,                                 // self
                      VoidPtrTy,                               // g
                      VoidPtrTy,                               // l
                      VoidPtrTy,                               // reducer_map
                      StackFramePtrTy,                  // current_stack_frame
                      VoidPtrTy,                        // saved_protected_tail
                      VoidPtrTy,                               // sysdep
                      PedigreeTy                               // pedigree
                      );
}

// Accessors for opaque Cilk RTS functions.  Each getter lazily creates the
// runtime declaration on first use and caches the resulting FunctionCallee in
// a member so repeated queries are cheap.

FunctionCallee CilkABI::Get__cilkrts_get_nworkers() {
  if (CilkRTSGetNworkers)
    return CilkRTSGetNworkers;

  LLVMContext &C = M.getContext();

  AttributeList AL;
  // NOTE(review): ReadNone as a *function* attribute was superseded by
  // memory(none) in recent LLVM releases -- confirm this still validates on
  // this branch.
  AL = AL.addFnAttribute(C, Attribute::ReadNone);
  AL = AL.addFnAttribute(C, Attribute::NoUnwind);
  FunctionType *FTy = FunctionType::get(Type::getInt32Ty(C), {}, false);
  CilkRTSGetNworkers = M.getOrInsertFunction("__cilkrts_get_nworkers", FTy, AL);
  return CilkRTSGetNworkers;
}

FunctionCallee CilkABI::Get__cilkrts_init() {
  if (CilkRTSInit)
    return CilkRTSInit;

  LLVMContext &C = M.getContext();
  Type *VoidTy = Type::getVoidTy(C);
  CilkRTSInit = M.getOrInsertFunction("__cilkrts_init", VoidTy);

  return CilkRTSInit;
}

FunctionCallee CilkABI::Get__cilkrts_leave_frame() {
  if (CilkRTSLeaveFrame)
    return CilkRTSLeaveFrame;

  LLVMContext &C = M.getContext();
  AttributeList AL;
  AL = AL.addFnAttribute(C, Attribute::NoUnwind);
  Type *VoidTy = Type::getVoidTy(C);
  PointerType *StackFramePtrTy = PointerType::getUnqual(StackFrameTy);
  CilkRTSLeaveFrame = M.getOrInsertFunction("__cilkrts_leave_frame", AL, VoidTy,
                                            StackFramePtrTy);

  return CilkRTSLeaveFrame;
}

FunctionCallee CilkABI::Get__cilkrts_rethrow() {
  if (CilkRTSRethrow)
    return CilkRTSRethrow;

  LLVMContext &C = M.getContext();
  Type *VoidTy = Type::getVoidTy(C);
  PointerType *StackFramePtrTy = PointerType::getUnqual(StackFrameTy);
  CilkRTSRethrow = M.getOrInsertFunction("__cilkrts_rethrow", VoidTy,
                                         StackFramePtrTy);

  return CilkRTSRethrow;
}

FunctionCallee CilkABI::Get__cilkrts_sync() {
  if (CilkRTSSync)
    return CilkRTSSync;

  LLVMContext &C = M.getContext();
  Type *VoidTy = Type::getVoidTy(C);
  PointerType *StackFramePtrTy = PointerType::getUnqual(StackFrameTy);
  CilkRTSSync = M.getOrInsertFunction("__cilkrts_sync", VoidTy,
                                      StackFramePtrTy);

  return CilkRTSSync;
}

FunctionCallee CilkABI::Get__cilkrts_get_tls_worker() {
  if (CilkRTSGetTLSWorker)
    return CilkRTSGetTLSWorker;

  LLVMContext &C = M.getContext();
  PointerType *WorkerPtrTy = PointerType::getUnqual(WorkerTy);
  AttributeList AL;
  AL = AL.addFnAttribute(C, Attribute::NoUnwind);
  CilkRTSGetTLSWorker = M.getOrInsertFunction("__cilkrts_get_tls_worker", AL,
                                              WorkerPtrTy);

  return CilkRTSGetTLSWorker;
}

FunctionCallee CilkABI::Get__cilkrts_get_tls_worker_fast() {
  if (CilkRTSGetTLSWorkerFast)
    return CilkRTSGetTLSWorkerFast;

  LLVMContext &C = M.getContext();
  PointerType *WorkerPtrTy = PointerType::getUnqual(WorkerTy);
  AttributeList AL;
  AL = AL.addFnAttribute(C, Attribute::NoUnwind);
  CilkRTSGetTLSWorkerFast = M.getOrInsertFunction(
      "__cilkrts_get_tls_worker_fast", AL, WorkerPtrTy);

  return CilkRTSGetTLSWorkerFast;
}

FunctionCallee CilkABI::Get__cilkrts_bind_thread_1() {
  if (CilkRTSBindThread1)
    return CilkRTSBindThread1;

  LLVMContext &C = M.getContext();
  PointerType *WorkerPtrTy = PointerType::getUnqual(WorkerTy);
  AttributeList AL;
  AL = AL.addFnAttribute(C, Attribute::NoUnwind);
  CilkRTSBindThread1 = M.getOrInsertFunction("__cilkrts_bind_thread_1", AL,
                                             WorkerPtrTy);

  return CilkRTSBindThread1;
}

/// Helper methods for storing to and loading from struct fields.
+static Value *GEP(IRBuilder<> &B, Value *Base, Type *Ty, int Field) { + return B.CreateConstInBoundsGEP2_32(Ty, Base, 0, Field); +} + +static Align GetAlignment(const DataLayout &DL, StructType *STy, int Field) { + return DL.getPrefTypeAlign(STy->getElementType(Field)); +} + +static void StoreSTyField(IRBuilder<> &B, const DataLayout &DL, StructType *STy, + Value *Val, Value *Dst, int Field, + bool isVolatile = false, + AtomicOrdering Ordering = AtomicOrdering::NotAtomic) { + StoreInst *S = B.CreateAlignedStore(Val, GEP(B, Dst, STy, Field), + GetAlignment(DL, STy, Field), isVolatile); + S->setOrdering(Ordering); +} + +static Value *LoadSTyField( + IRBuilder<> &B, const DataLayout &DL, StructType *STy, Value *Src, + int Field, bool isVolatile = false, + AtomicOrdering Ordering = AtomicOrdering::NotAtomic) { + Value *GetElPtr = GEP(B, Src, STy, Field); + LoadInst *L = + B.CreateAlignedLoad(STy->getElementType(Field), + GetElPtr, GetAlignment(DL, STy, Field), isVolatile); + L->setOrdering(Ordering); + return L; +} + +/// Emit inline assembly code to save the floating point state, for x86 only. +void CilkABI::EmitSaveFloatingPointState(IRBuilder<> &B, Value *SF) { + LLVMContext &C = B.getContext(); + FunctionType *FTy = + FunctionType::get(Type::getVoidTy(C), + {PointerType::getUnqual(Type::getInt32Ty(C)), + PointerType::getUnqual(Type::getInt16Ty(C))}, + false); + + InlineAsm *Asm = InlineAsm::get(FTy, + "stmxcsr $0\n\t" + "fnstcw $1", + "*m,*m,~{dirflag},~{fpsr},~{flags}", + /*sideeffects*/ true); + + Value *Args[2] = { + GEP(B, SF, StackFrameTy, StackFrameFields::mxcsr), + GEP(B, SF, StackFrameTy, StackFrameFields::fpcsr) + }; + + CallInst *CI = B.CreateCall(Asm, Args); + CI->addParamAttr( + 0, Attribute::get(C, Attribute::ElementType, Type::getInt32Ty(C))); + CI->addParamAttr( + 1, Attribute::get(C, Attribute::ElementType, Type::getInt16Ty(C))); +} + +/// Helper to find a function with the given name, creating it if it doesn't +/// already exist. 
Returns false if the function was inserted, indicating that +/// the body of the function has yet to be defined. +static bool GetOrCreateFunction(Module &M, const StringRef FnName, + FunctionType *FTy, Function *&Fn) { + // If the function already exists then let the caller know. + if ((Fn = M.getFunction(FnName))) + return true; + + // Otherwise we have to create it. + Fn = cast(M.getOrInsertFunction(FnName, FTy).getCallee()); + + // Let the caller know that the function is incomplete and the body still + // needs to be added. + return false; +} + +/// Emit a call to the CILK_SETJMP function. +CallInst *CilkABI::EmitCilkSetJmp(IRBuilder<> &B, Value *SF) { + LLVMContext &Ctx = M.getContext(); + + // We always want to save the floating point state too + Triple T(M.getTargetTriple()); + if (T.getArch() == Triple::x86 || T.getArch() == Triple::x86_64) + EmitSaveFloatingPointState(B, SF); + + Type *Int32Ty = Type::getInt32Ty(Ctx); + Type *Int8PtrTy = Type::getInt8PtrTy(Ctx); + + // Get the buffer to store program state + // Buffer is a void**. + Value *Buf = GEP(B, SF, StackFrameTy, StackFrameFields::ctx); + Type *BufTy = StackFrameTy->getElementType(StackFrameFields::ctx); + // ->getArrayElementType(); + + // Store the frame pointer in the 0th slot + Value *FrameAddr = B.CreateCall( + Intrinsic::getDeclaration(&M, Intrinsic::frameaddress, Int8PtrTy), + ConstantInt::get(Int32Ty, 0)); + + Value *FrameSaveSlot = GEP(B, Buf, BufTy, 0); + B.CreateStore(FrameAddr, FrameSaveSlot, /*isVolatile=*/true); + + // Store stack pointer in the 2nd slot + Value *StackAddr = B.CreateCall( + Intrinsic::getDeclaration(&M, Intrinsic::stacksave)); + + Value *StackSaveSlot = GEP(B, Buf, BufTy, 2); + B.CreateStore(StackAddr, StackSaveSlot, /*isVolatile=*/true); + + Buf = B.CreateBitCast(Buf, Int8PtrTy); + + // Call LLVM's EH setjmp, which is lightweight. 
+ Function *F = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp); + + CallInst *SetjmpCall = B.CreateCall(F, Buf); + SetjmpCall->setCanReturnTwice(); + + return SetjmpCall; +} + +/// Get or create a LLVM function for __cilkrts_pop_frame. It is equivalent to +/// the following C code: +/// +/// __cilkrts_pop_frame(__cilkrts_stack_frame *sf) { +/// sf->worker->current_stack_frame = sf->call_parent; +/// sf->call_parent = nullptr; +/// } +Function *CilkABI::Get__cilkrts_pop_frame() { + // Get or create the __cilkrts_pop_frame function. + LLVMContext &Ctx = M.getContext(); + Type *VoidTy = Type::getVoidTy(Ctx); + PointerType *StackFramePtrTy = PointerType::getUnqual(StackFrameTy); + Function *Fn = nullptr; + if (GetOrCreateFunction(M, "__cilkrts_pop_frame", + FunctionType::get(VoidTy, {StackFramePtrTy}, false), + Fn)) + return Fn; + + // Create the body of __cilkrts_pop_frame. + const DataLayout &DL = M.getDataLayout(); + + Function::arg_iterator args = Fn->arg_begin(); + Value *SF = &*args; + + BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", Fn); + IRBuilder<> B(Entry); + + // sf->worker->current_stack_frame = sf->call_parent; + StoreSTyField(B, DL, WorkerTy, + LoadSTyField(B, DL, StackFrameTy, SF, + StackFrameFields::call_parent, + /*isVolatile=*/false, + AtomicOrdering::NotAtomic), + LoadSTyField(B, DL, StackFrameTy, SF, + StackFrameFields::worker, + /*isVolatile=*/false, + AtomicOrdering::Acquire), + WorkerFields::current_stack_frame, + /*isVolatile=*/false, + AtomicOrdering::Release); + + // sf->call_parent = nullptr; + StoreSTyField(B, DL, StackFrameTy, + Constant::getNullValue(PointerType::getUnqual(StackFrameTy)), + SF, StackFrameFields::call_parent, /*isVolatile=*/false, + AtomicOrdering::Release); + + B.CreateRetVoid(); + + Fn->setLinkage(Function::AvailableExternallyLinkage); + Fn->setDoesNotThrow(); + if (!DebugABICalls) + Fn->addFnAttr(Attribute::AlwaysInline); + + return Fn; +} + +/// Get or create a LLVM function for __cilkrts_detach. 
/// It is equivalent to the following C code:
///
/// void __cilkrts_detach(struct __cilkrts_stack_frame *sf) {
///   struct __cilkrts_worker *w = sf->worker;
///   struct __cilkrts_stack_frame *parent = sf->call_parent;
///   struct __cilkrts_stack_frame *volatile *tail = w->tail;
///
///   sf->spawn_helper_pedigree = w->pedigree;
///   parent->parent_pedigree = w->pedigree;
///
///   w->pedigree.rank = 0;
///   w->pedigree.next = &sf->spawn_helper_pedigree;
///
///   StoreStore_fence();
///
///   *tail++ = parent;
///   w->tail = tail;
///
///   sf->flags |= CILK_FRAME_DETACHED;
/// }
Function *CilkABI::Get__cilkrts_detach() {
  // Get or create the __cilkrts_detach function.
  LLVMContext &Ctx = M.getContext();
  Type *VoidTy = Type::getVoidTy(Ctx);
  PointerType *StackFramePtrTy = PointerType::getUnqual(StackFrameTy);
  Function *Fn = nullptr;
  if (GetOrCreateFunction(M, "__cilkrts_detach",
                          FunctionType::get(VoidTy, {StackFramePtrTy}, false),
                          Fn))
    return Fn;

  // Create the body of __cilkrts_detach.
  const DataLayout &DL = M.getDataLayout();

  Function::arg_iterator args = Fn->arg_begin();
  Value *SF = &*args;

  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", Fn);
  IRBuilder<> B(Entry);

  // struct __cilkrts_worker *w = sf->worker;
  Value *W = LoadSTyField(B, DL, StackFrameTy, SF,
                          StackFrameFields::worker, /*isVolatile=*/false,
                          AtomicOrdering::NotAtomic);

  // __cilkrts_stack_frame *parent = sf->call_parent;
  Value *Parent = LoadSTyField(B, DL, StackFrameTy, SF,
                               StackFrameFields::call_parent,
                               /*isVolatile=*/false,
                               AtomicOrdering::NotAtomic);

  // __cilkrts_stack_frame *volatile *tail = w->tail;
  Value *Tail = LoadSTyField(B, DL, WorkerTy, W,
                             WorkerFields::tail, /*isVolatile=*/false,
                             AtomicOrdering::Acquire);

  // sf->spawn_helper_pedigree = w->pedigree;
  // Both pedigrees live in the same union field (parent_pedigree); the
  // InsertValue writes the pedigree into member 0 of that union.
  Value *WorkerPedigree = LoadSTyField(B, DL, WorkerTy, W,
                                       WorkerFields::pedigree);
  Value *NewHelperPedigree = B.CreateInsertValue(
      LoadSTyField(B, DL, StackFrameTy, SF,
                   StackFrameFields::parent_pedigree), WorkerPedigree, { 0 });
  StoreSTyField(B, DL, StackFrameTy, NewHelperPedigree, SF,
                StackFrameFields::parent_pedigree);
  // parent->parent_pedigree = w->pedigree;
  Value *NewParentPedigree = B.CreateInsertValue(
      LoadSTyField(B, DL, StackFrameTy, Parent,
                   StackFrameFields::parent_pedigree), WorkerPedigree, { 0 });
  StoreSTyField(B, DL, StackFrameTy, NewParentPedigree, Parent,
                StackFrameFields::parent_pedigree);

  // w->pedigree.rank = 0;
  {
    StructType *STy = PedigreeTy;
    Type *Ty = STy->getElementType(PedigreeFields::rank);
    StoreSTyField(B, DL, STy, ConstantInt::get(Ty, 0),
                  GEP(B, W, WorkerTy, WorkerFields::pedigree),
                  PedigreeFields::rank,
                  /*isVolatile=*/false, AtomicOrdering::Release);
  }

  // w->pedigree.next = &sf->spawn_helper_pedigree;
  StoreSTyField(
      B, DL, PedigreeTy,
      GEP(B, GEP(B, SF, StackFrameTy, StackFrameFields::parent_pedigree),
          StackFrameTy->getElementType(StackFrameFields::parent_pedigree), 0),
      GEP(B, W, WorkerTy, WorkerFields::pedigree), PedigreeFields::next,
      /*isVolatile=*/false, AtomicOrdering::Release);

  // StoreStore_fence();
  B.CreateFence(AtomicOrdering::Release);

  // *tail++ = parent;
  B.CreateStore(Parent, Tail, /*isVolatile=*/true);
  Tail = B.CreateConstGEP1_32(StackFramePtrTy, Tail, 1);

  // w->tail = tail;
  StoreSTyField(B, DL, WorkerTy, Tail, W, WorkerFields::tail,
                /*isVolatile=*/false, AtomicOrdering::Release);

  // sf->flags |= CILK_FRAME_DETACHED;
  {
    Value *F = LoadSTyField(B, DL, StackFrameTy, SF,
                            StackFrameFields::flags, /*isVolatile=*/false,
                            AtomicOrdering::Acquire);
    F = B.CreateOr(F, ConstantInt::get(F->getType(), CILK_FRAME_DETACHED));
    StoreSTyField(B, DL, StackFrameTy, F, SF,
                  StackFrameFields::flags, /*isVolatile=*/false,
                  AtomicOrdering::Release);
  }

  B.CreateRetVoid();

  Fn->setLinkage(Function::AvailableExternallyLinkage);
  Fn->setDoesNotThrow();
  if (!DebugABICalls)
    Fn->addFnAttr(Attribute::AlwaysInline);

  return Fn;
}

/// Get or create a LLVM function for __cilk_sync.  Calls to this function is
/// always inlined, as it saves the current stack/frame pointer values.  This
/// function must be marked as returns_twice to allow it to be inlined, since
/// the call to setjmp is marked returns_twice.
///
/// It is equivalent to the following C code:
///
/// void __cilk_sync(struct __cilkrts_stack_frame *sf) {
///   if (sf->flags & CILK_FRAME_UNSYNCHED) {
///     sf->parent_pedigree = sf->worker->pedigree;
///     SAVE_FLOAT_STATE(*sf);
///     if (!CILK_SETJMP(sf->ctx))
///       __cilkrts_sync(sf);
///     else if (sf->flags & CILK_FRAME_EXCEPTING)
///       __cilkrts_rethrow(sf);
///   }
///   ++sf->worker->pedigree.rank;
/// }
///
/// With exceptions disabled in the compiler, the function
/// does not call __cilkrts_rethrow()
Function *CilkABI::GetCilkSyncFn(bool instrument) {
  // Get or create the __cilk_sync function.
  LLVMContext &Ctx = M.getContext();
  Type *VoidTy = Type::getVoidTy(Ctx);
  PointerType *StackFramePtrTy = PointerType::getUnqual(StackFrameTy);
  Function *Fn = nullptr;
  if (GetOrCreateFunction(M, "__cilk_sync",
                          FunctionType::get(VoidTy, {StackFramePtrTy}, false),
                          Fn))
    return Fn;

  // Create the body of __cilk_sync.
  const DataLayout &DL = M.getDataLayout();

  Function::arg_iterator args = Fn->arg_begin();
  Value *SF = &*args;

  BasicBlock *Entry = BasicBlock::Create(Ctx, "cilk.sync.test", Fn);
  BasicBlock *SaveState = BasicBlock::Create(Ctx, "cilk.sync.savestate", Fn);
  BasicBlock *SyncCall = BasicBlock::Create(Ctx, "cilk.sync.runtimecall", Fn);
  BasicBlock *Excepting = BasicBlock::Create(Ctx, "cilk.sync.excepting", Fn);
  BasicBlock *Rethrow = BasicBlock::Create(Ctx, "cilk.sync.rethrow", Fn);
  BasicBlock *Exit = BasicBlock::Create(Ctx, "cilk.sync.end", Fn);

  // Entry
  {
    IRBuilder<> B(Entry);

    // if (instrument)
    //   // cilk_sync_begin
    //   B.CreateCall(CILK_CSI_FUNC(sync_begin, M), SF);

    // if (sf->flags & CILK_FRAME_UNSYNCHED)
    Value *Flags = LoadSTyField(B, DL, StackFrameTy, SF,
                                StackFrameFields::flags, /*isVolatile=*/false,
                                AtomicOrdering::Acquire);
    Flags = B.CreateAnd(Flags,
                        ConstantInt::get(Flags->getType(),
                                         CILK_FRAME_UNSYNCHED));
    Value *Zero = ConstantInt::get(Flags->getType(), 0);
    Value *Unsynced = B.CreateICmpEQ(Flags, Zero);
    B.CreateCondBr(Unsynced, Exit, SaveState);
  }

  // SaveState
  {
    IRBuilder<> B(SaveState);

    // sf.parent_pedigree = sf.worker->pedigree;
    Value *NewParentPedigree = B.CreateInsertValue(
        LoadSTyField(B, DL, StackFrameTy, SF,
                     StackFrameFields::parent_pedigree),
        LoadSTyField(B, DL, WorkerTy,
                     LoadSTyField(B, DL, StackFrameTy, SF,
                                  StackFrameFields::worker,
                                  /*isVolatile=*/false,
                                  AtomicOrdering::Acquire),
                     WorkerFields::pedigree), { 0 });
    StoreSTyField(B, DL, StackFrameTy, NewParentPedigree, SF,
                  StackFrameFields::parent_pedigree);

    // if (!CILK_SETJMP(sf.ctx))
    Value *C = EmitCilkSetJmp(B, SF);
    C = B.CreateICmpEQ(C, ConstantInt::get(C->getType(), 0));
    B.CreateCondBr(C, SyncCall, Excepting);
  }

  // SyncCall
  {
    IRBuilder<> B(SyncCall);

    // __cilkrts_sync(sf);
    B.CreateCall(CILKRTS_FUNC(sync), SF);
    B.CreateBr(Exit);
  }

  // Excepting
  // NOTE(review): Rethrow is unconditionally created above, so it is always
  // non-null here and the else-branch below is dead -- this looks like a
  // remnant of a configuration where Rethrow could be null; confirm.
  {
    IRBuilder<> B(Excepting);
    if (Rethrow) {
      // if (sf->flags & CILK_FRAME_EXCEPTING)
      Value *Flags = LoadSTyField(B, DL, StackFrameTy, SF,
                                  StackFrameFields::flags,
                                  /*isVolatile=*/false,
                                  AtomicOrdering::Acquire);
      Flags = B.CreateAnd(Flags,
                          ConstantInt::get(Flags->getType(),
                                           CILK_FRAME_EXCEPTING));
      Value *Zero = ConstantInt::get(Flags->getType(), 0);
      Value *CanExcept = B.CreateICmpEQ(Flags, Zero);
      B.CreateCondBr(CanExcept, Exit, Rethrow);
    } else {
      B.CreateBr(Exit);
    }
  }

  // Rethrow
  if (Rethrow) {
    IRBuilder<> B(Rethrow);
    // __cilkrts_rethrow(sf);
    B.CreateCall(CILKRTS_FUNC(rethrow), SF)->setDoesNotReturn();
    B.CreateUnreachable();
  }

  // Exit
  {
    IRBuilder<> B(Exit);

    // ++sf.worker->pedigree.rank;
    Value *Worker = LoadSTyField(B, DL, StackFrameTy, SF,
                                 StackFrameFields::worker,
                                 /*isVolatile=*/false,
                                 AtomicOrdering::Acquire);
    Value *Pedigree = GEP(B, Worker, WorkerTy, WorkerFields::pedigree);
    Value *Rank = GEP(B, Pedigree, PedigreeTy, PedigreeFields::rank);
    Type *RankTy = PedigreeTy->getElementType(PedigreeFields::rank);
    Align RankAlignment = GetAlignment(DL, PedigreeTy, PedigreeFields::rank);
    B.CreateAlignedStore(
        B.CreateAdd(B.CreateAlignedLoad(RankTy, Rank, RankAlignment),
                    ConstantInt::get(RankTy, 1)),
        Rank, RankAlignment);
    // if (instrument)
    //   // cilk_sync_end
    //   B.CreateCall(CILK_CSI_FUNC(sync_end, M), SF);

    B.CreateRetVoid();
  }

  Fn->setLinkage(Function::AvailableExternallyLinkage);
  if (!DebugABICalls)
    Fn->addFnAttr(Attribute::AlwaysInline);
  Fn->addFnAttr(Attribute::ReturnsTwice);

  return Fn;
}

/// Get or create a LLVM function for __cilk_sync_nothrow.
/// Calls to __cilk_sync_nothrow are always inlined, as it saves the current
/// stack/frame pointer values.  This function must be marked as returns_twice
/// to allow it to be inlined, since the call to setjmp is marked
/// returns_twice.
///
/// It is equivalent to the following C code:
///
/// void __cilk_sync_nothrow(struct __cilkrts_stack_frame *sf) {
///   if (sf->flags & CILK_FRAME_UNSYNCHED) {
///     sf->parent_pedigree = sf->worker->pedigree;
///     SAVE_FLOAT_STATE(*sf);
///     if (!CILK_SETJMP(sf->ctx))
///       __cilkrts_sync(sf);
///   }
///   ++sf->worker->pedigree.rank;
/// }
///
/// With exceptions disabled in the compiler, the function
/// does not call __cilkrts_rethrow()
Function *CilkABI::GetCilkSyncNothrowFn(bool instrument) {
  // Get or create the __cilk_sync_nothrow function.
  LLVMContext &Ctx = M.getContext();
  Type *VoidTy = Type::getVoidTy(Ctx);
  PointerType *StackFramePtrTy = PointerType::getUnqual(StackFrameTy);
  Function *Fn = nullptr;
  if (GetOrCreateFunction(M, "__cilk_sync_nothrow",
                          FunctionType::get(VoidTy, {StackFramePtrTy}, false),
                          Fn))
    return Fn;

  // Create the body of __cilk_sync_nothrow.
  const DataLayout &DL = M.getDataLayout();

  Function::arg_iterator args = Fn->arg_begin();
  Value *SF = &*args;

  BasicBlock *Entry = BasicBlock::Create(Ctx, "cilk.sync.test", Fn);
  BasicBlock *SaveState = BasicBlock::Create(Ctx, "cilk.sync.savestate", Fn);
  BasicBlock *SyncCall = BasicBlock::Create(Ctx, "cilk.sync.runtimecall", Fn);
  BasicBlock *Exit = BasicBlock::Create(Ctx, "cilk.sync.end", Fn);

  // Entry
  {
    IRBuilder<> B(Entry);

    // if (instrument)
    //   // cilk_sync_begin
    //   B.CreateCall(CILK_CSI_FUNC(sync_begin, M), SF);

    // if (sf->flags & CILK_FRAME_UNSYNCHED)
    Value *Flags = LoadSTyField(B, DL, StackFrameTy, SF,
                                StackFrameFields::flags, /*isVolatile=*/false,
                                AtomicOrdering::Acquire);
    Flags = B.CreateAnd(Flags,
                        ConstantInt::get(Flags->getType(),
                                         CILK_FRAME_UNSYNCHED));
    Value *Zero = ConstantInt::get(Flags->getType(), 0);
    Value *Unsynced = B.CreateICmpEQ(Flags, Zero);
    B.CreateCondBr(Unsynced, Exit, SaveState);
  }

  // SaveState
  {
    IRBuilder<> B(SaveState);

    // sf.parent_pedigree = sf.worker->pedigree;
    Value *NewParentPedigree = B.CreateInsertValue(
        LoadSTyField(B, DL, StackFrameTy, SF,
                     StackFrameFields::parent_pedigree),
        LoadSTyField(B, DL, WorkerTy,
                     LoadSTyField(B, DL, StackFrameTy, SF,
                                  StackFrameFields::worker,
                                  /*isVolatile=*/false,
                                  AtomicOrdering::Acquire),
                     WorkerFields::pedigree), { 0 });
    StoreSTyField(B, DL, StackFrameTy, NewParentPedigree, SF,
                  StackFrameFields::parent_pedigree);

    // if (!CILK_SETJMP(sf.ctx))
    Value *C = EmitCilkSetJmp(B, SF);
    C = B.CreateICmpEQ(C, ConstantInt::get(C->getType(), 0));
    B.CreateCondBr(C, SyncCall, Exit);
  }

  // SyncCall
  {
    IRBuilder<> B(SyncCall);

    // __cilkrts_sync(sf);
    B.CreateCall(CILKRTS_FUNC(sync), SF);
    B.CreateBr(Exit);
  }

  // Exit
  {
    IRBuilder<> B(Exit);

    // ++sf.worker->pedigree.rank;
    Value *Worker = LoadSTyField(B, DL, StackFrameTy, SF,
                                 StackFrameFields::worker,
                                 /*isVolatile=*/false,
                                 AtomicOrdering::Acquire);
    Value *Pedigree = GEP(B, Worker, WorkerTy, WorkerFields::pedigree);
    Value *Rank = GEP(B, Pedigree, PedigreeTy, PedigreeFields::rank);
    Type *RankTy = PedigreeTy->getElementType(PedigreeFields::rank);
    Align RankAlignment = GetAlignment(DL, PedigreeTy, PedigreeFields::rank);
    B.CreateAlignedStore(
        B.CreateAdd(B.CreateAlignedLoad(RankTy, Rank, RankAlignment),
                    ConstantInt::get(RankTy, 1)),
        Rank, RankAlignment);
    // if (instrument)
    //   // cilk_sync_end
    //   B.CreateCall(CILK_CSI_FUNC(sync_end, M), SF);

    B.CreateRetVoid();
  }

  Fn->setLinkage(Function::InternalLinkage);
  Fn->setDoesNotThrow();
  if (!DebugABICalls)
    Fn->addFnAttr(Attribute::AlwaysInline);
  Fn->addFnAttr(Attribute::ReturnsTwice);

  return Fn;
}

/// Get or create a LLVM function for __cilk_catch_exception.  Calls to this
/// function is always inlined, as it saves the current stack/frame pointer
/// values.  This function must be marked as returns_twice to allow it to be
/// inlined, since the call to setjmp is marked returns_twice.
///
/// It is equivalent to the following C code:
///
/// void *__cilk_catch_exception(struct __cilkrts_stack_frame *sf, void *Exn) {
///   if (sf->flags & CILK_FRAME_UNSYNCHED) {
///     if (!CILK_SETJMP(sf->ctx)) {
///       sf->except_data = Exn;
///       sf->flags |= CILK_FRAME_EXCEPTING;
///       __cilkrts_sync(sf);
///     }
///     sf->flags &= ~CILK_FRAME_EXCEPTING
///     Exn = sf->except_data;
///   }
///   ++sf->worker->pedigree.rank;
///   return Exn;
/// }
///
/// With exceptions disabled in the compiler, the function
/// does not call __cilkrts_rethrow()
Function *CilkABI::GetCilkCatchExceptionFn(Type *ExnTy) {
  // Get or create the __cilk_catch_exception function.
  LLVMContext &Ctx = M.getContext();

  PointerType *StackFramePtrTy = PointerType::getUnqual(StackFrameTy);
  Function *Fn = nullptr;
  if (GetOrCreateFunction(M, "__cilk_catch_exception",
                          FunctionType::get(ExnTy,
                                            {StackFramePtrTy, ExnTy},
                                            false), Fn))
    return Fn;

  // Create the body of __cilk_catch_exeption
  const DataLayout &DL = M.getDataLayout();

  Function::arg_iterator args = Fn->arg_begin();
  Value *SF = &*args++;
  Value *Exn = &*args;

  BasicBlock *Entry = BasicBlock::Create(Ctx, "cilk.catch.test", Fn);
  BasicBlock *SetJmp = BasicBlock::Create(Ctx, "cilk.catch.setjmp", Fn);
  BasicBlock *SyncCall = BasicBlock::Create(Ctx, "cilk.catch.runtimecall", Fn);
  BasicBlock *Catch = BasicBlock::Create(Ctx, "cilk.catch.catch", Fn);
  BasicBlock *Exit = BasicBlock::Create(Ctx, "cilk.catch.end", Fn);

  // Set in the Catch block; only referenced in Exit via the PHI incoming edge
  // from Catch, so it is always defined before use.
  Value *NewExn;

  // Entry
  {
    IRBuilder<> B(Entry);

    // if (sf->flags & CILK_FRAME_UNSYNCHED)
    Value *Flags = LoadSTyField(B, DL, StackFrameTy, SF,
                                StackFrameFields::flags, /*isVolatile=*/false,
                                AtomicOrdering::Acquire);
    Flags = B.CreateAnd(Flags,
                        ConstantInt::get(Flags->getType(),
                                         CILK_FRAME_UNSYNCHED));
    Value *Zero = ConstantInt::get(Flags->getType(), 0);
    Value *Unsynced = B.CreateICmpEQ(Flags, Zero);
    B.CreateCondBr(Unsynced, Exit, SetJmp);
  }

  // SetJmp
  {
    IRBuilder<> B(SetJmp);

    // if (!CILK_SETJMP(sf.ctx))
    Value *C = EmitCilkSetJmp(B, SF);
    C = B.CreateICmpEQ(C, ConstantInt::get(C->getType(), 0));
    B.CreateCondBr(C, SyncCall, Catch);
  }

  // SyncCall
  {
    IRBuilder<> B(SyncCall);

    // sf->except_data = Exn;
    // sf->flags = sf->flags | CILK_FRAME_EXCEPTING;
    StoreSTyField(B, DL, StackFrameTy, Exn, SF,
                  StackFrameFields::except_data, /*isVolatile=*/false,
                  AtomicOrdering::Release);
    Value *Flags = LoadSTyField(B, DL, StackFrameTy, SF,
                                StackFrameFields::flags,
                                /*isVolatile=*/false,
                                AtomicOrdering::Acquire);
    Flags = B.CreateOr(Flags, ConstantInt::get(Flags->getType(),
                                               CILK_FRAME_EXCEPTING));
    StoreSTyField(B, DL, StackFrameTy, Flags, SF,
                  StackFrameFields::flags, /*isVolatile=*/false,
                  AtomicOrdering::Release);

    // __cilkrts_sync(sf);
    B.CreateCall(CILKRTS_FUNC(sync), SF);
    B.CreateBr(Catch);
  }

  // Catch
  {
    IRBuilder<> B(Catch);
    // sf->flags = sf->flags & ~CILK_FRAME_EXCEPTING;
    Value *Flags = LoadSTyField(B, DL, StackFrameTy, SF,
                                StackFrameFields::flags,
                                /*isVolatile=*/false,
                                AtomicOrdering::Acquire);
    Flags = B.CreateAnd(Flags, ConstantInt::get(Flags->getType(),
                                                ~CILK_FRAME_EXCEPTING));
    StoreSTyField(B, DL, StackFrameTy, Flags, SF,
                  StackFrameFields::flags, /*isVolatile=*/false,
                  AtomicOrdering::Release);

    // Exn = sf->except_data;
    NewExn = LoadSTyField(B, DL, StackFrameTy, SF,
                          StackFrameFields::except_data, /*isVolatile=*/false,
                          AtomicOrdering::Acquire);
    B.CreateBr(Exit);
  }

  // Exit
  {
    IRBuilder<> B(Exit);

    PHINode *ExnPN = B.CreatePHI(ExnTy, 2);
    ExnPN->addIncoming(Exn, Entry);
    ExnPN->addIncoming(NewExn, Catch);

    // ++sf.worker->pedigree.rank;
    Value *Worker = LoadSTyField(B, DL, StackFrameTy, SF,
                                 StackFrameFields::worker,
                                 /*isVolatile=*/false,
                                 AtomicOrdering::Acquire);
    Value *Pedigree = GEP(B, Worker, WorkerTy, WorkerFields::pedigree);
    Value *Rank = GEP(B, Pedigree, PedigreeTy, PedigreeFields::rank);
    Type *RankTy = PedigreeTy->getElementType(PedigreeFields::rank);
    Align RankAlignment = GetAlignment(DL, PedigreeTy, PedigreeFields::rank);
    B.CreateAlignedStore(
        B.CreateAdd(B.CreateAlignedLoad(RankTy, Rank, RankAlignment),
                    ConstantInt::get(RankTy, 1)),
        Rank, RankAlignment);

    B.CreateRet(ExnPN);
  }

  Fn->setLinkage(Function::InternalLinkage);
  if (!DebugABICalls)
    Fn->addFnAttr(Attribute::AlwaysInline);
  Fn->addFnAttr(Attribute::ReturnsTwice);

  return Fn;
}

/// Get or create a LLVM function for __cilkrts_enter_frame.
/// It is equivalent to the following C code:
///
/// void __cilkrts_enter_frame_1(struct __cilkrts_stack_frame *sf)
/// {
///     struct __cilkrts_worker *w = __cilkrts_get_tls_worker();
///     if (w == 0) { /* slow path, rare */
///         w = __cilkrts_bind_thread_1();
///         sf->flags = CILK_FRAME_LAST | CILK_FRAME_VERSION;
///     } else {
///         sf->flags = CILK_FRAME_VERSION;
///     }
///     sf->call_parent = w->current_stack_frame;
///     sf->worker = w;
///     /* sf->except_data is only valid when CILK_FRAME_EXCEPTING is set */
///     w->current_stack_frame = sf;
/// }
Function *CilkABI::Get__cilkrts_enter_frame_1() {
  // Get or create the __cilkrts_enter_frame_1 function.
  LLVMContext &Ctx = M.getContext();
  Type *VoidTy = Type::getVoidTy(Ctx);
  PointerType *StackFramePtrTy = PointerType::getUnqual(StackFrameTy);
  Function *Fn = nullptr;
  if (GetOrCreateFunction(M, "__cilkrts_enter_frame_1",
                          FunctionType::get(VoidTy, {StackFramePtrTy}, false),
                          Fn))
    return Fn;

  // Create the body of __cilkrts_enter_frame_1.
  const DataLayout &DL = M.getDataLayout();

  Function::arg_iterator args = Fn->arg_begin();
  Value *SF = &*args;

  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", Fn);
  BasicBlock *SlowPath = BasicBlock::Create(Ctx, "slowpath", Fn);
  BasicBlock *FastPath = BasicBlock::Create(Ctx, "fastpath", Fn);
  BasicBlock *Cont = BasicBlock::Create(Ctx, "cont", Fn);

  PointerType *WorkerPtrTy = PointerType::getUnqual(WorkerTy);
  StructType *SFTy = StackFrameTy;

  // Block  (Entry)
  CallInst *W = nullptr;
  {
    IRBuilder<> B(Entry);
    // struct __cilkrts_worker *w = __cilkrts_get_tls_worker();
    if (fastCilk)
      W = B.CreateCall(CILKRTS_FUNC(get_tls_worker_fast));
    else
      W = B.CreateCall(CILKRTS_FUNC(get_tls_worker));

    // if (w == 0)
    Value *Cond = B.CreateICmpEQ(W, ConstantPointerNull::get(WorkerPtrTy));
    B.CreateCondBr(Cond, SlowPath, FastPath);
  }
  // Block  (SlowPath)
  CallInst *Wslow = nullptr;
  {
    IRBuilder<> B(SlowPath);
    // w = __cilkrts_bind_thread_1();
    Wslow = B.CreateCall(CILKRTS_FUNC(bind_thread_1));
    // sf->flags = CILK_FRAME_LAST | CILK_FRAME_VERSION;
    Type *Ty = SFTy->getElementType(StackFrameFields::flags);
    StoreSTyField(B, DL, StackFrameTy,
                  ConstantInt::get(Ty, CILK_FRAME_LAST | CILK_FRAME_VERSION),
                  SF, StackFrameFields::flags, /*isVolatile=*/false,
                  AtomicOrdering::Release);
    B.CreateBr(Cont);
  }
  // Block  (FastPath)
  {
    IRBuilder<> B(FastPath);
    // sf->flags = CILK_FRAME_VERSION;
    Type *Ty = SFTy->getElementType(StackFrameFields::flags);
    StoreSTyField(B, DL, StackFrameTy,
                  ConstantInt::get(Ty, CILK_FRAME_VERSION),
                  SF, StackFrameFields::flags, /*isVolatile=*/false,
                  AtomicOrdering::Release);
    B.CreateBr(Cont);
  }
  // Block  (Cont)
  {
    IRBuilder<> B(Cont);
    // Merge the worker pointer from the two paths.  The inner PHI named W
    // deliberately shadows the outer CallInst *W (saved as Wfast first).
    Value *Wfast = W;
    PHINode *W = B.CreatePHI(WorkerPtrTy, 2);
    W->addIncoming(Wslow, SlowPath);
    W->addIncoming(Wfast, FastPath);

    // sf->call_parent = w->current_stack_frame;
    StoreSTyField(B, DL, StackFrameTy,
                  LoadSTyField(B, DL, WorkerTy, W,
                               WorkerFields::current_stack_frame,
                               /*isVolatile=*/false,
                               AtomicOrdering::Acquire),
                  SF, StackFrameFields::call_parent, /*isVolatile=*/false,
                  AtomicOrdering::Release);
    // sf->worker = w;
    StoreSTyField(B, DL, StackFrameTy, W, SF,
                  StackFrameFields::worker, /*isVolatile=*/false,
                  AtomicOrdering::Release);
    // w->current_stack_frame = sf;
    StoreSTyField(B, DL, WorkerTy, SF, W,
                  WorkerFields::current_stack_frame, /*isVolatile=*/false,
                  AtomicOrdering::Release);

    B.CreateRetVoid();
  }

  Fn->setLinkage(Function::AvailableExternallyLinkage);
  Fn->setDoesNotThrow();
  if (!DebugABICalls)
    Fn->addFnAttr(Attribute::AlwaysInline);

  return Fn;
}

/// Get or create a LLVM function for __cilkrts_enter_frame_fast.  It is
/// equivalent to the following C code:
///
/// void __cilkrts_enter_frame_fast_1(struct __cilkrts_stack_frame *sf)
/// {
///     struct __cilkrts_worker *w = __cilkrts_get_tls_worker();
///     sf->flags = CILK_FRAME_VERSION;
///     sf->call_parent = w->current_stack_frame;
///     sf->worker = w;
///     /* sf->except_data is only valid when CILK_FRAME_EXCEPTING is set */
///     w->current_stack_frame = sf;
/// }
Function *CilkABI::Get__cilkrts_enter_frame_fast_1() {
  // Get or create the __cilkrts_enter_frame_fast_1 function.
  LLVMContext &Ctx = M.getContext();
  Type *VoidTy = Type::getVoidTy(Ctx);
  PointerType *StackFramePtrTy = PointerType::getUnqual(StackFrameTy);
  Function *Fn = nullptr;
  if (GetOrCreateFunction(M, "__cilkrts_enter_frame_fast_1",
                          FunctionType::get(VoidTy, {StackFramePtrTy}, false),
                          Fn))
    return Fn;

  // Create the body of __cilkrts_enter_frame_fast_1.
  const DataLayout &DL = M.getDataLayout();

  Function::arg_iterator args = Fn->arg_begin();
  Value *SF = &*args;

  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", Fn);

  IRBuilder<> B(Entry);
  Value *W;

  // struct __cilkrts_worker *w = __cilkrts_get_tls_worker();
  // NOTE(review): unlike enter_frame_1, this always takes the fast TLS path;
  // the fastCilk conditional is intentionally left commented out here.
  // if (fastCilk)
  W = B.CreateCall(CILKRTS_FUNC(get_tls_worker_fast));
  // else
  //   W = B.CreateCall(CILKRTS_FUNC(get_tls_worker));

  StructType *SFTy = StackFrameTy;
  Type *Ty = SFTy->getElementType(StackFrameFields::flags);

  // sf->flags = CILK_FRAME_VERSION;
  StoreSTyField(B, DL, StackFrameTy,
                ConstantInt::get(Ty, CILK_FRAME_VERSION),
                SF, StackFrameFields::flags, /*isVolatile=*/false,
                AtomicOrdering::Release);
  // sf->call_parent = w->current_stack_frame;
  StoreSTyField(B, DL, StackFrameTy,
                LoadSTyField(B, DL, WorkerTy, W,
                             WorkerFields::current_stack_frame,
                             /*isVolatile=*/false,
                             AtomicOrdering::Acquire),
                SF, StackFrameFields::call_parent, /*isVolatile=*/false,
                AtomicOrdering::Release);
  // sf->worker = w;
  StoreSTyField(B, DL, StackFrameTy, W, SF,
                StackFrameFields::worker, /*isVolatile=*/false,
                AtomicOrdering::Release);
  // w->current_stack_frame = sf;
  StoreSTyField(B, DL, WorkerTy, SF, W,
                WorkerFields::current_stack_frame, /*isVolatile=*/false,
                AtomicOrdering::Release);

  B.CreateRetVoid();

  Fn->setLinkage(Function::AvailableExternallyLinkage);
  Fn->setDoesNotThrow();
  if (!DebugABICalls)
    Fn->addFnAttr(Attribute::AlwaysInline);

  return Fn;
}

// /// Get or create a LLVM function for __cilk_parent_prologue.
// /// It is equivalent to the following C code:
// ///
// /// void __cilk_parent_prologue(__cilkrts_stack_frame *sf) {
// ///   __cilkrts_enter_frame_1(sf);
// /// }
// static Function *GetCilkParentPrologue(Module &M) {
//   Function *Fn = 0;

//   if (GetOrCreateFunction("__cilk_parent_prologue", M, Fn))
//     return Fn;

//   // If we get here we need to add the function body
//   LLVMContext &Ctx = M.getContext();

//   Function::arg_iterator args = Fn->arg_begin();
//   Value *SF = &*args;

//   BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", Fn);
//   IRBuilder<> B(Entry);

//   // __cilkrts_enter_frame_1(sf)
//   B.CreateCall(CILKRTS_FUNC(enter_frame_1), SF);

//   B.CreateRetVoid();

//   Fn->addFnAttr(Attribute::InlineHint);

//   return Fn;
// }

/// Get or create a LLVM function for __cilk_parent_epilogue.  It is equivalent
/// to the following C code:
///
/// void __cilk_parent_epilogue(__cilkrts_stack_frame *sf) {
///   __cilkrts_pop_frame(sf);
///   if (sf->flags != CILK_FRAME_VERSION)
///     __cilkrts_leave_frame(sf);
/// }
Function *CilkABI::GetCilkParentEpilogueFn(bool instrument) {
  // Get or create the __cilk_parent_epilogue function.
  LLVMContext &Ctx = M.getContext();
  Type *VoidTy = Type::getVoidTy(Ctx);
  PointerType *StackFramePtrTy = PointerType::getUnqual(StackFrameTy);
  Function *Fn = nullptr;
  if (GetOrCreateFunction(M, "__cilk_parent_epilogue",
                          FunctionType::get(VoidTy, {StackFramePtrTy}, false),
                          Fn))
    return Fn;

  // Create the body of __cilk_parent_epilogue.
  const DataLayout &DL = M.getDataLayout();

  Function::arg_iterator args = Fn->arg_begin();
  Value *SF = &*args;

  BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", Fn),
    *B1 = BasicBlock::Create(Ctx, "body", Fn),
    *Exit  = BasicBlock::Create(Ctx, "exit", Fn);
  CallInst *PopFrame;

  // Entry
  {
    IRBuilder<> B(Entry);

    // if (instrument)
    //   // cilk_leave_begin
    //   B.CreateCall(CILK_CSI_FUNC(leave_begin, M), SF);

    // __cilkrts_pop_frame(sf)
    PopFrame = B.CreateCall(CILKRTS_FUNC(pop_frame), SF);

    // if (sf->flags != CILK_FRAME_VERSION)
    Value *Flags = LoadSTyField(B, DL, StackFrameTy, SF,
                                StackFrameFields::flags, /*isVolatile=*/false,
                                AtomicOrdering::Acquire);
    Value *Cond = B.CreateICmpNE(
        Flags, ConstantInt::get(Flags->getType(), CILK_FRAME_VERSION));
    B.CreateCondBr(Cond, B1, Exit);
  }

  // B1
  {
    IRBuilder<> B(B1);

    // __cilkrts_leave_frame(sf);
    B.CreateCall(CILKRTS_FUNC(leave_frame), SF);
    B.CreateBr(Exit);
  }

  // Exit
  {
    IRBuilder<> B(Exit);
    // if (instrument)
    //   // cilk_leave_end
    //   B.CreateCall(CILK_CSI_FUNC(leave_end, M));
    B.CreateRetVoid();
  }

  // Inline the pop_frame call.
  CallsToInline.insert(PopFrame);

  Fn->setLinkage(Function::AvailableExternallyLinkage);
  Fn->setDoesNotThrow();
  if (!DebugABICalls)
    Fn->addFnAttr(Attribute::AlwaysInline);

  return Fn;
}

// Name used for every __cilkrts_stack_frame alloca created by this target.
static const StringRef stack_frame_name = "__cilkrts_sf";

/// Create the __cilkrts_stack_frame for the spawning function.
+AllocaInst *CilkABI::CreateStackFrame(Function &F) { + const DataLayout &DL = M.getDataLayout(); + Type *SFTy = StackFrameTy; + + IRBuilder<> B(&*F.getEntryBlock().getFirstInsertionPt()); + AllocaInst *SF = B.CreateAlloca(SFTy, DL.getAllocaAddrSpace(), + /*ArraySize*/nullptr, + /*Name*/stack_frame_name); + SF->setAlignment(Align(8)); + + return SF; +} + +Value *CilkABI::GetOrInitCilkStackFrame(Function &F, bool Helper, + bool instrument) { + if (DetachCtxToStackFrame.count(&F)) + return DetachCtxToStackFrame[&F]; + + AllocaInst *SF = CreateStackFrame(F); + DetachCtxToStackFrame[&F] = SF; + BasicBlock::iterator InsertPt = ++SF->getIterator(); + IRBuilder<> IRB(&(F.getEntryBlock()), InsertPt); + + // if (instrument) { + // Type *Int8PtrTy = IRB.getInt8PtrTy(); + // Value *ThisFn = ConstantExpr::getBitCast(&F, Int8PtrTy); + // Value *ReturnAddress = + // IRB.CreateCall(Intrinsic::getDeclaration(M, + // Intrinsic::returnaddress), + // IRB.getInt32(0)); + // StackSave = + // IRB.CreateCall(Intrinsic::getDeclaration(M, + // Intrinsic::stacksave)); + // if (Helper) { + // Value *begin_args[3] = { SF, ThisFn, ReturnAddress }; + // IRB.CreateCall(CILK_CSI_FUNC(enter_helper_begin, *M), + // begin_args); + // } else { + // Value *begin_args[4] = { IRB.getInt32(0), SF, ThisFn, ReturnAddress }; + // IRB.CreateCall(CILK_CSI_FUNC(enter_begin, *M), begin_args); + // } + // } + Value *Args[1] = { SF }; + if (Helper || fastCilk) + IRB.CreateCall(CILKRTS_FUNC(enter_frame_fast_1), Args); + else + IRB.CreateCall(CILKRTS_FUNC(enter_frame_1), Args); + + // if (instrument) { + // Value* end_args[2] = { SF, StackSave }; + // IRB.CreateCall(CILK_CSI_FUNC(enter_end, *M), end_args); + // } + + EscapeEnumerator EE(F, "cilkabi_epilogue", false); + while (IRBuilder<> *AtExit = EE.Next()) { + if (isa(AtExit->GetInsertPoint())) + AtExit->CreateCall(GetCilkParentEpilogueFn(instrument), Args, ""); + else if (ResumeInst *RI = dyn_cast(AtExit->GetInsertPoint())) { + // /* + // sf.flags = sf.flags | 
CILK_FRAME_EXCEPTING; + // sf.except_data = Exn; + // */ + // IRBuilder<> B(RI); + // Value *Exn = AtExit->CreateExtractValue(RI->getValue(), + // ArrayRef(0)); + // Value *Flags = LoadSTyField(*AtExit, DL, StackFrameTy, SF, + // StackFrameFields::flags, + // /*isVolatile=*/false, + // AtomicOrdering::Acquire); + // Flags = AtExit->CreateOr(Flags, + // ConstantInt::get(Flags->getType(), + // CILK_FRAME_EXCEPTING)); + // StoreSTyField(*AtExit, DL, StackFrameTy, Flags, SF, + // StackFrameFields::flags, /*isVolatile=*/false, + // AtomicOrdering::Release); + // StoreSTyField(*AtExit, DL, StackFrameTy, Exn, SF, + // StackFrameFields::except_data, /*isVolatile=*/false, + // AtomicOrdering::Release); + /* + __cilkrts_pop_frame(&sf); + if (sf->flags) + __cilkrts_leave_frame(&sf); + */ + AtExit->CreateCall(GetCilkParentEpilogueFn(instrument), Args, ""); + } + } + + return SF; +} + +bool CilkABI::makeFunctionDetachable(Function &Extracted, Instruction *DetachPt, + Instruction *TaskFrameCreate, + bool instrument) { + /* + __cilkrts_stack_frame sf; + __cilkrts_enter_frame_fast_1(&sf); + __cilkrts_detach(); + *x = f(y); + */ + + const DataLayout& DL = M.getDataLayout(); + AllocaInst *SF = CreateStackFrame(Extracted); + DetachCtxToStackFrame[&Extracted] = SF; + assert(SF && "Error creating Cilk stack frame in helper."); + Value *Args[1] = { SF }; + + // Scan function to see if it detaches. 
+ LLVM_DEBUG({ + bool SimpleHelper = !canDetach(&Extracted); + if (!SimpleHelper) + dbgs() << "NOTE: Detachable helper function itself detaches.\n"; + }); + + BasicBlock::iterator InsertPt = ++SF->getIterator(); + IRBuilder<> IRB(&(Extracted.getEntryBlock()), InsertPt); + if (TaskFrameCreate) + IRB.SetInsertPoint(TaskFrameCreate); + + // if (instrument) { + // Type *Int8PtrTy = IRB.getInt8PtrTy(); + // Value *ThisFn = ConstantExpr::getBitCast(&Extracted, Int8PtrTy); + // Value *ReturnAddress = + // IRB.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::returnaddress), + // IRB.getInt32(0)); + // StackSave = + // IRB.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stacksave)); + // Value *begin_args[3] = { SF, ThisFn, ReturnAddress }; + // IRB.CreateCall(CILK_CSI_FUNC(enter_helper_begin, *M), begin_args); + // } + + IRB.CreateCall(CILKRTS_FUNC(enter_frame_fast_1), Args); + + // if (instrument) { + // Value *end_args[2] = { SF, StackSave }; + // IRB.CreateCall(CILK_CSI_FUNC(enter_end, *M), end_args); + // } + + // __cilkrts_detach() + { + // if (instrument) + // IRB.CreateCall(CILK_CSI_FUNC(detach_begin, *M), args); + if (DetachPt) + IRB.SetInsertPoint(DetachPt); + IRB.CreateCall(CILKRTS_FUNC(detach), Args); + + // if (instrument) + // IRB.CreateCall(CILK_CSI_FUNC(detach_end, *M)); + } + + EscapeEnumerator EE(Extracted, "cilkabi_epilogue", false); + while (IRBuilder<> *AtExit = EE.Next()) { + if (isa(AtExit->GetInsertPoint())) + AtExit->CreateCall(GetCilkParentEpilogueFn(instrument), Args, ""); + else if (ResumeInst *RI = dyn_cast(AtExit->GetInsertPoint())) { + /* + sf.flags = sf.flags | CILK_FRAME_EXCEPTING; + sf.except_data = Exn; + */ + IRBuilder<> B(RI); + Value *Exn = AtExit->CreateExtractValue(RI->getValue(), { 0 }); + Value *Flags = LoadSTyField(*AtExit, DL, StackFrameTy, SF, + StackFrameFields::flags, + /*isVolatile=*/false, + AtomicOrdering::Acquire); + Flags = AtExit->CreateOr(Flags, + ConstantInt::get(Flags->getType(), + CILK_FRAME_EXCEPTING)); + 
StoreSTyField(*AtExit, DL, StackFrameTy, Flags, SF, + StackFrameFields::flags, /*isVolatile=*/false, + AtomicOrdering::Release); + StoreSTyField(*AtExit, DL, StackFrameTy, Exn, SF, + StackFrameFields::except_data, /*isVolatile=*/false, + AtomicOrdering::Release); + /* + __cilkrts_pop_frame(&sf); + if (sf->flags) + __cilkrts_leave_frame(&sf); + */ + AtExit->CreateCall(GetCilkParentEpilogueFn(instrument), Args, ""); + } + } + + return true; +} + +/// Lower a call to get the grainsize of this Tapir loop. +/// +/// The grainsize is computed by the following equation: +/// +/// Grainsize = min(2048, ceil(Limit / (8 * workers))) +/// +/// This computation is inserted into the preheader of the loop. +Value *CilkABI::lowerGrainsizeCall(CallInst *GrainsizeCall) { + Value *Limit = GrainsizeCall->getArgOperand(0); + IRBuilder<> Builder(GrainsizeCall); + + // Get 8 * workers + Value *Workers = Builder.CreateCall(CILKRTS_FUNC(get_nworkers)); + Value *WorkersX8 = Builder.CreateIntCast( + Builder.CreateMul(Workers, ConstantInt::get(Workers->getType(), 8)), + Limit->getType(), false); + // Compute ceil(limit / 8 * workers) = + // (limit + 8 * workers - 1) / (8 * workers) + Value *SmallLoopVal = + Builder.CreateUDiv(Builder.CreateSub(Builder.CreateAdd(Limit, WorkersX8), + ConstantInt::get(Limit->getType(), 1)), + WorkersX8); + // Compute min + Value *LargeLoopVal = ConstantInt::get(Limit->getType(), 2048); + Value *Cmp = Builder.CreateICmpULT(LargeLoopVal, SmallLoopVal); + Value *Grainsize = Builder.CreateSelect(Cmp, LargeLoopVal, SmallLoopVal); + + // Replace uses of grainsize intrinsic call with this grainsize value. 
+ GrainsizeCall->replaceAllUsesWith(Grainsize); + return Grainsize; +} + +void CilkABI::lowerSync(SyncInst &SI) { + Function &Fn = *SI.getFunction(); + + Value *SF = GetOrInitCilkStackFrame(Fn, /*Helper*/false, false); + Value *args[] = { SF }; + assert(args[0] && "sync used in function without frame!"); + + Instruction *SyncUnwind = nullptr; + BasicBlock *SyncCont = SI.getSuccessor(0); + BasicBlock *SyncUnwindDest = nullptr; + if (InvokeInst *II = + dyn_cast(SyncCont->getFirstNonPHIOrDbgOrLifetime())) { + if (const Function *Called = II->getCalledFunction()) { + if (Intrinsic::sync_unwind == Called->getIntrinsicID()) { + SyncUnwind = II; + SyncCont = II->getNormalDest(); + SyncUnwindDest = II->getUnwindDest(); + } + } + } + CallBase *CB; + if (!SyncUnwindDest) { + if (Fn.doesNotThrow()) + CB = CallInst::Create(GetCilkSyncNothrowFn(), args, "", + /*insert before*/&SI); + else + CB = CallInst::Create(GetCilkSyncFn(), args, "", /*insert before*/&SI); + + BranchInst::Create(SyncCont, CB->getParent()); + } else { + CB = InvokeInst::Create(GetCilkSyncFn(), SyncCont, SyncUnwindDest, args, "", + /*insert before*/&SI); + for (PHINode &PN : SyncCont->phis()) + PN.addIncoming(PN.getIncomingValueForBlock(SyncUnwind->getParent()), + SI.getParent()); + for (PHINode &PN : SyncUnwindDest->phis()) + PN.addIncoming(PN.getIncomingValueForBlock(SyncUnwind->getParent()), + SI.getParent()); + } + CB->setDebugLoc(SI.getDebugLoc()); + SI.eraseFromParent(); + + // Remember to inline this call later. + CallsToInline.insert(CB); + + // Mark this function as stealable. 
+ Fn.addFnAttr(Attribute::Stealable); +} + +void CilkABI::preProcessOutlinedTask(Function &F, Instruction *DetachPt, + Instruction *TaskFrameCreate, + bool IsSpawner, BasicBlock *TFEntry) { + NamedRegionTimer NRT("processOutlinedTask", "Process outlined task", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + makeFunctionDetachable(F, DetachPt, TaskFrameCreate, false); + if (IsSpawner) + preProcessRootSpawner(F, TFEntry); +} + +void CilkABI::postProcessOutlinedTask(Function &F, Instruction *DetachPt, + Instruction *TaskFrameCreate, + bool IsSpawner, BasicBlock *TFEntry) {} + +void CilkABI::preProcessRootSpawner(Function &F, BasicBlock *TFEntry) { + NamedRegionTimer NRT("processSpawner", "Process spawner", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + GetOrInitCilkStackFrame(F, /*Helper=*/false, false); + + // Mark this function as stealable. + F.addFnAttr(Attribute::Stealable); +} + +void CilkABI::postProcessRootSpawner(Function &F, BasicBlock *TFEntry) {} + +void CilkABI::processSubTaskCall(TaskOutlineInfo &TOI, DominatorTree &DT) { + NamedRegionTimer NRT("processSubTaskCall", "Process subtask call", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + Instruction *ReplStart = TOI.ReplStart; + Instruction *ReplCall = TOI.ReplCall; + BasicBlock *UnwindDest = TOI.ReplUnwind; + Function *Parent = ReplCall->getFunction(); + + LLVM_DEBUG(dbgs() << "CilkABI::processSubTaskCall: " << *ReplCall << "\n"); + + Function &F = *ReplCall->getFunction(); + assert(DetachCtxToStackFrame.count(&F) && + "No frame found for spawning task."); + Value *SF = DetachCtxToStackFrame[&F]; + + if (InvokeInst *II = dyn_cast(ReplCall)) { + LandingPadInst *LPI = II->getLandingPadInst(); + IRBuilder<> B(&*II->getUnwindDest()->getFirstInsertionPt()); + Value *Exn = B.CreateExtractValue(LPI, { 0 }); + Value *NewExn = B.CreateCall(GetCilkCatchExceptionFn(Exn->getType()), + { SF, Exn }); + B.CreateInsertValue(LPI, NewExn, { 0 }); + } + + // Split 
the basic block containing the detach replacement just before the + // start of the detach-replacement instructions. + BasicBlock *DetBlock = ReplStart->getParent(); + BasicBlock *CallBlock = SplitBlock(DetBlock, ReplStart, &DT); + + // Emit a Cilk setjmp at the end of the block preceding the split-off detach + // replacement. + Instruction *SetJmpPt = DetBlock->getTerminator(); + IRBuilder<> B(SetJmpPt); + Value *SetJmpRes = EmitCilkSetJmp(B, SF); + + // Get the ordinary continuation of the detach. + BasicBlock *CallCont; + if (InvokeInst *II = dyn_cast(ReplCall)) + CallCont = II->getNormalDest(); + else // isa(CallSite) + CallCont = CallBlock->getSingleSuccessor(); + + // Insert a conditional branch, based on the result of the setjmp, to either + // the detach replacement or the continuation. + SetJmpRes = B.CreateICmpEQ(SetJmpRes, + ConstantInt::get(SetJmpRes->getType(), 0)); + B.CreateCondBr(SetJmpRes, CallBlock, CallCont); + // Add DetBlock as a predecessor for all Phi nodes in CallCont. These Phi + // nodes receive the same value from DetBlock as from CallBlock. + for (PHINode &Phi : CallCont->phis()) + Phi.addIncoming(Phi.getIncomingValueForBlock(CallBlock), DetBlock); + SetJmpPt->eraseFromParent(); + + // If we're not using dynamic argument structs, then no further processing is + // needed. + if (ArgStructMode::Dynamic != getArgStructMode()) + return; + + // Create a separate spawn-helper function to allocate and populate the + // argument struct. + + // Inputs to the spawn helper + ValueSet SHInputSet = TOI.InputSet; + ValueSet SHInputs; + fixupInputSet(*Parent, SHInputSet, SHInputs); + LLVM_DEBUG({ + dbgs() << "SHInputSet:\n"; + for (Value *V : SHInputSet) + dbgs() << "\t" << *V << "\n"; + dbgs() << "SHInputs:\n"; + for (Value *V : SHInputs) + dbgs() << "\t" << *V << "\n"; + }); + ValueSet Outputs; // Should be empty. 
+ // Only one block needs to be cloned into the spawn helper + std::vector BlocksToClone; + BlocksToClone.push_back(CallBlock); + SmallVector Returns; // Ignore returns cloned. + ValueToValueMapTy VMap; + Twine NameSuffix = ".shelper"; + Function *SpawnHelper = + CreateHelper(SHInputs, Outputs, BlocksToClone, CallBlock, DetBlock, + CallCont, VMap, &M, Parent->getSubprogram() != nullptr, + Returns, NameSuffix.str(), nullptr, nullptr, nullptr, + UnwindDest); + + assert(Returns.empty() && "Returns cloned when creating SpawnHelper."); + + // Use a fast calling convention for the helper. + SpawnHelper->setCallingConv(CallingConv::Fast); + // Add attributes to new helper function. + SpawnHelper->addFnAttr(Attribute::NoInline); + if (!UnwindDest) { + SpawnHelper->addFnAttr(Attribute::NoUnwind); + SpawnHelper->addFnAttr(Attribute::UWTable); + } + // Note that the address of the helper is unimportant. + SpawnHelper->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + // The helper is internal to this module. + SpawnHelper->setLinkage(GlobalValue::InternalLinkage); + + // Add alignment assumptions to arguments of helper, based on alignment of + // values in old function. + AddAlignmentAssumptions(Parent, SHInputs, VMap, ReplCall, nullptr, nullptr); + + // Move allocas in the newly cloned block to the entry block of the helper. + { + // Collect the end instructions of the task. + SmallVector Ends; + // Ends.push_back(cast(VMap[CallBlock])->getTerminator()); + Ends.push_back(cast(VMap[CallCont])->getTerminator()); + if (isa(ReplCall)) + Ends.push_back(cast(VMap[UnwindDest])->getTerminator()); + + // Move allocas in cloned detached block to entry of helper function. + BasicBlock *ClonedBlock = cast(VMap[CallBlock]); + MoveStaticAllocasInBlock(&SpawnHelper->getEntryBlock(), ClonedBlock, Ends); + + // We do not need to add new llvm.stacksave/llvm.stackrestore intrinsics, + // because calling and returning from the helper will automatically manage + // the stack appropriately. 
+ } + + // Insert a call to the spawn helper. + SmallVector SHInputVec; + for (Value *V : SHInputs) + SHInputVec.push_back(V); + SplitEdge(DetBlock, CallBlock); + B.SetInsertPoint(CallBlock->getTerminator()); + if (isa(ReplCall)) { + InvokeInst *SpawnHelperCall = InvokeInst::Create(SpawnHelper, CallCont, + UnwindDest, SHInputVec); + SpawnHelperCall->setDebugLoc(ReplCall->getDebugLoc()); + SpawnHelperCall->setCallingConv(SpawnHelper->getCallingConv()); + // The invoke of the spawn helper can replace the terminator in CallBlock. + ReplaceInstWithInst(CallBlock->getTerminator(), SpawnHelperCall); + } else { + CallInst *SpawnHelperCall = B.CreateCall(SpawnHelper, SHInputVec); + SpawnHelperCall->setDebugLoc(ReplCall->getDebugLoc()); + SpawnHelperCall->setCallingConv(SpawnHelper->getCallingConv()); + SpawnHelperCall->setDoesNotThrow(); + // Branch around CallBlock. Its contents are now dead. + ReplaceInstWithInst(CallBlock->getTerminator(), + BranchInst::Create(CallCont)); + } +} + +// Helper function to inline calls to compiler-generated Cilk Plus runtime +// functions when possible. This inlining is necessary to properly implement +// some Cilk runtime "calls," such as __cilk_sync(). +static inline void inlineCilkFunctions( + Function &F, SmallPtrSetImpl &CallsToInline) { + for (CallBase *CB : CallsToInline) { + InlineFunctionInfo IFI; + InlineFunction(*CB, IFI); + } + CallsToInline.clear(); +} + +bool CilkABI::preProcessFunction(Function &F, TaskInfo &TI, + bool ProcessingTapirLoops) { + if (ProcessingTapirLoops) + // Don't do any preprocessing when outlining Tapir loops. 
+ return false; + + LLVM_DEBUG(dbgs() << "CilkABI processing function " << F.getName() << "\n"); + if (fastCilk && F.getName() == "main") { + IRBuilder<> B(F.getEntryBlock().getTerminator()); + B.CreateCall(CILKRTS_FUNC(init)); + } + return false; +} + +void CilkABI::postProcessFunction(Function &F, bool ProcessingTapirLoops) { + if (ProcessingTapirLoops) + // Don't do any preprocessing when outlining Tapir loops. + return; + + NamedRegionTimer NRT("postProcessFunction", "Post-process function", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + if (!DebugABICalls) + inlineCilkFunctions(F, CallsToInline); +} + +void CilkABI::postProcessHelper(Function &F) {} + +LoopOutlineProcessor * +CilkABI::getLoopOutlineProcessor(const TapirLoopInfo *TL) const { + if (UseRuntimeCilkFor) + return new RuntimeCilkFor(M); + return nullptr; +} diff --git a/llvm/lib/Transforms/Tapir/CilkRTSCilkFor.cpp b/llvm/lib/Transforms/Tapir/CilkRTSCilkFor.cpp new file mode 100644 index 00000000000000..fc9b4da4dd8dd5 --- /dev/null +++ b/llvm/lib/Transforms/Tapir/CilkRTSCilkFor.cpp @@ -0,0 +1,302 @@ +//===- CilkRTSCilkFor.cpp - Interface to __cilkrts_cilk_for ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a loop-outline processor to lower Tapir loops to a call +// to a Cilk runtime method, __cilkrts_cilk_for. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Tapir/CilkRTSCilkFor.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Support/ModRef.h" +#include "llvm/Transforms/Tapir/Outline.h" +#include "llvm/Transforms/Tapir/TapirLoopInfo.h" +#include "llvm/Transforms/Utils/TapirUtils.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/TapirUtils.h" +#include "llvm/Transforms/Utils/ValueMapper.h" + +using namespace llvm; + +#define DEBUG_TYPE "cilkrtscilkfor" + +STATISTIC(LoopsUsingRuntimeCilkFor, + "Number of Tapir loops implemented using runtime cilk_for"); + +cl::opt llvm::UseRuntimeCilkFor( + "cilk-use-runtime-cilkfor", cl::init(false), cl::Hidden, + cl::desc("Insert a call into the Cilk runtime to handle cilk_for loops")); + +#define CILKRTS_FUNC(name) Get__cilkrts_##name() + +FunctionCallee RuntimeCilkFor::Get__cilkrts_cilk_for_32() { + if (CilkRTSCilkFor32) + return CilkRTSCilkFor32; + + LLVMContext &C = M.getContext(); + Type *VoidTy = Type::getVoidTy(C); + Type *VoidPtrTy = Type::getInt8PtrTy(C); + Type *CountTy = Type::getInt32Ty(C); + FunctionType *BodyTy = FunctionType::get(VoidTy, + {VoidPtrTy, CountTy, CountTy}, + false); + FunctionType *FTy = + FunctionType::get(VoidTy, + {PointerType::getUnqual(BodyTy), VoidPtrTy, CountTy, + Type::getInt32Ty(C)}, false); + CilkRTSCilkFor32 = M.getOrInsertFunction("__cilkrts_cilk_for_32", FTy); + + return CilkRTSCilkFor32; +} + +FunctionCallee RuntimeCilkFor::Get__cilkrts_cilk_for_64() { + if (CilkRTSCilkFor64) + return CilkRTSCilkFor64; + + LLVMContext &C = M.getContext(); + Type *VoidTy = Type::getVoidTy(C); + Type *VoidPtrTy = Type::getInt8PtrTy(C); + Type *CountTy = Type::getInt64Ty(C); + FunctionType *BodyTy = FunctionType::get(VoidTy, + {VoidPtrTy, CountTy, CountTy}, + false); + FunctionType *FTy = + 
FunctionType::get(VoidTy, + {PointerType::getUnqual(BodyTy), VoidPtrTy, CountTy, + Type::getInt32Ty(C)}, false); + CilkRTSCilkFor64 = M.getOrInsertFunction("__cilkrts_cilk_for_64", FTy); + + return CilkRTSCilkFor64; +} + +void RuntimeCilkFor::setupLoopOutlineArgs( + Function &F, ValueSet &HelperArgs, SmallVectorImpl &HelperInputs, + ValueSet &InputSet, const SmallVectorImpl &LCArgs, + const SmallVectorImpl &LCInputs, const ValueSet &TLInputsFixed) { + // Add the argument structure + HelperArgs.insert(TLInputsFixed[0]); + HelperInputs.push_back(TLInputsFixed[0]); + + // Add the loop-control inputs. + auto LCArgsIter = LCArgs.begin(); + auto LCInputsIter = LCInputs.begin(); + // First, add the start iteration. + HelperArgs.insert(*LCArgsIter); + HelperInputs.push_back(*LCInputsIter); + if (!isa(*LCInputsIter)) + InputSet.insert(*LCInputsIter); + // Next, add the end iteration. + ++LCArgsIter; + ++LCInputsIter; + HelperArgs.insert(*LCArgsIter); + HelperInputs.push_back(*LCInputsIter); + if (!isa(*LCInputsIter)) + InputSet.insert(*LCInputsIter); + + // Save the third loop-control input -- the grainsize -- for use later. + ++LCArgsIter; + ++LCInputsIter; + HelperArgs.insert(*LCArgsIter); + HelperInputs.push_back(*LCInputsIter); + if (!isa(*LCInputsIter)) + InputSet.insert(*LCInputsIter); +} + +unsigned RuntimeCilkFor::getIVArgIndex(const Function &F, const ValueSet &Args) + const { + // The argument for the primary induction variable is the second input. + return 1; +} + +void RuntimeCilkFor::postProcessOutline(TapirLoopInfo &TL, TaskOutlineInfo &Out, + ValueToValueMapTy &VMap) { + Function *Helper = Out.Outline; + // If the helper uses an argument structure, then it is not a write-only + // function. 
+ if (getArgStructMode() != ArgStructMode::None) { + Helper->removeFnAttr(Attribute::WriteOnly); + Helper->setMemoryEffects( + MemoryEffects(MemoryEffects::Location::Other, ModRefInfo::ModRef)); + } +} + +void RuntimeCilkFor::processOutlinedLoopCall(TapirLoopInfo &TL, + TaskOutlineInfo &TOI, + DominatorTree &DT) { + Function *Outlined = TOI.Outline; + Instruction *ReplStart = TOI.ReplStart; + Instruction *ReplCall = TOI.ReplCall; + CallBase *CB = cast(ReplCall); + BasicBlock *CallCont = TOI.ReplRet; + BasicBlock *UnwindDest = TOI.ReplUnwind; + Function *Parent = ReplCall->getFunction(); + Module &M = *Parent->getParent(); + unsigned IVArgIndex = getIVArgIndex(*Parent, TOI.InputSet); + Type *PrimaryIVTy = + CB->getArgOperand(IVArgIndex)->getType(); + Value *TripCount = CB->getArgOperand(IVArgIndex + 1); + Value *GrainsizeVal = CB->getArgOperand(IVArgIndex + 2); + + // Get the correct CilkForABI call. + FunctionCallee CilkForABI; + if (PrimaryIVTy->isIntegerTy(32)) + CilkForABI = CILKRTS_FUNC(cilk_for_32); + else if (PrimaryIVTy->isIntegerTy(64)) + CilkForABI = CILKRTS_FUNC(cilk_for_64); + else + llvm_unreachable("No CilkForABI call matches IV type for Tapir loop."); + + // Get the grainsize input + Value *GrainsizeInput; + { + IRBuilder<> B(ReplCall); + GrainsizeInput = B.CreateIntCast(GrainsizeVal, GrainsizeType, + /*isSigned*/ false); + } + + // Split the basic block containing the detach replacement just before the + // start of the detach-replacement instructions. + BasicBlock *DetBlock = ReplStart->getParent(); + BasicBlock *CallBlock = SplitBlock(DetBlock, ReplStart); + + LLVMContext &C = M.getContext(); + + // Insert a call or invoke to the cilk_for ABI method. 
+ LLVM_DEBUG(dbgs() << "RuntimeCilkFor: Adding call to __cilkrts_cilk_for\n"); + IRBuilder<> B(ReplCall); + Type *FPtrTy = PointerType::getUnqual( + FunctionType::get(Type::getVoidTy(C), + { Type::getInt8PtrTy(C), PrimaryIVTy, PrimaryIVTy }, + false)); + Value *OutlinedFnPtr = B.CreatePointerBitCastOrAddrSpaceCast(Outlined, + FPtrTy); + AllocaInst *ArgStruct = cast(CB->getArgOperand(0)); + Value *ArgStructPtr = B.CreateBitCast(ArgStruct, Type::getInt8PtrTy(C)); + if (UnwindDest) { + InvokeInst *Invoke = InvokeInst::Create(CilkForABI, CallCont, UnwindDest, + { OutlinedFnPtr, ArgStructPtr, + TripCount, GrainsizeInput }); + Invoke->setDebugLoc(ReplCall->getDebugLoc()); + ReplaceInstWithInst(ReplCall, Invoke); + TOI.replaceReplCall(Invoke); + } else { + CallInst *Call = B.CreateCall(CilkForABI, + { OutlinedFnPtr, ArgStructPtr, + TripCount, GrainsizeInput }); + Call->setDebugLoc(ReplCall->getDebugLoc()); + Call->setDoesNotThrow(); + TOI.replaceReplCall(Call); + ReplCall->eraseFromParent(); + } + + ++LoopsUsingRuntimeCilkFor; + + // If we're not using dynamic argument structs, then no further processing is + // needed. + if (ArgStructMode::Dynamic != getArgStructMode()) + return; + + // N.B. The following code to outline the invocation of the __cilkrts_cilk_for + // call, is primarily included for debugging purposes. In practice, this code + // should not run, because the __cilkrts_cilk_for ABI should work with a + // static structure. + LLVM_DEBUG(dbgs() << "RuntimeCilkFor: Adding additional spawn helper to " + << "manage dynamic argument-struct allocation.\n"); + + // Update the value of ReplCall. + ReplCall = TOI.ReplCall; + // Create a separate spawn-helper function to allocate and populate the + // argument struct. 
+ // Inputs to the spawn helper + ValueSet SHInputSet = TOI.InputSet; + SHInputSet.insert(GrainsizeVal); + ValueSet SHInputs; + fixupInputSet(*Parent, SHInputSet, SHInputs); + LLVM_DEBUG({ + dbgs() << "SHInputSet:\n"; + for (Value *V : SHInputSet) + dbgs() << "\t" << *V << "\n"; + dbgs() << "SHInputs:\n"; + for (Value *V : SHInputs) + dbgs() << "\t" << *V << "\n"; + }); + + ValueSet Outputs; // Should be empty. + // Only one block needs to be cloned into the spawn helper + std::vector BlocksToClone; + BlocksToClone.push_back(CallBlock); + SmallVector Returns; // Ignore returns cloned. + ValueToValueMapTy VMap; + Twine NameSuffix = ".shelper"; + Function *SpawnHelper = + CreateHelper(SHInputs, Outputs, BlocksToClone, CallBlock, DetBlock, + CallCont, VMap, &M, Parent->getSubprogram() != nullptr, + Returns, NameSuffix.str(), nullptr, nullptr, nullptr, + UnwindDest); + + assert(Returns.empty() && "Returns cloned when creating SpawnHelper."); + + // If there is no unwind destination, then the SpawnHelper cannot throw. + if (!UnwindDest) + SpawnHelper->setDoesNotThrow(); + + // Add attributes to new helper function. + // + // Use a fast calling convention for the helper. + SpawnHelper->setCallingConv(CallingConv::Fast); + // Note that the address of the helper is unimportant. + SpawnHelper->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + // The helper is internal to this module. + SpawnHelper->setLinkage(GlobalValue::InternalLinkage); + + // Add alignment assumptions to arguments of helper, based on alignment of + // values in old function. + AddAlignmentAssumptions(Parent, SHInputs, VMap, ReplCall, nullptr, nullptr); + + // Move allocas in the newly cloned block to the entry block of the helper. + { + // Collect the end instructions of the task. 
+ SmallVector Ends; + Ends.push_back(cast(VMap[CallCont])->getTerminator()); + if (isa(ReplCall)) + Ends.push_back(cast(VMap[UnwindDest])->getTerminator()); + + // Move allocas in cloned detached block to entry of helper function. + BasicBlock *ClonedBlock = cast(VMap[CallBlock]); + MoveStaticAllocasInBlock(&SpawnHelper->getEntryBlock(), ClonedBlock, Ends); + + // We do not need to add new llvm.stacksave/llvm.stackrestore intrinsics, + // because calling and returning from the helper will automatically manage + // the stack appropriately. + } + + // Insert a call to the spawn helper. + SmallVector SHInputVec; + for (Value *V : SHInputs) + SHInputVec.push_back(V); + SplitEdge(DetBlock, CallBlock); + B.SetInsertPoint(CallBlock->getTerminator()); + if (isa(ReplCall)) { + InvokeInst *SpawnHelperCall = InvokeInst::Create(SpawnHelper, CallCont, + UnwindDest, SHInputVec); + SpawnHelperCall->setDebugLoc(ReplCall->getDebugLoc()); + SpawnHelperCall->setCallingConv(SpawnHelper->getCallingConv()); + ReplaceInstWithInst(CallBlock->getTerminator(), SpawnHelperCall); + } else { + CallInst *SpawnHelperCall = B.CreateCall(SpawnHelper, SHInputVec); + SpawnHelperCall->setDebugLoc(ReplCall->getDebugLoc()); + SpawnHelperCall->setCallingConv(SpawnHelper->getCallingConv()); + SpawnHelperCall->setDoesNotThrow(); + // Branch around CallBlock. Its contents are now dead. + ReplaceInstWithInst(CallBlock->getTerminator(), + BranchInst::Create(CallCont)); + } +} diff --git a/llvm/lib/Transforms/Tapir/DRFScopedNoAliasAA.cpp b/llvm/lib/Transforms/Tapir/DRFScopedNoAliasAA.cpp new file mode 100644 index 00000000000000..d590406be7e5a0 --- /dev/null +++ b/llvm/lib/Transforms/Tapir/DRFScopedNoAliasAA.cpp @@ -0,0 +1,332 @@ +//===- DRFScopedNoAliasAA.cpp - DRF-based scoped-noalias metadata ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Adds scoped-noalias metadata to memory accesses based on Tapir's parallel +// control flow constructs and the assumption that the function is data-race +// free. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Tapir/DRFScopedNoAliasAA.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TapirTaskInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Tapir.h" + +#define DEBUG_TYPE "drf-scoped-noalias" + +using namespace llvm; + +/// Process Tapir loops within the given function for loop spawning. 
+// Implementation of the DRF-based scoped-noalias annotator.  Walks the Tapir
+// task tree of a function and, assuming the program is data-race free, tags
+// memory accesses with alias.scope/noalias metadata distinguishing logically
+// parallel tasks.
+//
+// NOTE(review): template arguments throughout this file were stripped by the
+// patch-text extraction; they have been reconstructed from the surrounding
+// code and OpenCilk conventions.  Identifiers marked "confirm" below could
+// not be recovered unambiguously.
+class DRFScopedNoAliasImpl {
+public:
+  // AA is accepted for interface symmetry with the pass entry points but is
+  // not currently stored or used.
+  DRFScopedNoAliasImpl(Function &F, TaskInfo &TI, AliasAnalysis &AA,
+                       LoopInfo *LI)
+      : F(F), TI(TI), LI(LI) {
+    // Precompute, for every spindle, the set of tasks that may run in
+    // parallel with it.
+    TI.evaluateParallelState<MaybeParallelTasks>(MPTasks);
+  }
+
+  bool run();
+
+private:
+  bool populateTaskScopeNoAlias();
+
+  bool populateSubTaskScopeNoAlias(
+      const Task *T, MDBuilder &MDB, SmallVectorImpl<Metadata *> &CurrScopes,
+      SmallVectorImpl<Metadata *> &CurrNoAlias,
+      DenseMap<const Task *, MDNode *> &TaskToScope);
+
+  bool populateTaskScopeNoAliasInBlock(
+      const Task *T, BasicBlock *BB, MDBuilder &MDB,
+      SmallVectorImpl<Metadata *> &Scopes,
+      SmallVectorImpl<Metadata *> &NoAlias);
+
+  Function &F;
+  TaskInfo &TI;
+  LoopInfo *LI;
+
+  // Maybe-parallel-task state computed from TI in the constructor.
+  MaybeParallelTasks MPTasks;
+};
+
+namespace {
+struct DRFScopedNoAliasWrapperPass : public FunctionPass {
+  static char ID; // Pass identification, replacement for typeid
+  explicit DRFScopedNoAliasWrapperPass() : FunctionPass(ID) {
+    initializeDRFScopedNoAliasWrapperPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override {
+    return "Assume DRF to Add Scoped-No-Alias Metadata";
+  }
+
+  bool runOnFunction(Function &F) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    // NOTE(review): the template arguments below were garbled in the patch
+    // text.  The required set matches the INITIALIZE_PASS_DEPENDENCY list;
+    // the original had one additional addPreserved<> — GlobalsAAWrapperPass
+    // is suggested by the GlobalsModRef include — confirm.
+    AU.addRequired<AAResultsWrapperPass>();
+    AU.addPreserved<AAResultsWrapperPass>();
+    AU.addRequired<LoopInfoWrapperPass>();
+    AU.addPreserved<LoopInfoWrapperPass>();
+    AU.addRequired<TaskInfoWrapperPass>();
+    AU.addPreserved<TaskInfoWrapperPass>();
+    AU.addPreserved<GlobalsAAWrapperPass>();
+  }
+};
+} // End of anonymous namespace
+
+char DRFScopedNoAliasWrapperPass::ID = 0;
+INITIALIZE_PASS_BEGIN(DRFScopedNoAliasWrapperPass, "drf-scoped-noalias",
+                      "Add DRF-based scoped-noalias metadata",
+                      false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TaskInfoWrapperPass)
+INITIALIZE_PASS_END(DRFScopedNoAliasWrapperPass, "drf-scoped-noalias",
+                    "Add DRF-based scoped-noalias metadata",
+                    false, false)
+
+// Annotate the memory-accessing instructions of \p BB (a block of task \p T)
+// with the given alias.scope and noalias metadata lists, but only when the
+// instruction may touch an object allocated outside \p T.
+bool DRFScopedNoAliasImpl::populateTaskScopeNoAliasInBlock(
+    const Task *T, BasicBlock *BB, MDBuilder &MDB,
+    SmallVectorImpl<Metadata *> &Scopes, SmallVectorImpl<Metadata *> &NoAlias) {
+  LLVM_DEBUG(dbgs() << "Processing block " << BB->getName() << " in task "
+                    << T->getEntry()->getName() << "\n");
+  for (Instruction &I : *BB) {
+    bool IsArgMemOnlyCall = false, IsFuncCall = false;
+    SmallVector<const Value *, 2> PtrArgs;
+
+    // Collect the pointer operands of the instruction, mirroring the
+    // noalias-metadata propagation logic in InlineFunction.
+    if (const LoadInst *LI = dyn_cast<LoadInst>(&I))
+      PtrArgs.push_back(LI->getPointerOperand());
+    else if (const StoreInst *SI = dyn_cast<StoreInst>(&I))
+      PtrArgs.push_back(SI->getPointerOperand());
+    else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(&I))
+      PtrArgs.push_back(VAAI->getPointerOperand());
+    else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(&I))
+      PtrArgs.push_back(CXI->getPointerOperand());
+    else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&I))
+      PtrArgs.push_back(RMWI->getPointerOperand());
+    else if (const CallBase *ICS = dyn_cast<CallBase>(&I)) {
+      // We don't need to worry about callsites that don't access memory.
+      if (ICS->doesNotAccessMemory())
+        continue;
+
+      IsFuncCall = true;
+      if (ICS->onlyAccessesArgMemory())
+        IsArgMemOnlyCall = true;
+
+      for (Value *Arg : ICS->args()) {
+        // We need to check the underlying objects of all arguments, not just
+        // the pointer arguments, because we might be passing pointers as
+        // integers, etc.
+        // However, if we know that the call only accesses pointer arguments,
+        // then we only need to check the pointer arguments.
+        if (IsArgMemOnlyCall && !Arg->getType()->isPointerTy())
+          continue;
+
+        PtrArgs.push_back(Arg);
+      }
+    }
+
+    // If we found no pointers, then this instruction is not suitable for
+    // pairing with an instruction to receive aliasing metadata.  However, if
+    // this is a call, this we might just alias with none of the noalias
+    // arguments.
+    if (PtrArgs.empty() && !IsFuncCall)
+      continue;
+
+    // It is possible that there is only one underlying object, but you need
+    // to go through several PHIs to see it, and thus could be repeated in the
+    // Objects list.
+    bool UsesObjectOutsideTask = false;
+    for (const Value *V : PtrArgs) {
+      SmallVector<const Value *, 4> Objects;
+      getUnderlyingObjects(const_cast<Value *>(V), Objects, LI);
+
+      for (const Value *O : Objects) {
+        LLVM_DEBUG(dbgs() << "Checking object " << *O << "\n");
+        // Check if this value is a constant that cannot be derived from any
+        // pointer value (we need to exclude constant expressions, for
+        // example, that are formed from arithmetic on global symbols).
+        // NOTE(review): the five isa<> targets were garbled in the patch
+        // text; this list matches the analogous check in InlineFunction.cpp
+        // — confirm.
+        bool IsNonPtrConst = isa<ConstantInt>(V) || isa<ConstantFP>(V) ||
+                             isa<ConstantPointerNull>(V) ||
+                             isa<ConstantDataVector>(V) || isa<UndefValue>(V);
+        if (IsNonPtrConst)
+          continue;
+
+        // Check if this object was created in this task.
+        if (const Instruction *OI = dyn_cast<Instruction>(O))
+          if (TI.getTaskFor(OI->getParent()) == T)
+            continue;
+
+        // This object exists outside the task.
+        UsesObjectOutsideTask = true;
+        break;
+      }
+      // Quit early if a pointer argument is found that refers to an object
+      // allocated outside of this task.
+      if (UsesObjectOutsideTask)
+        break;
+    }
+
+    // If this instruction does not refer to an object outside of the task,
+    // don't add noalias metadata.
+    if (!UsesObjectOutsideTask) {
+      LLVM_DEBUG(dbgs() << "Instruction " << I
+                        << " does not use object outside of task "
+                        << T->getEntry()->getName() << "\n");
+      continue;
+    }
+
+    // Concatenate the new scopes onto any metadata already present.
+    if (!NoAlias.empty())
+      I.setMetadata(LLVMContext::MD_noalias,
+                    MDNode::concatenate(
+                        I.getMetadata(LLVMContext::MD_noalias),
+                        MDNode::get(F.getContext(), NoAlias)));
+
+    if (!Scopes.empty())
+      I.setMetadata(
+          LLVMContext::MD_alias_scope,
+          MDNode::concatenate(I.getMetadata(LLVMContext::MD_alias_scope),
+                              MDNode::get(F.getContext(), Scopes)));
+  }
+  return true;
+}
+
+// Recursively annotate task \p T and its subtasks.  CurrScopes accumulates
+// the alias scopes of the tasks enclosing the current position; CurrNoAlias
+// accumulates the scopes of tasks that may run in parallel with it.
+bool DRFScopedNoAliasImpl::populateSubTaskScopeNoAlias(
+    const Task *T, MDBuilder &MDB, SmallVectorImpl<Metadata *> &CurrScopes,
+    SmallVectorImpl<Metadata *> &CurrNoAlias,
+    DenseMap<const Task *, MDNode *> &TaskToScope) {
+  bool Changed = false;
+  size_t OrigNoAliasSize = CurrNoAlias.size();
+
+  // FIXME? Separately handle shared EH spindles.
+  for (Spindle *S : depth_first<InTask<Spindle *>>(T->getEntrySpindle())) {
+    for (const Task *MPT : MPTasks.TaskList[S]) {
+      // Don't record noalias scopes for maybe-parallel tasks that enclose the
+      // spindle.  These cases arise from parallel loops, which need special
+      // alias analysis anyway (e.g., LoopAccessAnalysis).
+      if (!MPT->encloses(S->getEntry()))
+        CurrNoAlias.push_back(TaskToScope[MPT]);
+    }
+    // Populate instructions in spindle with scoped-noalias information.
+    for (BasicBlock *BB : S->blocks())
+      Changed |=
+          populateTaskScopeNoAliasInBlock(T, BB, MDB, CurrScopes, CurrNoAlias);
+
+    // Remove the noalias scopes for this spindle.
+    CurrNoAlias.erase(CurrNoAlias.begin() + OrigNoAliasSize, CurrNoAlias.end());
+
+    // For each successor spindle in a subtask, recursively populate the
+    // scoped-noalias information in that subtask.
+    for (Spindle *Succ : successors(S)) {
+      if (S->succInSubTask(Succ)) {
+        CurrScopes.push_back(TaskToScope[Succ->getParentTask()]);
+        populateSubTaskScopeNoAlias(Succ->getParentTask(), MDB, CurrScopes,
+                                    CurrNoAlias, TaskToScope);
+        CurrScopes.pop_back();
+      }
+    }
+  }
+
+  return Changed;
+}
+
+// Create an anonymous alias-scope domain and scope for every task in the
+// subtree rooted at \p T, keyed by task in TaskToDomain/TaskToScope.  Names
+// are derived from the chain of task-entry block names for debuggability.
+static void createTaskDomainsAndFullScopes(
+    const Task *T, MDBuilder &MDB, const Twine ParentName,
+    DenseMap<const Task *, MDNode *> &TaskToDomain,
+    DenseMap<const Task *, MDNode *> &TaskToScope) {
+  // Within the domain of T, create a scope and domain for each subtask.
+  for (const Task *SubT : T->subtasks()) {
+    const Twine Name = ParentName + "_" + SubT->getEntry()->getName();
+
+    MDNode *NewScope = MDB.createAnonymousAliasScope(TaskToDomain[T],
+                                                     ("taskscp_" + Name).str());
+    TaskToScope[SubT] = NewScope;
+    MDNode *NewDomain =
+        MDB.createAnonymousAliasScopeDomain(("taskdom_" + Name).str());
+    TaskToDomain[SubT] = NewDomain;
+
+    // Recursively create domains and scopes for subtasks.
+    createTaskDomainsAndFullScopes(SubT, MDB, Name, TaskToDomain, TaskToScope);
+  }
+}
+
+// Top-level driver: build the scope/domain tree for all tasks in F and then
+// annotate instructions task by task.  No-op for serial functions.
+bool DRFScopedNoAliasImpl::populateTaskScopeNoAlias() {
+  // Create a domain for the task scopes.
+  MDBuilder MDB(F.getContext());
+  if (TI.isSerial())
+    return false;
+
+  DenseMap<const Task *, MDNode *> TaskToDomain;
+  DenseMap<const Task *, MDNode *> TaskToScope;
+
+  // Create a domain and scope for the root task.
+  MDNode *NewDomain =
+      MDB.createAnonymousAliasScopeDomain(("dom_" + F.getName()).str());
+  TaskToDomain[TI.getRootTask()] = NewDomain;
+  MDNode *NewScope =
+      MDB.createAnonymousAliasScope(NewDomain, ("scp_" + F.getName()).str());
+  TaskToScope[TI.getRootTask()] = NewScope;
+
+  // Recursively create task domains and scopes for subtasks.
+  createTaskDomainsAndFullScopes(TI.getRootTask(), MDB, F.getName(),
+                                 TaskToDomain, TaskToScope);
+
+  SmallVector<Metadata *, 8> Scopes, NoAlias;
+  return populateSubTaskScopeNoAlias(TI.getRootTask(), MDB, Scopes, NoAlias,
+                                     TaskToScope);
+}
+
+bool DRFScopedNoAliasImpl::run() {
+  return populateTaskScopeNoAlias();
+}
+
+bool DRFScopedNoAliasWrapperPass::runOnFunction(Function &F) {
+  if (skipFunction(F))
+    return false;
+
+  TaskInfo &TI = getAnalysis<TaskInfoWrapperPass>().getTaskInfo();
+  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+  LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  return DRFScopedNoAliasImpl(F, TI, AA, &LI).run();
+}
+
+// createDRFScopedNoAliasPass - Provide an entry point to create this pass.
+// +namespace llvm { +FunctionPass *createDRFScopedNoAliasWrapperPass() { + return new DRFScopedNoAliasWrapperPass(); +} +} // end namespace llvm + +PreservedAnalyses DRFScopedNoAliasPass::run(Function &F, + FunctionAnalysisManager &AM) { + TaskInfo &TI = AM.getResult(F); + AliasAnalysis &AA = AM.getResult(F); + LoopInfo &LI = AM.getResult(F); + + DRFScopedNoAliasImpl(F, TI, AA, &LI).run(); + + PreservedAnalyses PA; + PA.preserve(); + PA.preserve(); + PA.preserve(); + return PA; +} diff --git a/llvm/lib/Transforms/Tapir/LambdaABI.cpp b/llvm/lib/Transforms/Tapir/LambdaABI.cpp new file mode 100644 index 00000000000000..e6da261ad7dcf4 --- /dev/null +++ b/llvm/lib/Transforms/Tapir/LambdaABI.cpp @@ -0,0 +1,578 @@ +//===- LambdaABI.cpp - Generic interface to various runtime systems--------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Lambda ABI to convert Tapir instructions to calls +// into a generic runtime system to operates on spawned computations as lambdas. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Tapir/LambdaABI.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/TapirTaskInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Verifier.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Linker/Linker.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ModRef.h" +#include "llvm/Transforms/Tapir/Outline.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/EscapeEnumerator.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/TapirUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "lambdaabi" + +extern cl::opt DebugABICalls; + +static cl::opt + ClRuntimeBCPath("tapir-runtime-bc-path", cl::init(""), + cl::desc("Path to the bitcode file for the runtime ABI"), + cl::Hidden); + +static const StringRef StackFrameName = "__rts_sf"; + +namespace { + +// Custom DiagnosticInfo for linking the Lambda ABI bitcode file. +class LambdaABILinkDiagnosticInfo : public DiagnosticInfo { + const Module *SrcM; + const Twine &Msg; + +public: + LambdaABILinkDiagnosticInfo(DiagnosticSeverity Severity, const Module *SrcM, + const Twine &Msg) + : DiagnosticInfo(DK_Lowering, Severity), SrcM(SrcM), Msg(Msg) {} + void print(DiagnosticPrinter &DP) const override { + DP << "linking module '" << SrcM->getModuleIdentifier() << "': " << Msg; + } +}; + +// Custom DiagnosticHandler to handle diagnostics arising when linking the +// Lambda ABI bitcode file. 
+class LambdaABIDiagnosticHandler final : public DiagnosticHandler { + const Module *SrcM; + DiagnosticHandler *OrigHandler; + +public: + LambdaABIDiagnosticHandler(const Module *SrcM, DiagnosticHandler *OrigHandler) + : SrcM(SrcM), OrigHandler(OrigHandler) {} + + bool handleDiagnostics(const DiagnosticInfo &DI) override { + if (DI.getKind() != DK_Linker) + return OrigHandler->handleDiagnostics(DI); + + std::string MsgStorage; + { + raw_string_ostream Stream(MsgStorage); + DiagnosticPrinterRawOStream DP(Stream); + DI.print(DP); + } + return OrigHandler->handleDiagnostics( + LambdaABILinkDiagnosticInfo(DI.getSeverity(), SrcM, MsgStorage)); + } +}; + +// Structure recording information about runtime ABI functions. +struct RTSFnDesc { + StringRef FnName; + FunctionType *FnType; + FunctionCallee &FnCallee; +}; +} // namespace + +// void LambdaABI::setOptions(const TapirTargetOptions &Options) { +// if (!isa(Options)) +// return; + +// const LambdaABIOptions &OptionsCast = cast(Options); + +// // Get the path to the runtime bitcode file. +// RuntimeBCPath = OptionsCast.getRuntimeBCPath(); +// } + +void LambdaABI::prepareModule() { + LLVMContext &C = M.getContext(); + const DataLayout &DL = DestM.getDataLayout(); + Type *Int8Ty = Type::getInt8Ty(C); + Type *Int16Ty = Type::getInt16Ty(C); + Type *Int32Ty = Type::getInt32Ty(C); + Type *Int64Ty = Type::getInt64Ty(C); + + // If a runtime bitcode path is given via the command line, use it. + if ("" != ClRuntimeBCPath) + RuntimeBCPath = ClRuntimeBCPath; + + if ("" == RuntimeBCPath) { + C.emitError("LambdaABI: No bitcode ABI file given."); + return; + } + + LLVM_DEBUG(dbgs() << "Using external bitcode file for Lambda ABI: " + << RuntimeBCPath << "\n"); + SMDiagnostic SMD; + + // Parse the bitcode file. This call imports structure definitions, but not + // function definitions. + if (std::unique_ptr ExternalModule = + parseIRFile(RuntimeBCPath, SMD, C)) { + // Get the original DiagnosticHandler for this context. 
+ std::unique_ptr OrigDiagHandler = + C.getDiagnosticHandler(); + + // Setup an LambdaABIDiagnosticHandler for this context, to handle + // diagnostics that arise from linking ExternalModule. + C.setDiagnosticHandler(std::make_unique( + ExternalModule.get(), OrigDiagHandler.get())); + + // Link the external module into the current module, copying over global + // values. + // + // TODO: Consider restructuring the import process to use + // Linker::Flags::LinkOnlyNeeded to copy over only the necessary contents + // from the external module. + bool Fail = Linker::linkModules( + M, std::move(ExternalModule), Linker::Flags::None, + [](Module &M, const StringSet<> &GVS) { + for (StringRef GVName : GVS.keys()) { + LLVM_DEBUG(dbgs() << "Linking global value " << GVName << "\n"); + if (Function *Fn = M.getFunction(GVName)) { + if (!Fn->isDeclaration() && !Fn->hasComdat()) + // We set the function's linkage as available_externally, so + // that subsequent optimizations can remove these definitions + // from the module. We don't want this module redefining any of + // these symbols, even if they aren't inlined, because the + // Lambda runtime library will provide those definitions later. + Fn->setLinkage(Function::AvailableExternallyLinkage); + } else if (GlobalVariable *G = M.getGlobalVariable(GVName)) { + if (!G->isDeclaration() && !G->hasComdat()) + G->setLinkage(GlobalValue::AvailableExternallyLinkage); + } + } + }); + if (Fail) + C.emitError("LambdaABI: Failed to link bitcode ABI file: " + + Twine(RuntimeBCPath)); + + // Restore the original DiagnosticHandler for this context. + C.setDiagnosticHandler(std::move(OrigDiagHandler)); + } else { + C.emitError("LambdaABI: Failed to parse bitcode ABI file: " + + Twine(RuntimeBCPath)); + } + + // Get or create local definitions of RTS structure types. 
+ const char *StackFrameName = "struct.__rts_stack_frame"; + StackFrameTy = StructType::lookupOrCreate(C, StackFrameName); + + PointerType *StackFramePtrTy = PointerType::getUnqual(StackFrameTy); + Type *VoidTy = Type::getVoidTy(C); + Type *VoidPtrTy = Type::getInt8PtrTy(C); + + // Define the types of the RTS functions. + FunctionType *RTSFnTy = FunctionType::get(VoidTy, {StackFramePtrTy}, false); + SpawnBodyFnArgTy = VoidPtrTy; + Type *IntPtrTy = DL.getIntPtrType(C); + SpawnBodyFnArgSizeTy = IntPtrTy; + SpawnBodyFnTy = FunctionType::get(VoidTy, {SpawnBodyFnArgTy}, false); + FunctionType *SpawnFnTy = + FunctionType::get(VoidTy, + {StackFramePtrTy, PointerType::getUnqual(SpawnBodyFnTy), + SpawnBodyFnArgTy, SpawnBodyFnArgSizeTy, IntPtrTy}, + false); + FunctionType *Grainsize8FnTy = FunctionType::get(Int8Ty, {Int8Ty}, false); + FunctionType *Grainsize16FnTy = FunctionType::get(Int16Ty, {Int16Ty}, false); + FunctionType *Grainsize32FnTy = FunctionType::get(Int32Ty, {Int32Ty}, false); + FunctionType *Grainsize64FnTy = FunctionType::get(Int64Ty, {Int64Ty}, false); + FunctionType *WorkerInfoTy = FunctionType::get(Int32Ty, {}, false); + + // Create an array of RTS functions, with their associated types and + // FunctionCallee member variables in the LambdaABI class. + RTSFnDesc RTSFunctions[] = { + {"__rts_enter_frame", RTSFnTy, RTSEnterFrame}, + {"__rts_spawn", SpawnFnTy, RTSSpawn}, + {"__rts_leave_frame", RTSFnTy, RTSLeaveFrame}, + {"__rts_sync", RTSFnTy, RTSSync}, + {"__rts_sync_nothrow", RTSFnTy, RTSSyncNoThrow}, + {"__rts_loop_grainsize_8", Grainsize8FnTy, RTSLoopGrainsize8}, + {"__rts_loop_grainsize_16", Grainsize16FnTy, RTSLoopGrainsize16}, + {"__rts_loop_grainsize_32", Grainsize32FnTy, RTSLoopGrainsize32}, + {"__rts_loop_grainsize_64", Grainsize64FnTy, RTSLoopGrainsize64}, + {"__rts_get_num_workers", WorkerInfoTy, RTSGetNumWorkers}, + {"__rts_get_worker_id", WorkerInfoTy, RTSGetWorkerID}, + }; + + // Add attributes to internalized functions. 
+ for (RTSFnDesc FnDesc : RTSFunctions) { + assert(!FnDesc.FnCallee && "Redefining RTS function"); + FnDesc.FnCallee = M.getOrInsertFunction(FnDesc.FnName, FnDesc.FnType); + assert(isa(FnDesc.FnCallee.getCallee()) && + "Runtime function is not a function"); + Function *Fn = cast(FnDesc.FnCallee.getCallee()); + + Fn->setDoesNotThrow(); + + // Unless we're debugging, mark the function as always_inline. This + // attribute is required for some functions, but is helpful for all + // functions. + if (!DebugABICalls) + Fn->addFnAttr(Attribute::AlwaysInline); + else + Fn->removeFnAttr(Attribute::AlwaysInline); + + if (Fn->getName() == "__rts_get_num_workers" || + Fn->getName() == "__rts_get_worker_id") { + Fn->setLinkage(Function::InternalLinkage); + } + } + + // If no valid bitcode file was found fill in the missing pieces. + // An error should have been emitted already unless the user + // set DebugABICalls. + + if (StackFrameTy->isOpaque()) { + // TODO: Figure out better handling of this potential error. + LLVM_DEBUG(dbgs() << "LambdaABI: Failed to find __rts_stack_frame type.\n"); + // Create a dummy __rts_stack_frame structure + StackFrameTy->setBody(Int64Ty); + } + // Create declarations of all RTS functions, and add basic attributes to those + // declarations. + for (RTSFnDesc FnDesc : RTSFunctions) { + if (FnDesc.FnCallee) + continue; + FnDesc.FnCallee = M.getOrInsertFunction(FnDesc.FnName, FnDesc.FnType); + assert(isa(FnDesc.FnCallee.getCallee()) && + "RTS function is not a function"); + Function *Fn = cast(FnDesc.FnCallee.getCallee()); + + Fn->setDoesNotThrow(); + } +} + +void LambdaABI::addHelperAttributes(Function &Helper) { + // Inlining the helper function is not legal. + Helper.removeFnAttr(Attribute::AlwaysInline); + Helper.addFnAttr(Attribute::NoInline); + // If the helper uses an argument structure, then it is not a write-only + // function. 
+ if (getArgStructMode() != ArgStructMode::None) { + Helper.removeFnAttr(Attribute::WriteOnly); + Helper.setMemoryEffects( + MemoryEffects(MemoryEffects::Location::Other, ModRefInfo::ModRef)); + } + // Note that the address of the helper is unimportant. + Helper.setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + + // The helper is internal to this module. We use internal linkage, rather + // than private linkage, so that tools can still reference the helper + // function. + Helper.setLinkage(GlobalValue::InternalLinkage); +} + +// Check whether the allocation of a __rts_stack_frame can be inserted after +// instruction \p I. +static bool skipInstruction(const Instruction &I) { + if (isa(I)) + return true; + + if (isa(I)) + return true; + + if (const IntrinsicInst *II = dyn_cast(&I)) { + // Skip simple intrinsics + switch (II->getIntrinsicID()) { + case Intrinsic::annotation: + case Intrinsic::assume: + case Intrinsic::sideeffect: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: + case Intrinsic::is_constant: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + case Intrinsic::experimental_gc_result: + case Intrinsic::experimental_gc_relocate: + case Intrinsic::experimental_noalias_scope_decl: + case Intrinsic::syncregion_start: + case Intrinsic::taskframe_create: + return true; + default: + return false; + } + } + + return false; +} + +// Scan the basic block \p B to find a point to insert the allocation of a +// __rts_stack_frame. +static Instruction *getStackFrameInsertPt(BasicBlock &B) { + BasicBlock::iterator BI(B.getFirstInsertionPt()); + BasicBlock::const_iterator BE(B.end()); + + // Scan the basic block for the first instruction we should not skip. 
+ while (BI != BE) { + if (!skipInstruction(*BI)) { + return &*BI; + } + ++BI; + } + + // We reached the end of the basic block; return the terminator. + return B.getTerminator(); +} + +/// Create the __rts_stack_frame for the spawning function. +Value *LambdaABI::CreateStackFrame(Function &F) { + const DataLayout &DL = F.getParent()->getDataLayout(); + Type *SFTy = StackFrameTy; + + IRBuilder<> B(getStackFrameInsertPt(F.getEntryBlock())); + AllocaInst *SF = B.CreateAlloca(SFTy, DL.getAllocaAddrSpace(), + /*ArraySize*/ nullptr, + /*Name*/ StackFrameName); + + SF->setAlignment(StackFrameAlign); + + return SF; +} + +Value *LambdaABI::GetOrCreateStackFrame(Function &F) { + if (DetachCtxToStackFrame.count(&F)) + return DetachCtxToStackFrame[&F]; + + Value *SF = CreateStackFrame(F); + DetachCtxToStackFrame[&F] = SF; + + return SF; +} + +// Insert a call in Function F to __rts_enter_frame to initialize the +// __rts_stack_frame in F. If TaskFrameCreate is nonnull, the call to +// __rts_enter_frame is inserted at TaskFrameCreate. +CallInst *LambdaABI::InsertStackFramePush(Function &F, + Instruction *TaskFrameCreate, + bool Helper) { + Instruction *SF = cast(GetOrCreateStackFrame(F)); + + BasicBlock::iterator InsertPt = ++SF->getIterator(); + IRBuilder<> B(&(F.getEntryBlock()), InsertPt); + if (TaskFrameCreate) + B.SetInsertPoint(TaskFrameCreate); + if (!B.getCurrentDebugLocation()) { + // Try to find debug information later in this block for the ABI call. + BasicBlock::iterator BI = B.GetInsertPoint(); + BasicBlock::const_iterator BE(B.GetInsertBlock()->end()); + while (BI != BE) { + if (DebugLoc Loc = BI->getDebugLoc()) { + B.SetCurrentDebugLocation(Loc); + break; + } + ++BI; + } + } + + Value *Args[1] = {SF}; + return B.CreateCall(RTSEnterFrame, Args); +} + +// Insert a call in Function F to pop the stack frame. 
+// +// PromoteCallsToInvokes dictates whether call instructions that can throw are +// promoted to invoke instructions prior to inserting the epilogue-function +// calls. +void LambdaABI::InsertStackFramePop(Function &F, bool PromoteCallsToInvokes, + bool InsertPauseFrame, bool Helper) { + Value *SF = GetOrCreateStackFrame(F); + SmallPtrSet Returns; + SmallPtrSet Resumes; + + // Add eh cleanup that returns control to the runtime + EscapeEnumerator EE(F, "rts_cleanup", PromoteCallsToInvokes); + while (IRBuilder<> *Builder = EE.Next()) { + if (ResumeInst *RI = dyn_cast(Builder->GetInsertPoint())) { + if (!RI->getDebugLoc()) + // Attempt to set the debug location of this resume to match one of the + // preceeding terminators. + for (const BasicBlock *Pred : predecessors(RI->getParent())) + if (const DebugLoc &Loc = Pred->getTerminator()->getDebugLoc()) { + RI->setDebugLoc(Loc); + break; + } + Resumes.insert(RI); + } else if (ReturnInst *RI = dyn_cast(Builder->GetInsertPoint())) + Returns.insert(RI); + } + + for (ReturnInst *RI : Returns) { + CallInst::Create(RTSLeaveFrame, {SF}, "", RI) + ->setDebugLoc(RI->getDebugLoc()); + } +} + +/// Lower a call to get the grainsize of a Tapir loop. +Value *LambdaABI::lowerGrainsizeCall(CallInst *GrainsizeCall) { + Value *Limit = GrainsizeCall->getArgOperand(0); + IRBuilder<> Builder(GrainsizeCall); + + // Select the appropriate __rts_grainsize function, based on the type. 
+ FunctionCallee RTSGrainsizeCall; + if (GrainsizeCall->getType()->isIntegerTy(8)) + RTSGrainsizeCall = RTSLoopGrainsize8; + else if (GrainsizeCall->getType()->isIntegerTy(16)) + RTSGrainsizeCall = RTSLoopGrainsize16; + else if (GrainsizeCall->getType()->isIntegerTy(32)) + RTSGrainsizeCall = RTSLoopGrainsize32; + else if (GrainsizeCall->getType()->isIntegerTy(64)) + RTSGrainsizeCall = RTSLoopGrainsize64; + else + llvm_unreachable("No RTSGrainsize call matches type for Tapir loop."); + + Value *Grainsize = Builder.CreateCall(RTSGrainsizeCall, Limit); + + // Replace uses of grainsize intrinsic call with this grainsize value. + GrainsizeCall->replaceAllUsesWith(Grainsize); + return Grainsize; +} + +// Lower a sync instruction SI. +void LambdaABI::lowerSync(SyncInst &SI) { + Function &Fn = *SI.getFunction(); + if (!DetachCtxToStackFrame[&Fn]) + // If we have not created a stackframe for this function, then we don't need + // to handle the sync. + return; + + Value *SF = GetOrCreateStackFrame(Fn); + Value *Args[] = {SF}; + assert(Args[0] && "sync used in function without frame!"); + + Instruction *SyncUnwind = nullptr; + BasicBlock *SyncCont = SI.getSuccessor(0); + BasicBlock *SyncUnwindDest = nullptr; + // Determine whether a sync.unwind immediately follows SI. 
+ if (InvokeInst *II = + dyn_cast(SyncCont->getFirstNonPHIOrDbgOrLifetime())) { + if (isSyncUnwind(II)) { + SyncUnwind = II; + SyncCont = II->getNormalDest(); + SyncUnwindDest = II->getUnwindDest(); + } + } + + CallBase *CB; + if (!SyncUnwindDest) { + if (Fn.doesNotThrow()) + CB = CallInst::Create(RTSSyncNoThrow, Args, "", + /*insert before*/ &SI); + else + CB = CallInst::Create(RTSSync, Args, "", /*insert before*/ &SI); + + BranchInst::Create(SyncCont, CB->getParent()); + } else { + CB = InvokeInst::Create(RTSSync, SyncCont, SyncUnwindDest, Args, "", + /*insert before*/ &SI); + for (PHINode &PN : SyncCont->phis()) + PN.addIncoming(PN.getIncomingValueForBlock(SyncUnwind->getParent()), + SI.getParent()); + for (PHINode &PN : SyncUnwindDest->phis()) + PN.addIncoming(PN.getIncomingValueForBlock(SyncUnwind->getParent()), + SI.getParent()); + } + CB->setDebugLoc(SI.getDebugLoc()); + SI.eraseFromParent(); + + // Mark this function as stealable. + Fn.addFnAttr(Attribute::Stealable); +} + +bool LambdaABI::preProcessFunction(Function &F, TaskInfo &TI, + bool ProcessingTapirLoops) { + return false; +} +void LambdaABI::postProcessFunction(Function &F, bool ProcessingTapirLoops) {} +void LambdaABI::postProcessHelper(Function &F) {} + +void LambdaABI::preProcessOutlinedTask(Function &F, Instruction *DetachPt, + Instruction *TaskFrameCreate, + bool IsSpawner, BasicBlock *TFEntry) { + if (IsSpawner) + InsertStackFramePush(F, TaskFrameCreate, /*Helper*/ true); +} + +void LambdaABI::postProcessOutlinedTask(Function &F, Instruction *DetachPt, + Instruction *TaskFrameCreate, + bool IsSpawner, BasicBlock *TFEntry) { + if (IsSpawner) + InsertStackFramePop(F, /*PromoteCallsToInvokes*/ true, + /*InsertPauseFrame*/ true, /*Helper*/ true); +} + +void LambdaABI::preProcessRootSpawner(Function &F, BasicBlock *TFEntry) { + InsertStackFramePush(F); +} + +void LambdaABI::postProcessRootSpawner(Function &F, BasicBlock *TFEntry) { + InsertStackFramePop(F, /*PromoteCallsToInvokes*/ false, + 
/*InsertPauseFrame*/ false, /*Helper*/ false); +} + +void LambdaABI::processSubTaskCall(TaskOutlineInfo &TOI, DominatorTree &DT) { + const DataLayout &DL = DestM.getDataLayout(); + CallBase *ReplCall = cast(TOI.ReplCall); + + Function &F = *ReplCall->getFunction(); + Value *SF = DetachCtxToStackFrame[&F]; + assert(SF && "No frame found for spawning task"); + + // Get the alignment of the helper arguments. The bitcode-ABI functions may + // use the alignment to align the shared variables in the storage allocated by + // the OpenMP runtime, especially to accommodate vector arguments. + AllocaInst *ArgAlloca = cast(ReplCall->getArgOperand(0)); + uint64_t Alignment = + DL.getPrefTypeAlign(ArgAlloca->getAllocatedType()).value(); + + IRBuilder<> B(ReplCall); + Value *FnCast = B.CreateBitCast(ReplCall->getCalledFunction(), + PointerType::getUnqual(SpawnBodyFnTy)); + Value *ArgCast = + B.CreateBitOrPointerCast(ReplCall->getArgOperand(0), SpawnBodyFnArgTy); + auto ArgSize = + cast(ReplCall->getArgOperand(0))->getAllocationSizeInBits(DL); + assert(ArgSize && + "Could not determine size of compiler-generated ArgStruct."); + Value *ArgSizeVal = ConstantInt::get(SpawnBodyFnArgSizeTy, *ArgSize / 8); + + if (InvokeInst *II = dyn_cast(ReplCall)) { + B.CreateInvoke(RTSSpawn, II->getNormalDest(), II->getUnwindDest(), + {SF, FnCast, ArgCast, ArgSizeVal, B.getInt64(Alignment)}); + } else { + B.CreateCall(RTSSpawn, + {SF, FnCast, ArgCast, ArgSizeVal, B.getInt64(Alignment)}); + } + + ReplCall->eraseFromParent(); +} diff --git a/llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp b/llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp new file mode 100644 index 00000000000000..5a3fbb8eb08334 --- /dev/null +++ b/llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp @@ -0,0 +1,1767 @@ +//===- LoopSpawningTI.cpp - Spawn loop iterations efficiently -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Modify Tapir loops to spawn their iterations efficiently. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Tapir/LoopSpawningTI.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TapirTaskInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/ValueMap.h" +#include "llvm/IR/Verifier.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Timer.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/IndVarSimplify.h" +#include "llvm/Transforms/Scalar/SimplifyCFG.h" +#include "llvm/Transforms/Scalar/LoopDeletion.h" +#include "llvm/Transforms/Tapir.h" +#include "llvm/Transforms/Tapir/LoweringUtils.h" +#include "llvm/Transforms/Tapir/Outline.h" +#include "llvm/Transforms/Tapir/TapirLoopInfo.h" +#include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/EscapeEnumerator.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include 
"llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" +#include "llvm/Transforms/Utils/TapirUtils.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include + +using namespace llvm; + +#define LS_NAME "loop-spawning-ti" +#define DEBUG_TYPE LS_NAME + +STATISTIC(TapirLoopsFound, + "Number of Tapir loops discovered spawning"); +STATISTIC(LoopsConvertedToDAC, + "Number of Tapir loops converted to divide-and-conquer iteration " + "spawning"); + +static const char TimerGroupName[] = DEBUG_TYPE; +static const char TimerGroupDescription[] = "Loop spawning"; + +/// The default loop-outline processor leaves the outlined Tapir loop as is. +class DefaultLoopOutlineProcessor : public LoopOutlineProcessor { +public: + DefaultLoopOutlineProcessor(Module &M) : LoopOutlineProcessor(M) {} + void postProcessOutline(TapirLoopInfo &TL, TaskOutlineInfo &Out, + ValueToValueMapTy &VMap) override final { + LoopOutlineProcessor::postProcessOutline(TL, Out, VMap); + addSyncToOutlineReturns(TL, Out, VMap); + } +}; + +/// The DACSpawning loop-outline processor transforms an outlined Tapir loop to +/// evaluate the iterations using parallel recursive divide-and-conquer. +class DACSpawning : public LoopOutlineProcessor { +public: + DACSpawning(Module &M) : LoopOutlineProcessor(M) {} + void postProcessOutline(TapirLoopInfo &TL, TaskOutlineInfo &Out, + ValueToValueMapTy &VMap) override final { + LoopOutlineProcessor::postProcessOutline(TL, Out, VMap); + implementDACIterSpawnOnHelper(TL, Out, VMap); + ++LoopsConvertedToDAC; + + // Move Cilksan instrumentation. + moveCilksanInstrumentation(TL, Out, VMap); + + // Add syncs to all exits of the outline. 
+ addSyncToOutlineReturns(TL, Out, VMap); + } + +private: + void implementDACIterSpawnOnHelper( + TapirLoopInfo &TL, TaskOutlineInfo &Out, ValueToValueMapTy &VMap); +}; + +static bool isSRetInput(const Value *V, const Function &F) { + if (!isa(V)) + return false; + + auto ArgIter = F.arg_begin(); + if (F.hasParamAttribute(0, Attribute::StructRet) && V == &*ArgIter) + return true; + ++ArgIter; + if (F.hasParamAttribute(1, Attribute::StructRet) && V == &*ArgIter) + return true; + + return false; +} + +void LoopOutlineProcessor::setupLoopOutlineArgs( + Function &F, ValueSet &HelperArgs, SmallVectorImpl &HelperInputs, + ValueSet &InputSet, const SmallVectorImpl &LCArgs, + const SmallVectorImpl &LCInputs, const ValueSet &TLInputsFixed) { + // Add Tapir-loop inputs to vectors for args and helpers. + // + // First add the sret task input, if it exists. + ValueSet::iterator TLInputIter = TLInputsFixed.begin(); + if ((TLInputIter != TLInputsFixed.end()) && isSRetInput(*TLInputIter, F)) { + HelperArgs.insert(*TLInputIter); + HelperInputs.push_back(*TLInputIter); + ++TLInputIter; + } + + // Then add the loop control inputs. + for (Value *V : LCArgs) + HelperArgs.insert(V); + for (Value *V : LCInputs) { + HelperInputs.push_back(V); + // Add all loop-control inputs to the input set. + InputSet.insert(V); + } + + // Finally add the remaining inputs + while (TLInputIter != TLInputsFixed.end()) { + Value *V = *TLInputIter++; + assert(!HelperArgs.count(V)); + HelperArgs.insert(V); + HelperInputs.push_back(V); + } +} + +unsigned LoopOutlineProcessor::getIVArgIndex(const Function &F, + const ValueSet &Args) const { + // The argument for the primary induction variable is either the first or + // second input, depending on whether there is an sret input. 
+ unsigned IVArgOffset = 0; + if (isSRetInput(Args[IVArgOffset], F)) + ++IVArgOffset; + return IVArgOffset; +} + +void LoopOutlineProcessor::postProcessOutline(TapirLoopInfo &TL, + TaskOutlineInfo &Out, + ValueToValueMapTy &VMap) { + Function *Helper = Out.Outline; + // Use a fast calling convention for the helper. + Helper->setCallingConv(CallingConv::Fast); + // Note that the address of the helper is unimportant. + Helper->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + // The helper is internal to this module. + Helper->setLinkage(GlobalValue::InternalLinkage); +} + +void LoopOutlineProcessor::addSyncToOutlineReturns(TapirLoopInfo &TL, + TaskOutlineInfo &Out, + ValueToValueMapTy &VMap) { + Value *SyncRegion = + cast(VMap[TL.getTask()->getDetach()->getSyncRegion()]); + EscapeEnumerator EE(*Out.Outline, "ls.sync", false); + while (IRBuilder<> *AtExit = EE.Next()) { + // TODO: Add an option to insert syncs before resumes. + if (!isa(*AtExit->GetInsertPoint())) + continue; + + BasicBlock *Exit = AtExit->GetInsertBlock(); + BasicBlock *NewExit = SplitBlock(Exit, Exit->getTerminator()); + SyncInst *NewSync = SyncInst::Create(NewExit, SyncRegion); + ReplaceInstWithInst(Exit->getTerminator(), NewSync); + + // If the helper does not throw, there's no need to insert a sync.unwind. + if (Out.Outline->doesNotThrow()) + return; + + // Insert a call to sync.unwind. + CallInst *SyncUnwind = CallInst::Create( + Intrinsic::getDeclaration(&M, Intrinsic::sync_unwind), + { SyncRegion }, "", NewExit->getFirstNonPHIOrDbg()); + // If the Tapir loop has an unwind destination, change the sync.unwind to an + // invoke that unwinds to the cloned unwind destination. 
+ if (TL.getUnwindDest()) + changeToInvokeAndSplitBasicBlock( + SyncUnwind, cast(VMap[TL.getUnwindDest()])); + } +} + +static void getDependenciesInSameBlock(Instruction *I, + SmallPtrSetImpl &Deps) { + const BasicBlock *Block = I->getParent(); + for (Value *Op : I->operand_values()) + if (Instruction *OpI = dyn_cast(Op)) + if (OpI->getParent() == Block) { + if (!Deps.insert(OpI).second) + continue; + getDependenciesInSameBlock(OpI, Deps); + } +} + +static void moveInstrumentation(StringRef Name, BasicBlock &From, + BasicBlock &To, + Instruction *InsertBefore = nullptr) { + assert((!InsertBefore || InsertBefore->getParent() == &To) && + "Insert point not in To block."); + BasicBlock::iterator InsertPoint = + InsertBefore ? InsertBefore->getIterator() : To.getFirstInsertionPt(); + + // Search the From block for instrumentation to move. + SmallPtrSet ToHoist; + for (Instruction &I : From) { + if (CallBase *CB = dyn_cast(&I)) + if (const Function *Called = CB->getCalledFunction()) + if (Called->getName() == Name) { + ToHoist.insert(&I); + getDependenciesInSameBlock(&I, ToHoist); + } + } + + // If we found no instrumentation to hoist, give up. + if (ToHoist.empty()) + return; + + // Hoist the instrumentation to InsertPoint in the To block. + for (BasicBlock::iterator II = From.begin(), IE = From.end(); II != IE;) { + Instruction *I = dyn_cast(II++); + if (!I || !ToHoist.count(I)) + continue; + + while (isa(II) && ToHoist.count(cast(II))) + ++II; + + To.splice(InsertPoint, &From, I->getIterator(), II); + } +} + +void LoopOutlineProcessor::moveCilksanInstrumentation(TapirLoopInfo &TL, + TaskOutlineInfo &Out, + ValueToValueMapTy &VMap) { + Task *T = TL.getTask(); + Loop *L = TL.getLoop(); + + // Get the header of the cloned loop. + BasicBlock *Header = cast(VMap[L->getHeader()]); + assert(Header && "No cloned header block found"); + + // Get the task entry of the cloned loop. 
+ BasicBlock *TaskEntry = cast(VMap[T->getEntry()]); + assert(TaskEntry && "No cloned task-entry block found"); + + // Get the latch of the cloned loop. + BasicBlock *Latch = cast(VMap[L->getLoopLatch()]); + assert(Latch && "No cloned loop latch found"); + + // Get the normal task exit of the cloned loop. + BasicBlock *TaskExit = Latch->getSinglePredecessor(); + + // Get the preheader of the cloned loop. + BasicBlock *Preheader = nullptr; + for (BasicBlock *Pred : predecessors(Header)) { + if (Latch == Pred) + continue; + Preheader = Pred; + break; + } + if (!Preheader) { + LLVM_DEBUG(dbgs() << "No preheader for hoisting Cilksan instrumentation\n"); + return; + } + + // Get the normal exit of the cloned loop. + BasicBlock *LatchExit = nullptr; + for (BasicBlock *Succ : successors(Latch)) { + if (Header == Succ) + continue; + LatchExit = Succ; + break; + } + if (!LatchExit) { + LLVM_DEBUG( + dbgs() << "No normal exit for hoisting Cilksan instrumentation\n"); + return; + } + + // Move __csan_detach and __csan_task to the Preheader. + moveInstrumentation("__csan_detach", *Header, *Preheader, + Preheader->getTerminator()); + moveInstrumentation("__csan_task", *TaskEntry, *Preheader, + Preheader->getTerminator()); + + // Move __csan_detach_continue and __csan_task_exit on the normal exit path to + // LatchExit. + moveInstrumentation("__csan_detach_continue", *Latch, *LatchExit); + if (TaskExit) + // There's only one block with __csan_task_exit instrumentation to move, so + // move it from that block. + moveInstrumentation("__csan_task_exit", *TaskExit, *LatchExit); + else { + // We need to create PHI nodes for the arguments of a new instrumentation + // call in LatchExit. + + // Scan all predecessors of Latch for __csan_task_exit instrumentation. 
+ DenseMap Instrumentation; + Function *InstrFunc = nullptr; + for (BasicBlock *Pred : predecessors(Latch)) + for (Instruction &I : *Pred) + if (CallBase *CB = dyn_cast(&I)) + if (Function *Called = CB->getCalledFunction()) + if (Called->getName() == "__csan_task_exit") { + Instrumentation.insert(std::make_pair(Pred, CB)); + InstrFunc = Called; + } + + // Return early if we found no instrumentation. + if (!InstrFunc || Instrumentation.empty()) { + LLVM_DEBUG(dbgs() << "No task_exit instrumentation found"); + return; + } + + // Create PHI nodes at the start of Latch for the arguments of the moved + // instrumentation. + SmallVector InstrArgs; + for (BasicBlock *Pred : predecessors(Latch)) { + CallBase *Instr = Instrumentation[Pred]; + if (InstrArgs.empty()) { + // Create PHI nodes at the start of Latch for the instrumentation + // arguments. + IRBuilder<> IRB(&Latch->front()); + for (Value *Arg : Instr->args()) { + PHINode *ArgPHI = + IRB.CreatePHI(Arg->getType(), Instrumentation.size()); + ArgPHI->addIncoming(Arg, Pred); + InstrArgs.push_back(ArgPHI); + } + } else { + // Update the PHI nodes at the start of Latch for the instrumentation. + unsigned ArgIdx = 0; + for (Value *Arg : Instr->args()) { + cast(InstrArgs[ArgIdx])->addIncoming(Arg, Pred); + ++ArgIdx; + } + } + } + + // Insert new instrumentation call at the start of LatchExit. 
+ CallInst::Create(InstrFunc->getFunctionType(), InstrFunc, InstrArgs, "", + &*LatchExit->getFirstInsertionPt()); + + // Remove old instrumentation calls from predecessors + for (BasicBlock *Pred : predecessors(Latch)) + Instrumentation[Pred]->eraseFromParent(); + } +} + +namespace { +static void emitMissedWarning(const Loop *L, const TapirLoopHints &LH, + OptimizationRemarkEmitter *ORE) { + switch (LH.getStrategy()) { + case TapirLoopHints::ST_DAC: + ORE->emit(DiagnosticInfoOptimizationFailure( + DEBUG_TYPE, "FailedRequestedSpawning", + L->getStartLoc(), L->getHeader()) + << "Tapir loop not transformed: " + << "failed to use divide-and-conquer loop spawning." + << " Compile with -Rpass-analysis=" << LS_NAME + << " for more details."); + break; + case TapirLoopHints::ST_SEQ: + ORE->emit(DiagnosticInfoOptimizationFailure( + DEBUG_TYPE, "SpawningDisabled", + L->getStartLoc(), L->getHeader()) + << "Tapir loop not transformed: " + << "loop-spawning transformation disabled"); + break; + case TapirLoopHints::ST_END: + ORE->emit(DiagnosticInfoOptimizationFailure( + DEBUG_TYPE, "FailedRequestedSpawning", + L->getStartLoc(), L->getHeader()) + << "Tapir loop not transformed: " + << "unknown loop-spawning strategy"); + break; + } +} + +/// Process Tapir loops within the given function for loop spawning. +class LoopSpawningImpl { +public: + LoopSpawningImpl(Function &F, DominatorTree &DT, LoopInfo &LI, TaskInfo &TI, + ScalarEvolution &SE, AssumptionCache &AC, + TargetTransformInfo &TTI, TapirTarget *Target, + OptimizationRemarkEmitter &ORE) + : F(F), DT(DT), LI(LI), TI(TI), SE(SE), AC(AC), TTI(TTI), Target(Target), + ORE(ORE) {} + + ~LoopSpawningImpl() { + for (TapirLoopInfo *TL : TapirLoops) + delete TL; + TapirLoops.clear(); + TaskToTapirLoop.clear(); + LoopToTapirLoop.clear(); + } + + bool run(); + + // If loop \p L defines a recorded Tapir loop, returns the Tapir loop info for + // that Tapir loop. Otherwise returns null. 
+ TapirLoopInfo *getTapirLoop(Loop *L) { + if (!LoopToTapirLoop.count(L)) + return nullptr; + return LoopToTapirLoop[L]; + } + + // If task \p T defines a recorded Tapir loop, returns the Tapir loop info for + // that Tapir loop. Otherwise returns null. + TapirLoopInfo *getTapirLoop(Task *T) { + if (!TaskToTapirLoop.count(T)) + return nullptr; + return TaskToTapirLoop[T]; + } + + // Gets the Tapir loop that contains basic block \p B, i.e., the Tapir loop + // for the loop associated with \p B. + TapirLoopInfo *getTapirLoop(const BasicBlock *B) { + return getTapirLoop(LI.getLoopFor(B)); + } + +private: + // Record a Tapir loop defined by loop \p L and task \p T. + TapirLoopInfo *createTapirLoop(Loop *L, Task *T) { + TapirLoops.push_back(new TapirLoopInfo(L, T, ORE)); + TaskToTapirLoop[T] = TapirLoops.back(); + LoopToTapirLoop[L] = TapirLoops.back(); + ++TapirLoopsFound; + return TapirLoops.back(); + } + + // Forget the specified Tapir loop \p TL. + void forgetTapirLoop(TapirLoopInfo *TL) { + TaskToTapirLoop.erase(TL->getTask()); + LoopToTapirLoop.erase(TL->getLoop()); + } + + // If loop \p L is a Tapir loop, return its corresponding task. + Task *getTaskIfTapirLoop(const Loop *L); + + // Get the LoopOutlineProcessor for handling Tapir loop \p TL. + LoopOutlineProcessor *getOutlineProcessor(TapirLoopInfo *TL); + + using LOPMapTy = DenseMap>; + + // For all recorded Tapir loops, determine the function arguments and inputs + // for the outlined helper functions for those loops. + // + // The \p LoopArgs map will store the function arguments for these outlined + // loop helpers. The \p LoopInputs map will store the corresponding arguments + // for calling those outlined helpers from the parent function. The \p + // LoopArgStarts map will store the instruction in the parent where new code + // for computing these outlined-helper-call arguments is first inserted. 
+ void getAllTapirLoopInputs( + DenseMap &LoopInputSets, + DenseMap> &LoopCtlArgs, + DenseMap> &LoopCtlInputs); + + // Associate tasks with Tapir loops that enclose them. + void associateTasksToTapirLoops(); + + // Get the set of basic blocks within the task of Tapir loop \p TL. The \p + // TaskBlocks vector stores all of these basic blocks. The \p ReattachBlocks + // set identifies which blocks are terminated by a reattach instruction that + // terminates the task. The \p DetachedRethrowBlocks set identifies which + // blocks are terminated by detached-rethrow instructions that terminate the + // task. Entry points to shared exception-handling code is stored in the + // \p SharedEHEntries set. + // + // This method relies on being executed on the Tapir loops in a function in + // post order. + void getTapirLoopTaskBlocks( + TapirLoopInfo *TL, std::vector &TaskBlocks, + SmallPtrSetImpl &ReattachBlocks, + SmallPtrSetImpl &DetachedRethrowBlocks, + SmallPtrSetImpl &SharedEHEntries, + SmallPtrSetImpl &UnreachableExits); + + // Outline Tapir loop \p TL into a helper function. The \p Args set specified + // the arguments to that helper function. The map \p VMap will store the + // mapping of values in the original function to values in the outlined + // helper. + Function *createHelperForTapirLoop(TapirLoopInfo *TL, ValueSet &Args, + unsigned IVArgIndex, + unsigned LimitArgIndex, Module *DestM, + ValueToValueMapTy &VMap, + ValueToValueMapTy &InputMap); + + // Outline all recorded Tapir loops in the function. 
+ TaskOutlineMapTy outlineAllTapirLoops(); + +private: + Function &F; + + DominatorTree &DT; + LoopInfo &LI; + TaskInfo &TI; + ScalarEvolution &SE; + AssumptionCache &AC; + TargetTransformInfo &TTI; + TapirTarget *Target; + OptimizationRemarkEmitter &ORE; + + std::vector TapirLoops; + DenseMap TaskToTapirLoop; + DenseMap LoopToTapirLoop; + LOPMapTy OutlineProcessors; +}; +} // end anonymous namespace + +// Set up a basic unwind for a detached task: +// +// callunwind: +// lpad = landingpad +// catch null +// invoke detached_rethrow(lpad), label unreachable, label detach_unwind +static BasicBlock *createTaskUnwind(Function *F, BasicBlock *UnwindDest, + Value *SyncRegion, const Twine &Name = "") { + Module *M = F->getParent(); + LLVMContext &Ctx = M->getContext(); + BasicBlock *CallUnwind = BasicBlock::Create(Ctx, Name, F); + + // Create the landing bad. + IRBuilder<> Builder(CallUnwind); + LandingPadInst *LPad = Builder.CreateLandingPad( + UnwindDest->getLandingPadInst()->getType(), 0); + LPad->setCleanup(true); + // Create the normal return for the detached rethrow. + BasicBlock *DRUnreachable = BasicBlock::Create( + Ctx, CallUnwind->getName()+".unreachable", F); + // Invoke the detached rethrow. + Builder.CreateInvoke( + Intrinsic::getDeclaration(M, Intrinsic::detached_rethrow, + { LPad->getType() }), + DRUnreachable, UnwindDest, { SyncRegion, LPad }); + + // Terminate the normal return of the detached rethrow with unreachable. + Builder.SetInsertPoint(DRUnreachable); + Builder.CreateUnreachable(); + + return CallUnwind; +} + +/// Implement the parallel loop control for a given outlined Tapir loop to +/// process loop iterations in a parallel recursive divide-and-conquer fashion. 
+void DACSpawning::implementDACIterSpawnOnHelper( + TapirLoopInfo &TL, TaskOutlineInfo &Out, ValueToValueMapTy &VMap) { + NamedRegionTimer NRT("implementDACIterSpawnOnHelper", + "Implement D&C spawning of loop iterations", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + Task *T = TL.getTask(); + Loop *L = TL.getLoop(); + + DebugLoc TLDebugLoc = cast(VMap[T->getDetach()])->getDebugLoc(); + Value *SyncRegion = cast(VMap[T->getDetach()->getSyncRegion()]); + Function *Helper = Out.Outline; + BasicBlock *Preheader = cast(VMap[L->getLoopPreheader()]); + + PHINode *PrimaryIV = cast(VMap[TL.getPrimaryInduction().first]); + + // Remove the norecurse attribute from Helper. + if (Helper->doesNotRecurse()) + Helper->removeFnAttr(Attribute::NoRecurse); + + // Convert the cloned loop into the strip-mined loop body. + assert(Preheader->getParent() == Helper && + "Preheader does not belong to helper function."); + assert(PrimaryIV->getParent()->getParent() == Helper && + "PrimaryIV does not belong to header"); + + // Get end and grainsize arguments + Argument *End, *Grainsize; + { + auto OutlineArgsIter = Helper->arg_begin(); + if (Helper->hasParamAttribute(0, Attribute::StructRet)) + ++OutlineArgsIter; + // End argument is second LC input. + End = &*++OutlineArgsIter; + // Grainsize argument is third LC input. + Grainsize = &*++OutlineArgsIter; + } + + BasicBlock *DACHead = Preheader; + if (&(Helper->getEntryBlock()) == Preheader) { + // Split the entry block. We'll want to create a backedge into + // the split block later. + DACHead = SplitBlock(Preheader, &Preheader->front()); + + // Move any syncregion_start's in DACHead into Preheader. 
+ BasicBlock::iterator InsertPoint = Preheader->begin(); + for (BasicBlock::iterator I = DACHead->begin(), E = DACHead->end(); + I != E;) { + IntrinsicInst *II = dyn_cast(I++); + if (!II) + continue; + if (Intrinsic::syncregion_start != II->getIntrinsicID()) + continue; + + while (isa(I) && + Intrinsic::syncregion_start == + cast(I)->getIntrinsicID()) + ++I; + + Preheader->splice(InsertPoint, &*DACHead, II->getIterator(), I); + } + + if (!Preheader->getTerminator()->getDebugLoc()) + Preheader->getTerminator()->setDebugLoc( + DACHead->getTerminator()->getDebugLoc()); + } + + Value *PrimaryIVInput = PrimaryIV->getIncomingValueForBlock(DACHead); + Value *PrimaryIVInc = PrimaryIV->getIncomingValueForBlock( + cast(VMap[L->getLoopLatch()])); + + // At this point, DACHead is the preheader to the loop and is guaranteed to + // not be the function entry: + // + // DACHead: ; preds = %entry + // br label Header + // + // From this block, we first create the skeleton of the parallel D&C loop + // control: + // + // DACHead: + // PrimaryIVStart = phi ??? + // IterCount = sub End, PrimaryIVStart + // IterCountCmp = icmp ugt IterCount, Grainsize + // br i1 IterCountCmp, label RecurHead, label Header + // + // RecurHead: + // br label RecurDet + // + // RecurDet: + // br label RecurCont + // + // RecurCont: + // br label DACHead + BasicBlock *RecurHead, *RecurDet, *RecurCont; + Value *IterCount; + PHINode *PrimaryIVStart; + Value *Start; + { + Instruction *PreheaderOrigFront = &(DACHead->front()); + IRBuilder<> Builder(PreheaderOrigFront); + if (!Builder.getCurrentDebugLocation()) + Builder.SetCurrentDebugLocation( + Preheader->getTerminator()->getDebugLoc()); + // Create branch based on grainsize. + PrimaryIVStart = Builder.CreatePHI(PrimaryIV->getType(), 2, + PrimaryIV->getName()+".dac"); + PrimaryIVStart->setDebugLoc(PrimaryIV->getDebugLoc()); + PrimaryIVInput->replaceAllUsesWith(PrimaryIVStart); + Start = PrimaryIVStart; + // Extend or truncate start, if necessary. 
+ if (PrimaryIVStart->getType() != End->getType()) + Start = Builder.CreateZExtOrTrunc(PrimaryIVStart, End->getType()); + IterCount = Builder.CreateSub(End, Start, "itercount"); + Value *IterCountCmp = Builder.CreateICmpUGT(IterCount, Grainsize); + Instruction *RecurTerm = + SplitBlockAndInsertIfThen(IterCountCmp, PreheaderOrigFront, + /*Unreachable=*/false, + /*BranchWeights=*/nullptr); + RecurHead = RecurTerm->getParent(); + // Create RecurHead, RecurDet, and RecurCont, with appropriate branches. + RecurDet = SplitBlock(RecurHead, RecurHead->getTerminator()); + RecurCont = SplitBlock(RecurDet, RecurDet->getTerminator()); + RecurCont->getTerminator()->replaceUsesOfWith(RecurTerm->getSuccessor(0), + DACHead); + } + + // Compute the mid iteration in RecurHead: + // + // RecurHead: + // %halfcount = lshr IterCount, 1 + // MidIter = add PrimaryIVStart, %halfcount + // br label RecurDet + Instruction *MidIter; + { + IRBuilder<> Builder(&(RecurHead->front())); + Value *HalfCount = Builder.CreateLShr(IterCount, 1, "halfcount"); + MidIter = cast(Builder.CreateAdd(Start, HalfCount, "miditer")); + // Copy flags from the increment operation on the primary IV. + MidIter->copyIRFlags(PrimaryIVInc); + } + + // Create a recursive call in RecurDet. If the call cannot throw, then + // RecurDet becomes: + // + // RecurDet: + // call Helper(..., PrimaryIVStart, MidIter, ...) + // br label RecurCont + // + // Otherwise an a new unwind destination, CallUnwind, is created or the + // invoke, and RecurDet becomes: + // + // RecurDet: + // invoke Helper(..., PrimaryIVStart, MidIter, ...) + // to label CallDest unwind label CallUnwind + // + // CallDest: + // br label RecurCont + BasicBlock *RecurCallDest = RecurDet; + BasicBlock *UnwindDest = nullptr; + if (TL.getUnwindDest()) + UnwindDest = cast(VMap[TL.getUnwindDest()]); + { + // Create input array for recursive call. 
+ IRBuilder<> Builder(&(RecurDet->front())); + SmallVector RecurCallInputs; + for (Value &V : Helper->args()) { + // Only the inputs for the start and end iterations need special care. + // All other inputs should match the arguments of Helper. + if (&V == PrimaryIVInput) + RecurCallInputs.push_back(PrimaryIVStart); + else if (&V == End) + RecurCallInputs.push_back(MidIter); + else + RecurCallInputs.push_back(&V); + } + + if (!UnwindDest) { + // Common case. Insert a call to the outline immediately before the detach. + CallInst *RecurCall; + // Create call instruction. + RecurCall = Builder.CreateCall(Helper, RecurCallInputs); + // Use a fast calling convention for the outline. + RecurCall->setCallingConv(Helper->getCallingConv()); + RecurCall->setDebugLoc(TLDebugLoc); + if (Helper->doesNotThrow()) + RecurCall->setDoesNotThrow(); + } else { + InvokeInst *RecurCall; + BasicBlock *CallDest = SplitBlock(RecurDet, RecurDet->getTerminator()); + BasicBlock *CallUnwind = + createTaskUnwind(Helper, UnwindDest, SyncRegion, + RecurDet->getName()+".unwind"); + RecurCall = InvokeInst::Create(Helper, CallDest, CallUnwind, + RecurCallInputs); + // Use a fast calling convention for the outline. + RecurCall->setCallingConv(Helper->getCallingConv()); + RecurCall->setDebugLoc(TLDebugLoc); + ReplaceInstWithInst(RecurDet->getTerminator(), RecurCall); + RecurCallDest = CallDest; + } + } + + // Set up continuation of detached recursive call to compute the next loop + // iteration to execute. For inclusive ranges, this means adding one to + // MidIter: + // + // RecurCont: + // MidIterPlusOne = add MidIter, 1 + // br label DACHead + Instruction *NextIter = MidIter; + if (TL.isInclusiveRange()) { + IRBuilder<> Builder(&(RecurCont->front())); + NextIter = cast( + Builder.CreateAdd(MidIter, ConstantInt::get(End->getType(), 1), + "miditerplusone")); + // Copy flags from the increment operation on the primary IV. 
+ NextIter->copyIRFlags(PrimaryIVInc); + // Extend or truncate NextIter, if necessary + if (PrimaryIVStart->getType() != NextIter->getType()) + NextIter = cast( + Builder.CreateZExtOrTrunc(NextIter, PrimaryIVStart->getType())); + } else if (PrimaryIVStart->getType() != NextIter->getType()) { + IRBuilder<> Builder(&(RecurCont->front())); + NextIter = cast( + Builder.CreateZExtOrTrunc(NextIter, PrimaryIVStart->getType())); + } + + // Finish the phi node in DACHead. + // + // DACHead: + // PrimaryIVStart = phi [ PrimaryIVInput, %entry ], [ NextIter, RecurCont ] + // ... + PrimaryIVStart->addIncoming(PrimaryIVInput, Preheader); + PrimaryIVStart->addIncoming(NextIter, RecurCont); + + // Make the recursive DAC call parallel. + // + // RecurHead: + // detach within SyncRegion, label RecurDet, label RecurCont + // (unwind label DetachUnwind) + // + // RecurDet: + // call Helper(...) + // reattach label RecurCont + // + // or + // + // RecurDet: + // invoke Helper(...) to CallDest unwind UnwindDest + // + // CallDest: + // reattach label RecurCont + { + IRBuilder<> Builder(RecurHead->getTerminator()); + // Create the detach. + DetachInst *NewDI; + if (!UnwindDest) + NewDI = Builder.CreateDetach(RecurDet, RecurCont, SyncRegion); + else + NewDI = Builder.CreateDetach(RecurDet, RecurCont, UnwindDest, + SyncRegion); + NewDI->setDebugLoc(TLDebugLoc); + RecurHead->getTerminator()->eraseFromParent(); + + // Create the reattach. + Builder.SetInsertPoint(RecurCallDest->getTerminator()); + ReattachInst *RI = Builder.CreateReattach(RecurCont, SyncRegion); + RI->setDebugLoc(TLDebugLoc); + RecurCallDest->getTerminator()->eraseFromParent(); + } +} + +/// Examine a given loop to determine if its a Tapir loop that can and should be +/// processed. Returns the Task that encodes the loop body if so, or nullptr if +/// not. 
+Task *LoopSpawningImpl::getTaskIfTapirLoop(const Loop *L) { + NamedRegionTimer NRT("getTaskIfTapirLoop", + "Check if loop is a Tapir loop to process", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + + LLVM_DEBUG(dbgs() << "Analyzing for spawning: " << *L); + + TapirLoopHints Hints(L); + + // Loop must have a preheader. LoopSimplify should guarantee that the loop + // preheader is not terminated by a sync. + const BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) { + LLVM_DEBUG(dbgs() << "Loop lacks a preheader.\n"); + if (hintsDemandOutlining(Hints)) { + ORE.emit(TapirLoopInfo::createMissedAnalysis(LS_NAME, "NoPreheader", L) + << "loop lacks a preheader"); + emitMissedWarning(L, Hints, &ORE); + } + return nullptr; + } else if (!isa(Preheader->getTerminator())) { + LLVM_DEBUG(dbgs() << "Loop preheader is not terminated by a branch.\n"); + if (hintsDemandOutlining(Hints)) { + ORE.emit(TapirLoopInfo::createMissedAnalysis(LS_NAME, "ComplexPreheader", + L) + << "loop preheader not terminated by a branch"); + emitMissedWarning(L, Hints, &ORE); + } + return nullptr; + } + + // Get the task for this loop if it is a Tapir loop. + Task *T = llvm::getTaskIfTapirLoop(L, &TI); + if (!T) { + LLVM_DEBUG(dbgs() << "Loop does not match structure of Tapir loop.\n"); + if (hintsDemandOutlining(Hints)) { + ORE.emit(TapirLoopInfo::createMissedAnalysis(LS_NAME, "NonCanonicalLoop", + L) + << "loop does not have the canonical structure of a Tapir loop"); + emitMissedWarning(L, Hints, &ORE); + } + return nullptr; + } + + return T; +} + +/// Get the LoopOutlineProcessor for handling Tapir loop \p TL. +LoopOutlineProcessor *LoopSpawningImpl::getOutlineProcessor(TapirLoopInfo *TL) { + NamedRegionTimer NRT("getOutlineProcessor", + "Get a loop-outline processor for a Tapir loop", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + + // Allow the Tapir target to define a custom loop-outline processor. 
+ if (LoopOutlineProcessor *TargetLOP = Target->getLoopOutlineProcessor(TL)) + return TargetLOP; + + Module &M = *F.getParent(); + Loop *L = TL->getLoop(); + TapirLoopHints Hints(L); + + switch (Hints.getStrategy()) { + case TapirLoopHints::ST_DAC: return new DACSpawning(M); + default: return new DefaultLoopOutlineProcessor(M); + } +} + +/// Associate tasks with Tapir loops that enclose them. +void LoopSpawningImpl::associateTasksToTapirLoops() { + NamedRegionTimer NRT("associateTasksToTapirLoops", + "Associate tasks to Tapir loops", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + + SmallVector UnassocTasks; + // Traverse the tasks in post order, queueing up tasks that are not roots of + // Tapir loops. + for (Task *T : post_order(TI.getRootTask())) { + TapirLoopInfo *TL = getTapirLoop(T); + if (!TL) { + UnassocTasks.push_back(T); + continue; + } + + // When we find a Task T at the root of a Tapir loop TL, associate + // previously traversed tasks that are enclosed in T with TL. + while (!UnassocTasks.empty()) { + Task *UT = UnassocTasks.back(); + if (!TI.encloses(T, UT)) + break; + TL->addDescendantTask(UT); + UnassocTasks.pop_back(); + } + } +} + +// Helper test to see if the given basic block is the placeholder normal +// destination of a detached.rethrow or taskframe.resume intrinsic. +static bool isUnreachablePlaceholder(const BasicBlock *B) { + for (const BasicBlock *Pred : predecessors(B)) { + if (!isDetachedRethrow(Pred->getTerminator()) && + !isTaskFrameResume(Pred->getTerminator())) + return false; + if (B != cast(Pred->getTerminator())->getNormalDest()) + return false; + } + return true; +} + +/// Get the set of basic blocks within the task of Tapir loop \p TL. The \p +/// TaskBlocks vector stores all of these basic blocks. The \p ReattachBlocks +/// set identifies which blocks are terminated by a reattach instruction that +/// terminates the task. 
The \p DetachedRethrowBlocks set identifies which +/// blocks are terminated by detached-rethrow instructions that terminate the +/// task. Entry points to shared exception-handling code is stored in the +/// \p SharedEHEntries set. +/// +/// This method relies on being executed on the Tapir loops in a function in +/// post order. +void LoopSpawningImpl::getTapirLoopTaskBlocks( + TapirLoopInfo *TL, std::vector &TaskBlocks, + SmallPtrSetImpl &ReattachBlocks, + SmallPtrSetImpl &DetachedRethrowBlocks, + SmallPtrSetImpl &SharedEHEntries, + SmallPtrSetImpl &UnreachableExits) { + NamedRegionTimer NRT("getTapirLoopTaskBlocks", + "Get basic blocks for Tapir loop", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + + Task *T = TL->getTask(); + SmallVector EnclosedTasks; + TL->getEnclosedTasks(EnclosedTasks); + SmallPtrSet VisitedSharedEH; + + // Get the header and loop-latch blocks of all Tapir subloops. + SmallPtrSet SubloopControlToExclude; + for (Task *EncT : EnclosedTasks) { + for (Task *SubT : EncT->subtasks()) { + if (TapirLoopInfo *SubTL = getTapirLoop(SubT)) { + SubloopControlToExclude.insert(SubTL->getLoop()->getHeader()); + SubloopControlToExclude.insert(SubTL->getLoop()->getLoopLatch()); + // Mark the unwind destination of this subloop's detach as a + // "SharedEHEntry," meaning it needs its Phi nodes updated after + // cloning. + DetachInst *SubDI = + cast(SubTL->getLoop()->getHeader()->getTerminator()); + if (SubDI->hasUnwindDest()) + SharedEHEntries.insert(SubDI->getUnwindDest()); + } + } + } + + for (Task *EncT : EnclosedTasks) { + for (Spindle *S : depth_first>(EncT->getEntrySpindle())) { + // Record the entry blocks of any shared-EH spindles. + if (S->isSharedEH()) { + SharedEHEntries.insert(S->getEntry()); + if (!VisitedSharedEH.insert(S).second) + continue; + } + + bool TopLevelTaskSpindle = T->contains(S) || T->isSharedEHExit(S); + for (BasicBlock *B : S->blocks()) { + // Don't clone header and loop-latch blocks for Tapir subloops. 
+ if (SubloopControlToExclude.count(B)) + continue; + + // Skip basic blocks that are successors of detached rethrows in T. + // They're dead anyway. + if (TopLevelTaskSpindle && isSuccessorOfDetachedRethrow(B)) + continue; + + // Skip unreachable placeholder blocks, namely, the normal destinations + // of detached.rethrow and taskframe.resume instructions. + if (isUnreachablePlaceholder(B)) + continue; + + LLVM_DEBUG(dbgs() << "Adding block " << B->getName() << "\n"); + TaskBlocks.push_back(B); + + if (TopLevelTaskSpindle) { + // Record the blocks terminated by reattaches and detached rethrows. + if (isa(B->getTerminator())) + ReattachBlocks.insert(B); + if (isDetachedRethrow(B->getTerminator())) + DetachedRethrowBlocks.insert(B); + if (isTaskFrameResume(B->getTerminator())) + UnreachableExits.insert( + cast(B->getTerminator())->getNormalDest()); + } else if (isDetachedRethrow(B->getTerminator()) || + isTaskFrameResume(B->getTerminator())) { + UnreachableExits.insert( + cast(B->getTerminator())->getNormalDest()); + } + } + } + } +} + +/// Compute the grainsize of the loop, based on the limit. Currently this +/// routine injects a call to the tapir_loop_grainsize intrinsic, which is +/// handled in a target-specific way by subsequent lowering passes. +static Value *computeGrainsize(TapirLoopInfo *TL) { + Value *TripCount = TL->getTripCount(); + assert(TripCount && + "No trip count found for computing grainsize of Tapir loop."); + Type *IdxTy = TripCount->getType(); + BasicBlock *Preheader = TL->getLoop()->getLoopPreheader(); + Module *M = Preheader->getModule(); + IRBuilder<> B(Preheader->getTerminator()); + B.SetCurrentDebugLocation(TL->getDebugLoc()); + return B.CreateCall( + Intrinsic::getDeclaration(M, Intrinsic::tapir_loop_grainsize, + { IdxTy }), { TripCount }); +} + +/// Get the grainsize of this loop either from metadata or by computing the +/// grainsize. 
+static Value *getGrainsizeVal(TapirLoopInfo *TL) { + Value *GrainVal; + if (unsigned Grainsize = TL->getGrainsize()) + GrainVal = ConstantInt::get(TL->getTripCount()->getType(), Grainsize); + else + GrainVal = computeGrainsize(TL); + + LLVM_DEBUG(dbgs() << "Grainsize value: " << *GrainVal << "\n"); + return GrainVal; +} + +/// Determine the inputs to Tapir loop \p TL for the loop control. +static void getLoopControlInputs(TapirLoopInfo *TL, + SmallVectorImpl &LCArgs, + SmallVectorImpl &LCInputs) { + // Add an argument for the primary induction variable. + auto &PrimaryInduction = TL->getPrimaryInduction(); + PHINode *PrimaryPhi = PrimaryInduction.first; + TL->StartIterArg = new Argument(PrimaryPhi->getType(), + PrimaryPhi->getName() + ".start"); + LCArgs.push_back(TL->StartIterArg); + LCInputs.push_back(PrimaryInduction.second.getStartValue()); + + // Add an argument for the trip count. + Value *TripCount = TL->getTripCount(); + assert(TripCount && "No trip count found for Tapir loop end argument."); + TL->EndIterArg = new Argument(TripCount->getType(), "end"); + LCArgs.push_back(TL->EndIterArg); + LCInputs.push_back(TripCount); + + // Add an argument for the grainsize. + Value *GrainsizeVal = getGrainsizeVal(TL); + TL->GrainsizeArg = new Argument(GrainsizeVal->getType(), "grainsize"); + LCArgs.push_back(TL->GrainsizeArg); + LCInputs.push_back(GrainsizeVal); + + assert(TL->getInductionVars()->size() == 1 && + "Induction vars to process for arguments."); + // // Add arguments for the other IV's. 
+ // for (auto &InductionEntry : *TL->getInductionVars()) { + // PHINode *Phi = InductionEntry.first; + // InductionDescriptor II = InductionEntry.second; + // if (Phi == PrimaryInduction.first) continue; + // LCArgs.push_back(new Argument(Phi->getType(), + // Phi->getName() + ".start")); + // LCInputs.push_back(II.getStartValue()); + // } +} + +/// For all recorded Tapir loops, determine the function arguments and inputs +/// for the outlined helper functions for those loops. +/// +/// The \p LoopArgs map will store the function arguments for these outlined +/// loop helpers. The \p LoopInputs map will store the corresponding arguments +/// for calling those outlined helpers from the parent function. The \p +/// LoopArgStarts map will store the instruction in the parent where new code +/// for computing these outlined-helper-call arguments is first inserted. +void LoopSpawningImpl::getAllTapirLoopInputs( + DenseMap &LoopInputSets, + DenseMap> &LoopCtlArgs, + DenseMap> &LoopCtlInputs) { + NamedRegionTimer NRT("getAllTapirLoopInputs", + "Determine inputs for all Tapir loops", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + + // Determine the inputs for all tasks. + TaskValueSetMap TaskInputs = findAllTaskInputs(F, DT, TI); + + // Combine these sets of inputs to determine inputs for each Tapir loop. + DenseMap TapirLoopInputs; + for (Task *T : post_order(TI.getRootTask())) { + if (TapirLoopInfo *TL = getTapirLoop(T)) { + Loop *L = TL->getLoop(); + + // Convert inputs for task T to Tapir-loop inputs. + ValueSet TLInputs = getTapirLoopInputs(TL, TaskInputs[T]); + LoopInputSets[L] = TLInputs; + LLVM_DEBUG({ + dbgs() << "TLInputs\n"; + for (Value *V : TLInputs) + dbgs() << "\t" << *V << "\n"; + }); + + // Determine loop-control inputs. 
+ getLoopControlInputs(TL, LoopCtlArgs[L], LoopCtlInputs[L]); + + LLVM_DEBUG({ + dbgs() << "LoopCtlArgs:\n"; + for (Value *V : LoopCtlArgs[L]) + dbgs() << "\t" << *V << "\n"; + dbgs() << "LoopCtlInputs:\n"; + for (Value *V : LoopCtlInputs[L]) + dbgs() << "\t" << *V << "\n"; + }); + } + } +} + +static void updateClonedIVs( + TapirLoopInfo *TL, BasicBlock *OrigPreheader, + ValueSet &Args, ValueToValueMapTy &VMap, unsigned IVArgIndex, + unsigned NextIVArgOffset = 3) { + NamedRegionTimer NRT("updateClonedIVs", "Updated IVs in Tapir-loop helper", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + + auto &PrimaryInduction = TL->getPrimaryInduction(); + PHINode *PrimaryPhi = PrimaryInduction.first; + + Value *PrimaryArg = Args[IVArgIndex]; + + // TODO: This assertion implies that the following loop should only run once, + // for the primary induction variable. However, the loop is provided in case + // we decide to handle more complicated sets of induction variables in the + // future. + assert(TL->getInductionVars()->size() == 1 && + "updateClonedIVs to process multiple inductions."); + + // Get the next argument that provides an input to an IV, which is typically 3 + // after the input for the primary induction variable, after the end-teration + // and grainsize arguments. 
+ unsigned ArgIdx = IVArgIndex + NextIVArgOffset; + for (auto &InductionEntry : *TL->getInductionVars()) { + PHINode *OrigPhi = InductionEntry.first; + InductionDescriptor II = InductionEntry.second; + assert(II.getKind() == InductionDescriptor::IK_IntInduction && + "Non-integer induction found."); + assert((II.getConstIntStepValue()->isOne() || + II.getConstIntStepValue()->isMinusOne()) && + "Non-canonical induction found: non-unit step."); + assert(isa(II.getStartValue()) && + "Non-canonical induction found: non-constant start."); + assert(cast(II.getStartValue())->isNullValue() && + "Non-canonical induction found: non-zero start."); + + // Get the remapped PHI node and preheader + PHINode *NewPhi = cast(VMap[OrigPhi]); + BasicBlock *NewPreheader = cast(VMap[OrigPreheader]); + + // Replace the input for the remapped PHI node from the preheader with the + // input argument. + unsigned BBIdx = NewPhi->getBasicBlockIndex(NewPreheader); + if (OrigPhi == PrimaryPhi) + NewPhi->setIncomingValue(BBIdx, VMap[PrimaryArg]); + else + // TODO: Because of the assertion above, this line should never run. + NewPhi->setIncomingValue(BBIdx, VMap[Args[ArgIdx++]]); + } +} + +namespace { +// ValueMaterializer to manage remapping uses of the tripcount in the helper +// function for the loop, when the only uses of tripcount occur in the condition +// for the loop backedge and, possibly, in metadata. +class ArgEndMaterializer final : public OutlineMaterializer { +private: + Value *TripCount; + Value *ArgEnd; +public: + ArgEndMaterializer(const Instruction *SrcSyncRegion, Value *TripCount, + Value *ArgEnd) + : OutlineMaterializer(SrcSyncRegion), TripCount(TripCount), + ArgEnd(ArgEnd) {} + + Value *materialize(Value *V) final { + // If we're materializing metadata for TripCount, materialize empty metadata + // instead. 
+ if (auto *MDV = dyn_cast(V)) { + Metadata *MD = MDV->getMetadata(); + if (auto *LAM = dyn_cast(MD)) + if (LAM->getValue() == TripCount) + return MetadataAsValue::get( + V->getContext(), MDTuple::get(V->getContext(), std::nullopt)); + } + + // Materialize TripCount with ArgEnd. This should only occur in the loop + // latch, and we'll overwrite the use of ArgEnd later. + if (V == TripCount) + return ArgEnd; + + // Otherwise go with the default behavior. + return OutlineMaterializer::materialize(V); + } +}; +} + +/// Outline Tapir loop \p TL into a helper function. The \p Args set specified +/// the arguments to that helper function. The map \p VMap will store the +/// mapping of values in the original function to values in the outlined helper. +Function *LoopSpawningImpl::createHelperForTapirLoop( + TapirLoopInfo *TL, ValueSet &Args, unsigned IVArgIndex, + unsigned LimitArgIndex, Module *DestM, ValueToValueMapTy &VMap, + ValueToValueMapTy &InputMap) { + Task *T = TL->getTask(); + Loop *L = TL->getLoop(); + BasicBlock *Header = L->getHeader(); + BasicBlock *Preheader = L->getLoopPreheader(); + + // Collect all basic blocks in the Tapir loop. + std::vector TLBlocks; + TLBlocks.push_back(L->getHeader()); + // Entry blocks of shared-EH spindles may contain PHI nodes that need to be + // rewritten in the cloned helper. + SmallPtrSet SharedEHEntries; + SmallPtrSet DetachedRethrowBlocks; + SmallPtrSet UnreachableExits; + // Reattach instructions and detached rethrows in this task might need special + // handling. 
+ SmallPtrSet ReattachBlocks; + getTapirLoopTaskBlocks(TL, TLBlocks, ReattachBlocks, DetachedRethrowBlocks, + SharedEHEntries, UnreachableExits); + TLBlocks.push_back(L->getLoopLatch()); + + DetachInst *DI = T->getDetach(); + const Instruction *InputSyncRegion = + dyn_cast(DI->getSyncRegion()); + + OutlineMaterializer *Mat = nullptr; + if (!isa(TL->getTripCount()) && !Args.count(TL->getTripCount())) + // Create an ArgEndMaterializer to handle uses of TL->getTripCount(). + Mat = new ArgEndMaterializer(InputSyncRegion, TL->getTripCount(), + Args[LimitArgIndex]); + else + Mat = new OutlineMaterializer(InputSyncRegion); + + Twine NameSuffix = ".ls" + Twine(TL->getLoop()->getLoopDepth()); + SmallVector Returns; // Ignore returns cloned. + ValueSet Outputs; // Outputs must be empty. + Function *Helper; + { + NamedRegionTimer NRT("CreateHelper", "Create helper for Tapir loop", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + Helper = CreateHelper( + Args, Outputs, TLBlocks, Header, Preheader, TL->getExitBlock(), VMap, + DestM, F.getSubprogram() != nullptr, Returns, NameSuffix.str(), nullptr, + &DetachedRethrowBlocks, &SharedEHEntries, TL->getUnwindDest(), + &UnreachableExits, nullptr, nullptr, nullptr, Mat); + } // end timed region + + assert(Returns.empty() && "Returns cloned when cloning detached CFG."); + // If the Tapir loop has no unwind destination, then the outlined function + // cannot throw. + if (F.doesNotThrow() && !TL->getUnwindDest()) + Helper->setDoesNotThrow(); + // Don't inherit the noreturn attribute from the caller. + if (F.doesNotReturn()) + Helper->removeFnAttr(Attribute::NoReturn); + + // Update cloned loop condition to use the end-iteration argument. 
+ unsigned TripCountIdx = 0; + Value *TripCount = TL->getTripCount(); + if (InputMap[TripCount]) + TripCount = InputMap[TripCount]; + if (TL->getCondition()->getOperand(0) != TripCount) + ++TripCountIdx; + assert(TL->getCondition()->getOperand(TripCountIdx) == TripCount && + "Trip count not used in condition"); + ICmpInst *ClonedCond = cast(VMap[TL->getCondition()]); + ClonedCond->setOperand(TripCountIdx, VMap[Args[LimitArgIndex]]); + + // If the trip count is variable and we're not passing the trip count as an + // argument, undo the eariler temporarily mapping. + if (!isa(TL->getTripCount()) && !Args.count(TL->getTripCount())) { + VMap.erase(TL->getTripCount()); + } + + // Delete the ArgEndMaterializer or OutlineMaterializer. + if (Mat) + delete Mat; + + // Rewrite cloned IV's to start at their start-iteration arguments. + updateClonedIVs(TL, Preheader, Args, VMap, IVArgIndex); + + // Add alignment assumptions to arguments of helper, based on alignment of + // values in old function. + { + NamedRegionTimer NRT("AddAlignmentAssumptions", + "Add alignment assumptions to Tapir-loop helper", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + AddAlignmentAssumptions(&F, Args, VMap, Preheader->getTerminator(), &AC, &DT); + } // end timed region + + // CreateHelper partially serializes the cloned copy of the loop by converting + // detached-rethrows into resumes. We now finish the job of serializing the + // cloned Tapir loop. + + // Move allocas in the newly cloned detached CFG to the entry block of the + // helper. + { + NamedRegionTimer NRT("updateAllocas", "Update allocas in Tapir-loop helper", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + // Collect the end instructions of the task. 
+ SmallVector TaskEnds; + for (BasicBlock *EndBlock : ReattachBlocks) + TaskEnds.push_back(cast(VMap[EndBlock])->getTerminator()); + for (BasicBlock *EndBlock : DetachedRethrowBlocks) + TaskEnds.push_back(cast(VMap[EndBlock])->getTerminator()); + + // Move allocas in cloned detached block to entry of helper function. + BasicBlock *ClonedTaskEntry = cast(VMap[T->getEntry()]); + bool ContainsDynamicAllocas = MoveStaticAllocasInBlock( + &Helper->getEntryBlock(), ClonedTaskEntry, TaskEnds); + + // If this task uses a taskframe, move allocas in cloned taskframe entry to + // entry of helper function. + if (Spindle *TFCreate = T->getTaskFrameCreateSpindle()) { + BasicBlock *ClonedTFEntry = cast(VMap[TFCreate->getEntry()]); + ContainsDynamicAllocas |= MoveStaticAllocasInBlock( + &Helper->getEntryBlock(), ClonedTFEntry, TaskEnds); + } + // If the cloned loop contained dynamic alloca instructions, wrap the cloned + // loop with llvm.stacksave/llvm.stackrestore intrinsics. + if (ContainsDynamicAllocas) { + Module *M = Helper->getParent(); + // Get the two intrinsics we care about. + Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); + Function *StackRestore = + Intrinsic::getDeclaration(M, Intrinsic::stackrestore); + + // Insert the llvm.stacksave. + CallInst *SavedPtr = + IRBuilder<>(&*ClonedTaskEntry, ClonedTaskEntry->begin()) + .CreateCall(StackSave, {}, "savedstack"); + + // Insert a call to llvm.stackrestore before the reattaches in the + // original Tapir loop. + for (Instruction *ExitPoint : TaskEnds) + IRBuilder<>(ExitPoint).CreateCall(StackRestore, SavedPtr); + } + } + + // Convert the cloned detach and reattaches into unconditional branches. 
+ { + NamedRegionTimer NRT("serializeClonedLoop", "Serialize cloned Tapir loop", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + DetachInst *ClonedDI = cast(VMap[DI]); + BasicBlock *ClonedDetacher = ClonedDI->getParent(); + BasicBlock *ClonedContinue = ClonedDI->getContinue(); + for (BasicBlock *RB : ReattachBlocks) { + ReattachInst *ClonedRI = cast(VMap[RB->getTerminator()]); + ReplaceInstWithInst(ClonedRI, BranchInst::Create(ClonedContinue)); + } + ClonedContinue->removePredecessor(ClonedDetacher); + BranchInst *DetachRepl = BranchInst::Create(ClonedDI->getDetached()); + ReplaceInstWithInst(ClonedDI, DetachRepl); + VMap[DI] = DetachRepl; + } // end timed region + + return Helper; +} + +/// Outline all recorded Tapir loops in the function. +TaskOutlineMapTy LoopSpawningImpl::outlineAllTapirLoops() { + // Prepare Tapir loops for outlining. + for (Task *T : post_order(TI.getRootTask())) { + if (TapirLoopInfo *TL = getTapirLoop(T)) { + PredicatedScalarEvolution PSE(SE, *TL->getLoop()); + bool canOutline = TL->prepareForOutlining(DT, LI, TI, PSE, AC, LS_NAME, + ORE, TTI); + if (!canOutline) { + const Loop *L = TL->getLoop(); + TapirLoopHints Hints(L); + emitMissedWarning(L, Hints, &ORE); + forgetTapirLoop(TL); + continue; + } + + // Get an outline processor for each Tapir loop. + OutlineProcessors[TL] = + std::unique_ptr(getOutlineProcessor(TL)); + } + } + + TaskOutlineMapTy TaskToOutline; + DenseMap LoopInputSets; + DenseMap> LoopCtlArgs; + DenseMap> LoopCtlInputs; + + DenseMap LoopArgs; + DenseMap> LoopInputs; + DenseMap LoopArgStarts; + + getAllTapirLoopInputs(LoopInputSets, LoopCtlArgs, LoopCtlInputs); + + associateTasksToTapirLoops(); + + for (Task *T : post_order(TI.getRootTask())) { + LLVM_DEBUG(dbgs() << "Examining task@" << T->getEntry()->getName() << + " for outlining\n"); + // If any subtasks were outlined as Tapir loops, replace these loops with + // calls to the outlined functions. 
+ { + NamedRegionTimer NRT("replaceSubLoopCalls", + "Update sub-Tapir-loops with calls to helpers", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + for (Task *SubT : T->subtasks()) { + if (TapirLoopInfo *TL = getTapirLoop(SubT)) { + // emitSCEVChecks(TL->getLoop(), TL->getBypass()); + Loop *L = TL->getLoop(); + TaskToOutline[SubT].replaceReplCall( + replaceLoopWithCallToOutline(TL, TaskToOutline[SubT], LoopInputs[L])); + } + } + } // end timed region + + TapirLoopInfo *TL = getTapirLoop(T); + if (!TL) + continue; + + Loop *L = TL->getLoop(); + LLVM_DEBUG(dbgs() << "Outlining Tapir " << *L << "\n"); + + // Convert the inputs of the Tapir loop to inputs to the helper. + ValueSet TLInputsFixed; + ValueToValueMapTy InputMap; + Instruction *ArgStart; + { + NamedRegionTimer NRT("fixupHelperInputs", + "Fixup inputs to Tapir-loop body", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + ArgStart = + fixupHelperInputs(F, T, LoopInputSets[L], TLInputsFixed, + L->getLoopPreheader()->getTerminator(), + &*L->getHeader()->getFirstInsertionPt(), + OutlineProcessors[TL]->getArgStructMode(), InputMap, + L); + } // end timed region + + ValueSet HelperArgs; + SmallVector HelperInputs; + { + NamedRegionTimer NRT("setupLoopOutlineArgs", + "Setup inputs to Tapir-loop helper function", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + OutlineProcessors[TL]->setupLoopOutlineArgs( + F, HelperArgs, HelperInputs, LoopInputSets[L], LoopCtlArgs[L], + LoopCtlInputs[L], TLInputsFixed); + } // end timed region + + LLVM_DEBUG({ + dbgs() << "HelperArgs:\n"; + for (Value *V : HelperArgs) + dbgs() << "\t" << *V << "\n"; + dbgs() << "HelperInputs:\n"; + for (Value *V : HelperInputs) + dbgs() << "\t" << *V << "\n"; + }); + + LoopArgs[L] = HelperArgs; + for (Value *V : HelperInputs) + LoopInputs[L].push_back(V); + LoopArgStarts[L] = ArgStart; + + ValueToValueMapTy VMap; + // Create the helper function. 
+ Function *Outline = createHelperForTapirLoop( + TL, LoopArgs[L], OutlineProcessors[TL]->getIVArgIndex(F, LoopArgs[L]), + OutlineProcessors[TL]->getLimitArgIndex(F, LoopArgs[L]), + &OutlineProcessors[TL]->getDestinationModule(), VMap, InputMap); + TaskToOutline[T] = TaskOutlineInfo( + Outline, T->getEntry(), cast(VMap[T->getDetach()]), + dyn_cast_or_null(VMap[T->getTaskFrameUsed()]), + LoopInputSets[L], LoopArgStarts[L], + L->getLoopPreheader()->getTerminator(), TL->getExitBlock(), + TL->getUnwindDest()); + + // Do ABI-dependent processing of each outlined Tapir loop. + { + NamedRegionTimer NRT("postProcessOutline", + "Post-process Tapir-loop helper function", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + OutlineProcessors[TL]->postProcessOutline(*TL, TaskToOutline[T], VMap); + } // end timed region + + LLVM_DEBUG({ + dbgs() << "LoopInputs[L]:\n"; + for (Value *V : LoopInputs[L]) + dbgs() << "\t" << *V << "\n"; + }); + + { + NamedRegionTimer NRT("clearMetadata", "Cleanup Tapir-loop metadata", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + TapirLoopHints Hints(L); + Hints.clearClonedLoopMetadata(VMap); + Hints.clearStrategy(); + } + + // Update subtask outline info to reflect the fact that their spawner was + // outlined. + { + NamedRegionTimer NRT("remapData", "Remap Tapir subloop information", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + LLVM_DEBUG(dbgs() << "Remapping subloop outline info.\n"); + for (Loop *SubL : *L) { + if (TapirLoopInfo *SubTL = getTapirLoop(SubL)) { + Task *SubT = SubTL->getTask(); + if (TaskToOutline.count(SubT)) { + TaskToOutline[SubT].remapOutlineInfo(VMap, InputMap); + OutlineProcessors[SubTL]->remapData(VMap); + } + } + } + } + } + + return TaskToOutline; +} + +bool LoopSpawningImpl::run() { + if (TI.isSerial()) + return false; + + // Discover all Tapir loops and record them. 
+ for (Loop *TopLevelLoop : LI) + for (Loop *L : post_order(TopLevelLoop)) + if (Task *T = getTaskIfTapirLoop(L)) + createTapirLoop(L, T); + + if (TapirLoops.empty()) + return false; + + // Perform any Target-dependent preprocessing of F. + Target->preProcessFunction(F, TI, true); + + // Outline all Tapir loops. + TaskOutlineMapTy TapirLoopOutlines = outlineAllTapirLoops(); + + // Perform target-specific processing of the outlined-loop calls. + { + NamedRegionTimer NRT("processOutlinedLoopCall", + "Process calls to outlined loops", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + for (Task *T : post_order(TI.getRootTask())) + if (TapirLoopInfo *TL = getTapirLoop(T)) + OutlineProcessors[TL]->processOutlinedLoopCall(*TL, TapirLoopOutlines[T], + DT); + } // end timed region + + // Perform any Target-dependent postprocessing of F. + Target->postProcessFunction(F, true); + + LLVM_DEBUG({ + NamedRegionTimer NRT("verify", "Post-loop-spawning verification", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + if (verifyModule(*F.getParent(), &errs())) { + LLVM_DEBUG(dbgs() << "Module after loop spawning:" << *F.getParent()); + llvm_unreachable("Loop spawning produced bad IR!"); + } + }); + + return true; +} + +PreservedAnalyses LoopSpawningPass::run(Module &M, ModuleAnalysisManager &AM) { + auto &FAM = AM.getResult(M).getManager(); + auto GetDT = [&FAM](Function &F) -> DominatorTree & { + return FAM.getResult(F); + }; + auto GetLI = [&FAM](Function &F) -> LoopInfo & { + return FAM.getResult(F); + }; + auto GetTI = [&FAM](Function &F) -> TaskInfo & { + return FAM.getResult(F); + }; + auto GetSE = [&FAM](Function &F) -> ScalarEvolution & { + return FAM.getResult(F); + }; + auto GetAC = [&FAM](Function &F) -> AssumptionCache & { + return FAM.getResult(F); + }; + auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & { + return FAM.getResult(F); + }; + auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult(F); + }; 
+ auto GetORE = [&FAM](Function &F) -> OptimizationRemarkEmitter & { + return FAM.getResult(F); + }; + + SmallVector WorkList; + bool Changed = false; + for (Function &F : M) + if (!F.empty()) + WorkList.push_back(&F); + + // Transform all loops into simplified, LCSSA form before we process them. + for (Function *F : WorkList) { + LoopInfo &LI = GetLI(*F); + DominatorTree &DT = GetDT(*F); + ScalarEvolution &SE = GetSE(*F); + SmallVector LoopWorkList; + for (Loop *L : LI) { + Changed |= simplifyLoop(L, &DT, &LI, &SE, &GetAC(*F), nullptr, + /* PreserveLCSSA */ false); + LoopWorkList.push_back(L); + } + for (Loop *L : LoopWorkList) + Changed |= formLCSSARecursively(*L, DT, &LI, &SE); + } + + // Now process each loop. + for (Function *F : WorkList) { + TapirTargetID TargetID = GetTLI(*F).getTapirTarget(); + std::unique_ptr Target(getTapirTargetFromID(M, TargetID)); + Changed |= LoopSpawningImpl(*F, GetDT(*F), GetLI(*F), GetTI(*F), GetSE(*F), + GetAC(*F), GetTTI(*F), Target.get(), GetORE(*F)) + .run(); + } + if (Changed) + return PreservedAnalyses::none(); + return PreservedAnalyses::all(); +} + +namespace { +// NB: Technicaly LoopSpawningTI should be a ModulePass, because it changes the +// contents of the module. But because a ModulePass cannot use many function +// analyses -- doing so results in invalid memory accesses -- we have to make +// LoopSpawningTI a FunctionPass. This problem is fixed with the new pass +// manager. 
+struct LoopSpawningTI : public FunctionPass { + /// Pass identification, replacement for typeid + static char ID; + explicit LoopSpawningTI() : FunctionPass(ID) { + initializeLoopSpawningTIPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + Module &M = *F.getParent(); + + auto &DT = getAnalysis().getDomTree(); + auto &LI = getAnalysis().getLoopInfo(); + auto &TI = getAnalysis().getTaskInfo(); + auto &SE = getAnalysis().getSE(); + auto &AC = getAnalysis().getAssumptionCache(F); + auto &TLI = getAnalysis().getTLI(F); + TapirTargetID TargetID = TLI.getTapirTarget(); + auto &TTI = getAnalysis().getTTI(F); + auto &ORE = getAnalysis().getORE(); + + LLVM_DEBUG(dbgs() << "LoopSpawningTI on function " << F.getName() << "\n"); + TapirTarget *Target = getTapirTargetFromID(M, TargetID); + bool Changed = + LoopSpawningImpl(F, DT, LI, TI, SE, AC, TTI, Target, ORE).run(); + delete Target; + return Changed; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequiredID(LoopSimplifyID); + AU.addRequiredID(LCSSAID); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + } +}; +} + +char LoopSpawningTI::ID = 0; +static const char ls_name[] = "Loop Spawning with Task Info"; +INITIALIZE_PASS_BEGIN(LoopSpawningTI, LS_NAME, ls_name, false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(LoopSimplify) +INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TaskInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) 
+INITIALIZE_PASS_END(LoopSpawningTI, LS_NAME, ls_name, false, false)
+
+namespace llvm {
+// Factory entry point used by the legacy pass manager to create the
+// LoopSpawningTI pass; caller owns the returned Pass.
+Pass *createLoopSpawningTIPass() {
+  return new LoopSpawningTI();
+}
+}
diff --git a/llvm/lib/Transforms/Tapir/LoopStripMine.cpp b/llvm/lib/Transforms/Tapir/LoopStripMine.cpp
new file mode 100644
index 00000000000000..02e04f4fefbf0d
--- /dev/null
+++ b/llvm/lib/Transforms/Tapir/LoopStripMine.cpp
@@ -0,0 +1,1559 @@
+//===- LoopStripMine.cpp - Loop strip-mining utilities --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some loop strip-mining utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Tapir/LoopStripMine.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TapirTaskInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Tapir/TapirLoopInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
+#include "llvm/Transforms/Utils/SimplifyIndVar.h"
+#include "llvm/Transforms/Utils/TapirUtils.h" +#include "llvm/Transforms/Utils/UnrollLoop.h" + +using namespace llvm; + +#define LSM_NAME "loop-stripmine" +#define DEBUG_TYPE LSM_NAME + +static cl::opt StripMineCount( + "stripmine-count", cl::Hidden, + cl::desc("Use this stripmine count for all loops, for testing purposes")); + +static cl::opt StripMineCoarseningFactor( + "stripmine-coarsen-factor", cl::Hidden, + cl::desc("Use this coarsening factor for stripmining")); + +static cl::opt StripMineUnrollRemainder( + "stripmine-unroll-remainder", cl::Hidden, + cl::desc("Allow the loop remainder after stripmining to be unrolled.")); + +/// Constants for stripmining cost analysis. +namespace StripMineConstants { +/// Default coarsening factor for strpimined Tapir loops. +const unsigned DefaultCoarseningFactor = 2048; +} + +/// The function chooses which type of stripmine (epilog or prolog) is more +/// profitabale. +/// Epilog stripmine is more profitable when there is PHI that starts from +/// constant. In this case epilog will leave PHI start from constant, +/// but prolog will convert it to non-constant. +/// +/// loop: +/// PN = PHI [I, Latch], [CI, Preheader] +/// I = foo(PN) +/// ... +/// +/// Epilog stripmine case. +/// loop: +/// PN = PHI [I2, Latch], [CI, Preheader] +/// I1 = foo(PN) +/// I2 = foo(I1) +/// ... +/// Prolog stripmine case. +/// NewPN = PHI [PrologI, Prolog], [CI, Preheader] +/// loop: +/// PN = PHI [I2, Latch], [NewPN, Preheader] +/// I1 = foo(PN) +/// I2 = foo(I1) +/// ... +/// +static bool isEpilogProfitable(const Loop *L) { + const BasicBlock *Preheader = L->getLoopPreheader(); + const BasicBlock *Header = L->getHeader(); + assert(Preheader && Header); + for (const PHINode &PN : Header->phis()) { + if (isa(PN.getIncomingValueForBlock(Preheader))) + return true; + } + return false; +} + +/// Perform some cleanup and simplifications on loops after stripmining. 
It is +/// useful to simplify the IV's in the new loop, as well as do a quick +/// simplify/dce pass of the instructions. +void llvm::simplifyLoopAfterStripMine(Loop *L, bool SimplifyIVs, LoopInfo *LI, + ScalarEvolution *SE, DominatorTree *DT, + const TargetTransformInfo &TTI, + AssumptionCache *AC) { + // Simplify any new induction variables in the stripmined loop. + if (SE && SimplifyIVs) { + SmallVector DeadInsts; + simplifyLoopIVs(L, SE, DT, LI, &TTI, DeadInsts); + + // Aggressively clean up dead instructions that simplifyLoopIVs already + // identified. Any remaining should be cleaned up below. + while (!DeadInsts.empty()) + if (Instruction *Inst = + dyn_cast_or_null(&*DeadInsts.pop_back_val())) + RecursivelyDeleteTriviallyDeadInstructions(Inst); + } + + // At this point, the code is well formed. We now do a quick sweep over the + // inserted code, doing constant propagation and dead code elimination as we + // go. + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + const std::vector &NewLoopBlocks = L->getBlocks(); + for (BasicBlock *BB : NewLoopBlocks) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { + Instruction *Inst = &*I++; + + if (Value *V = simplifyInstruction(Inst, {DL, nullptr, DT, AC})) + if (LI->replacementPreservesLCSSAForm(Inst, V)) + Inst->replaceAllUsesWith(V); + if (isInstructionTriviallyDead(Inst)) + Inst->eraseFromParent(); + } + } + + // TODO: after stripmining, previously loop variant conditions are likely to + // fold to constants, eagerly propagating those here will require fewer + // cleanup passes to be run. Alternatively, a LoopEarlyCSE might be + // appropriate. +} + +/// Gather the various unrolling parameters based on the defaults, compiler +/// flags, TTI overrides and user specified parameters. 
+TargetTransformInfo::StripMiningPreferences llvm::gatherStripMiningPreferences( + Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, + std::optional UserCount) { + TargetTransformInfo::StripMiningPreferences SMP; + + // Set up the defaults + SMP.Count = 0; + SMP.AllowExpensiveTripCount = false; + SMP.DefaultCoarseningFactor = + (StripMineCoarseningFactor.getNumOccurrences() > 0) ? + StripMineCoarseningFactor : StripMineConstants::DefaultCoarseningFactor; + SMP.UnrollRemainder = false; + + // Override with any target specific settings + TTI.getStripMiningPreferences(L, SE, SMP); + + // Apply any user values specified by cl::opt + if (UserCount) + SMP.Count = *UserCount; + if (StripMineUnrollRemainder.getNumOccurrences() > 0) + SMP.UnrollRemainder = StripMineUnrollRemainder; + + return SMP; +} + +// If loop has an grainsize pragma return the (necessarily positive) value from +// the pragma for stripmining. Otherwise return 0. +static unsigned StripMineCountPragmaValue(const Loop *L) { + TapirLoopHints Hints(L); + return Hints.getGrainsize(); +} + +// Returns true if stripmine count was set explicitly. +// Calculates stripmine count and writes it to SMP.Count. +bool llvm::computeStripMineCount( + Loop *L, const TargetTransformInfo &TTI, InstructionCost LoopCost, + TargetTransformInfo::StripMiningPreferences &SMP) { + // Check for explicit Count. + // 1st priority is stripmine count set by "stripmine-count" option. + bool UserStripMineCount = StripMineCount.getNumOccurrences() > 0; + if (UserStripMineCount) { + SMP.Count = StripMineCount; + SMP.AllowExpensiveTripCount = true; + return true; + } + + // 2nd priority is stripmine count set by pragma. + unsigned PragmaCount = StripMineCountPragmaValue(L); + if (PragmaCount > 0) { + SMP.Count = PragmaCount; + SMP.AllowExpensiveTripCount = true; + return true; + } + + // 3rd priority is computed stripmine count. 
+ // + // We want to coarsen the loop such that the work of detaching a loop + // iteration is tiny compared to the work of the loop body. Specifically, we + // want the total cost of the parallel loop to be at most (1 + \eps) times the + // cost of its serial projection. Let G is the grainsize, n the number of + // loop iterations, d the cost of a detach, and S the work of the loop body. + // Then we want + // + // (n/G)(G*S + d) <= (1 + \eps)(n * S) + // + // Solving for G yeilds G >= d/(\eps * S). Substituting in \eps = 1/C for a + // given coarsening factor C gives the equation below. + Instruction *DetachI = L->getHeader()->getTerminator(); + SMP.Count = *((SMP.DefaultCoarseningFactor * + TTI.getInstructionCost( + DetachI, TargetTransformInfo::TCK_SizeAndLatency) / + LoopCost) + .getValue()); + + return false; +} + +static Task *getTapirLoopForStripMining(const Loop *L, TaskInfo &TI, + OptimizationRemarkEmitter *ORE) { + LLVM_DEBUG(dbgs() << "Analyzing for stripmining: " << *L); + // We only handle Tapir loops. + Task *T = getTaskIfTapirLoopStructure(L, &TI); + if (!T) + return nullptr; + + BasicBlock *Preheader = L->getLoopPreheader(); + if (!Preheader) { + LLVM_DEBUG(dbgs() + << " Can't stripmine: loop preheader-insertion failed.\n"); + if (ORE) + ORE->emit(TapirLoopInfo::createMissedAnalysis(LSM_NAME, "NoPreheader", L) + << "loop lacks a preheader"); + return nullptr; + } + assert(isa(Preheader->getTerminator()) && + "Preheader not terminated by a branch"); + + BasicBlock *LatchBlock = L->getLoopLatch(); + if (!LatchBlock) { + LLVM_DEBUG(dbgs() + << " Can't stripmine: loop exit-block-insertion failed.\n"); + if (ORE) + ORE->emit(TapirLoopInfo::createMissedAnalysis(LSM_NAME, "NoLatch", L) + << "loop lacks a latch"); + return nullptr; + } + + // Loops with indirectbr cannot be cloned. 
+ if (!L->isSafeToClone()) { + LLVM_DEBUG(dbgs() << " Can't stripmine: loop body cannot be cloned.\n"); + if (ORE) + ORE->emit(TapirLoopInfo::createMissedAnalysis(LSM_NAME, "UnsafeToClone", + L) + << "loop is not safe to clone"); + return nullptr; + } + + // Tapir loops where the loop body does not reattach cannot be stripmined. + if (!llvm::any_of(predecessors(LatchBlock), [](const BasicBlock *B) { + return isa(B->getTerminator()); + })) { + LLVM_DEBUG(dbgs() << " Can't stripmine: loop body does not reattach.\n"); + if (ORE) + ORE->emit(TapirLoopInfo::createMissedAnalysis(LSM_NAME, "NoReattach", L) + << "spawned loop body does not reattach"); + return nullptr; + } + + // The current loop-stripmine pass can only stripmine loops with a single + // latch that's a conditional branch exiting the loop. + // FIXME: The implementation can be extended to work with more complicated + // cases, e.g. loops with multiple latches. + BranchInst *BI = dyn_cast(LatchBlock->getTerminator()); + + if (!BI || BI->isUnconditional()) { + // The loop-rotate pass can be helpful to avoid this in many cases. 
+ LLVM_DEBUG( + dbgs() + << " Can't stripmine: loop not terminated by a conditional branch.\n"); + if (ORE) + ORE->emit(TapirLoopInfo::createMissedAnalysis(LSM_NAME, "NoLatchBranch", + L) + << "loop latch is not terminated by a conditional branch"); + return nullptr; + } + + BasicBlock *Header = L->getHeader(); + auto CheckSuccessors = [&](unsigned S1, unsigned S2) { + return BI->getSuccessor(S1) == Header && !L->contains(BI->getSuccessor(S2)); + }; + + if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) { + LLVM_DEBUG(dbgs() << " Can't stripmine: only loops with one conditional" + " latch exiting the loop can be stripmined.\n"); + if (ORE) + ORE->emit(TapirLoopInfo::createMissedAnalysis(LSM_NAME, + "ComplexLatchBranch", L) + << "loop has multiple exiting conditional latches"); + return nullptr; + } + + if (Header->hasAddressTaken()) { + // The loop-rotate pass can be helpful to avoid this in many cases. + LLVM_DEBUG( + dbgs() << " Won't stripmine loop: address of header block is " + "taken.\n"); + if (ORE) + ORE->emit(TapirLoopInfo::createMissedAnalysis(LSM_NAME, + "HeaderAddressTaken", L) + << "loop header block has address taken"); + return nullptr; + } + + // Don't stripmine loops with the convergent attribute. + for (auto &BB : L->blocks()) + for (auto &I : *BB) + if (CallBase *CB = dyn_cast(&I)) + if (CB->isConvergent()) { + LLVM_DEBUG( + dbgs() << " Won't stripmine loop: contains convergent " + "attribute.\n"); + if (ORE) + ORE->emit(TapirLoopInfo::createMissedAnalysis(LSM_NAME, + "ConvergentLoop", L) + << "loop contains convergent attribute"); + return nullptr; + } + + // TODO: Generalize this condition to support stripmining with a prolog. +#ifndef NDEBUG + if (!isEpilogProfitable(L)) { + dbgs() << "Stripmining loop with unprofitable epilog.\n"; + } +#endif + + // Get the task for this loop. + return T; +} + +/// Connect the stripmining epilog code to the original loop. 
+/// The stripmining epilog code contains code to execute the +/// 'extra' iterations if the run-time trip count modulo the +/// stripmine count is non-zero. +/// +/// This function performs the following: +/// - Update PHI operands in the epilog loop by the new PHI nodes +/// - Branch around the epilog loop if extra iters (ModVal) is zero. +/// +static void ConnectEpilog(TapirLoopInfo &TL, Value *EpilStartIter, + Value *ModVal, BasicBlock *LoopDet, + BasicBlock *LoopEnd, BasicBlock *NewExit, + BasicBlock *Exit, BasicBlock *Preheader, + BasicBlock *EpilogPreheader, ValueToValueMapTy &VMap, + DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, + const DataLayout &DL, bool PreserveLCSSA) { + // NewExit should contain no PHI nodes. +#ifndef NDEBUG + bool ContainsPHIs = false; + for (PHINode &PN : NewExit->phis()) { + dbgs() << "NewExit PHI node: " << PN << "\n"; + ContainsPHIs = true; + } + assert(!ContainsPHIs && "NewExit should not contain PHI nodes."); +#endif + + // Create PHI nodes at NewExit (from the stripmining loop Latch and + // Preheader). Update corresponding PHI nodes in epilog loop. + IRBuilder<> B(EpilogPreheader->getTerminator()); + for (auto &InductionEntry : *TL.getInductionVars()) { + // Compute the value of this induction at NewExit. + const InductionDescriptor &II = InductionEntry.second; + // Get the new step value for this Phi. + Value *PhiIter = !II.getStep()->getType()->isIntegerTy() + ? B.CreateCast(Instruction::SIToFP, EpilStartIter, + II.getStep()->getType()) + : B.CreateSExtOrTrunc(EpilStartIter, II.getStep()->getType()); + Value *NewPhiStart = emitTransformedIndex(B, PhiIter, SE, DL, II); + + // Update the PHI node in the epilog loop. 
+ PHINode *PN = cast(VMap[InductionEntry.first]); + PN->setIncomingValue(PN->getBasicBlockIndex(EpilogPreheader), NewPhiStart); + } + + Instruction *InsertPt = NewExit->getTerminator(); + B.SetInsertPoint(InsertPt); + Value *BrLoopExit = B.CreateIsNotNull(ModVal, "lcmp.mod"); + assert(Exit && "Loop must have a single exit block only"); + // Split the epilogue exit to maintain loop canonicalization guarantees + SmallVector Preds(predecessors(Exit)); + SplitBlockPredecessors(Exit, Preds, ".epilog-lcssa", DT, LI, nullptr, + PreserveLCSSA); + // Add the branch to the exit block (around the stripmining loop) + B.CreateCondBr(BrLoopExit, EpilogPreheader, Exit); + InsertPt->eraseFromParent(); + if (DT) + DT->changeImmediateDominator(Exit, NewExit); + + // Split the main loop exit to maintain canonicalization guarantees. + SmallVector NewExitPreds{LoopDet}; + if (LoopEnd != NewExit) + NewExitPreds.push_back(LoopEnd); + SplitBlockPredecessors(NewExit, NewExitPreds, ".loopexit", DT, LI, nullptr, + PreserveLCSSA); +} + +/// Create a clone of the blocks in a loop and connect them together. +/// If CreateRemainderLoop is false, loop structure will not be cloned, +/// otherwise a new loop will be created including all cloned blocks, and the +/// iterator of it switches to count NewIter down to 0. +/// The cloned blocks should be inserted between InsertTop and InsertBot. +/// If loop structure is cloned InsertTop should be new preheader, InsertBot +/// new loop exit. +/// Return the new cloned loop that is created when CreateRemainderLoop is true. 
+static Loop * +CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, + const bool UseEpilogRemainder, const bool UnrollRemainder, + BasicBlock *InsertTop, BasicBlock *InsertBot, + BasicBlock *Preheader, std::vector &NewBlocks, + LoopBlocksDFS &LoopBlocks, + SmallVectorImpl &ExtraTaskBlocks, + SmallVectorImpl &SharedEHTaskBlocks, + ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { + StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; + BasicBlock *Header = L->getHeader(); + BasicBlock *Latch = L->getLoopLatch(); + Function *F = Header->getParent(); + LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO(); + LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO(); + Loop *ParentLoop = L->getParentLoop(); + NewLoopsMap NewLoops; + NewLoops[ParentLoop] = ParentLoop; + if (!CreateRemainderLoop) + NewLoops[L] = ParentLoop; + + // For each block in the original loop, create a new copy, + // and update the value map with the newly created values. + for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { + BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F); + NewBlocks.push_back(NewBB); + + // Add the cloned block to loop info. + addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops); + + VMap[*BB] = NewBB; + if (Header == *BB) { + // For the first block, add a CFG connection to this newly + // created block. + InsertTop->getTerminator()->setSuccessor(0, NewBB); + } + + if (DT) { + if (Header == *BB) { + // The header is dominated by the preheader. + DT->addNewBlock(NewBB, InsertTop); + } else { + // Copy information from original loop to the clone. + BasicBlock *IDomBB = DT->getNode(*BB)->getIDom()->getBlock(); + DT->addNewBlock(NewBB, cast(VMap[IDomBB])); + } + } + + if (Latch == *BB) { + // For the last block, if CreateRemainderLoop is false, create a direct + // jump to InsertBot. If not, create a loop back to cloned head. 
+ VMap.erase((*BB)->getTerminator()); + BasicBlock *FirstLoopBB = cast(VMap[Header]); + BranchInst *LatchBR = cast(NewBB->getTerminator()); + IRBuilder<> Builder(LatchBR); + if (!CreateRemainderLoop) { + Builder.CreateBr(InsertBot); + } else { + PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, + suffix + ".iter", + FirstLoopBB->getFirstNonPHI()); + Value *IdxSub = + Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), + NewIdx->getName() + ".sub"); + Value *IdxCmp = + Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp"); + Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); + NewIdx->addIncoming(NewIter, InsertTop); + NewIdx->addIncoming(IdxSub, NewBB); + } + LatchBR->eraseFromParent(); + } + } + + DetachInst *DI = cast(Header->getTerminator()); + // Create new copies of the EH blocks to clone. We can handle these blocks + // more simply than the loop blocks. + for (BasicBlock *BB : ExtraTaskBlocks) { + BasicBlock *NewBB = CloneBasicBlock(BB, VMap, "." + suffix, F); + NewBlocks.push_back(NewBB); + + // Add the cloned block to loop info. + if (LI->getLoopFor(BB)) + addClonedBlockToLoopInfo(BB, NewBB, LI, NewLoops); + + VMap[BB] = NewBB; + + // Update PHI nodes in the detach-unwind destination. Strictly speaking, + // this step isn't necessary, since the epilog loop will be serialized later + // and these new entries for the PHI nodes will therefore be removed. But + // the routine for serializing the detach expects valid LLVM, so we update + // the PHI nodes here to ensure the resulting LLVM is valid. + if (DI->hasUnwindDest()) { + if (isDetachedRethrow(BB->getTerminator(), DI->getSyncRegion())) { + InvokeInst *DR = dyn_cast(BB->getTerminator()); + for (PHINode &PN : DR->getUnwindDest()->phis()) + PN.addIncoming(PN.getIncomingValueForBlock(BB), NewBB); + } + } + } + + // Update PHI nodes in successors of ExtraTaskBlocks, based on the cloned + // values. 
+ for (BasicBlock *BB : ExtraTaskBlocks) { + for (BasicBlock *Succ : successors(BB)) { + if (VMap.count(Succ)) + continue; + + for (PHINode &PN : Succ->phis()) { + Value *Val = PN.getIncomingValueForBlock(BB); + Value *NewVal = VMap.count(Val) ? cast(VMap[Val]) : Val; + PN.addIncoming(NewVal, cast(VMap[BB])); + } + } + } + + // Update DT to accommodate cloned ExtraTaskBlocks. + if (DT) { + for (BasicBlock *BB : ExtraTaskBlocks) { + BasicBlock *NewBB = cast(VMap[BB]); + // Copy information from original loop to the clone, if it's available. + BasicBlock *IDomBB = DT->getNode(BB)->getIDom()->getBlock(); + if (VMap.count(IDomBB)) { + DT->addNewBlock(NewBB, cast(VMap[IDomBB])); + } else { + BasicBlock *NewIDom = nullptr; + // Get the idom of BB's predecessors. + for (BasicBlock *Pred : predecessors(BB)) + if (VMap.count(Pred)) { + if (NewIDom) + NewIDom = DT->findNearestCommonDominator(NewIDom, Pred); + else + NewIDom = Pred; + } + // Use this computed idom (or its clone) as the idom of the cloned BB. + if (VMap.count(NewIDom)) + DT->addNewBlock(NewBB, cast(VMap[NewIDom])); + else + DT->addNewBlock(NewBB, NewIDom); + } + } + } + + // Change the incoming values to the ones defined in the preheader or + // cloned loop. 
+ for (BasicBlock::iterator I = Header->begin(); isa(I); ++I) { + PHINode *NewPHI = cast(VMap[&*I]); + if (!CreateRemainderLoop) { + if (UseEpilogRemainder) { + unsigned idx = NewPHI->getBasicBlockIndex(Preheader); + NewPHI->setIncomingBlock(idx, InsertTop); + NewPHI->removeIncomingValue(Latch, false); + } else { + VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader); + NewPHI->eraseFromParent(); + } + } else { + unsigned idx = NewPHI->getBasicBlockIndex(Preheader); + NewPHI->setIncomingBlock(idx, InsertTop); + BasicBlock *NewLatch = cast(VMap[Latch]); + idx = NewPHI->getBasicBlockIndex(Latch); + Value *InVal = NewPHI->getIncomingValue(idx); + NewPHI->setIncomingBlock(idx, NewLatch); + if (Value *V = VMap.lookup(InVal)) + NewPHI->setIncomingValue(idx, V); + } + } + + // Add entries to PHI nodes outside of loop. Strictly speaking, this step + // isn't necessary, since the epilog loop will be serialized later and these + // new entries for the PHI nodes will therefore be removed. But the routine + // for serializing the detach expects valid LLVM, so we update the PHI nodes + // here to ensure the resulting LLVM is valid. + BasicBlock *ClonedHeader = cast(VMap[Header]); + DetachInst *ClonedDetach = cast(ClonedHeader->getTerminator()); + if (BasicBlock *Unwind = ClonedDetach->getUnwindDest()) + for (PHINode &PN : Unwind->phis()) + PN.addIncoming(PN.getIncomingValueForBlock(Header), ClonedHeader); + + if (CreateRemainderLoop) { + Loop *NewLoop = NewLoops[L]; + assert(NewLoop && "L should have been cloned"); + + // Only add loop metadata if the loop is not going to be completely + // unrolled. + if (UnrollRemainder) + return NewLoop; + + // FIXME? + // // Add unroll disable metadata to disable future unrolling for this loop. + // NewLoop->setLoopAlreadyUnrolled(); + return NewLoop; + } + else + return nullptr; +} + +// Helper function to get the basic-block predecessors of the given exceptional +// continuation BB associated with task T. 
These predecessors are either +// enclosed by task T or come from the unwind of the detach that spawns T. +// +// TODO: Move some of this logic into TapirTaskInfo, so we don't have to +// recompute it? +static void getEHContPredecessors(BasicBlock *BB, Task *T, + SmallVectorImpl &Preds, + TaskInfo &TI) { + DetachInst *DI = T->getDetach(); + assert(DI && "Root task does not have an exceptional continuation."); + assert(DI->hasUnwindDest() && + "Task does not have an exceptional continuation."); + + // Get the predecessors of BB enclosed by task T. + for (BasicBlock *Pred : predecessors(BB)) + if (T->encloses(Pred)) + Preds.push_back(Pred); + + // If the unwind destination of the detach is the exceptional continuation BB, + // add the block that performs the detach and return. + if (DI->getUnwindDest() == BB) { + Preds.push_back(DI->getParent()); + return; + } + + // Get the predecessor that comes from the unwind of the detach. + BasicBlock *DetUnwind = DI->getUnwindDest(); + while (DetUnwind->getUniqueSuccessor() != BB) + DetUnwind = DetUnwind->getUniqueSuccessor(); + Preds.push_back(DetUnwind); +} + +// Helper method to nest the exception-handling code of a task with exceptional +// continuation EHCont within a new parent task. +static BasicBlock *NestDetachUnwindPredecessors( + BasicBlock *EHCont, Value *EHContLPad, ArrayRef Preds, + BasicBlock *NewDetachBB, const char *Suffix1, const char *Suffix2, + LandingPadInst *OrigLPad, Value *SyncReg, Module *M, DominatorTree *DT, + LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA) { + BasicBlock *InnerUD, *OuterUD; + Value *InnerUDLPad; + Type *OrigLPadTy = OrigLPad->getType(); + if (EHCont->isLandingPad()) { + SmallVector NewBBs; + SplitLandingPadPredecessors(EHCont, Preds, Suffix1, Suffix2, NewBBs, DT, LI, + MSSAU, PreserveLCSSA); + InnerUD = NewBBs[0]; + OuterUD = NewBBs[1]; + InnerUDLPad = InnerUD->getLandingPadInst(); + + // Remove InnerUD from the PHI nodes in EHCont. 
+ for (PHINode &PN : EHCont->phis()) + PN.removeIncomingValue(InnerUD); + } else { + // Split the given Task predecessors of EHCont, which are given in Preds. + InnerUD = SplitBlockPredecessors(EHCont, Preds, Suffix1, DT, LI, MSSAU, + PreserveLCSSA); + // Split the NewDetachBB predecessor of EHCont. + OuterUD = SplitBlockPredecessors(EHCont, {NewDetachBB}, Suffix2, DT, LI, + MSSAU, PreserveLCSSA); + + // Create a new landing pad for the outer detach by cloning the landing pad + // from the old detach-unwind destination. + Instruction *Clone = OrigLPad->clone(); + Clone->setName(Twine("lpad") + Suffix2); + Clone->insertInto(OuterUD, OuterUD->getFirstInsertionPt()); + + // Update the PHI nodes in EHCont to accommodate OuterUD. If the PHI node + // corresponds to the EHCont landingpad value, set its incoming value from + // OuterUD to be the new landingpad. For all other PHI nodes, use the + // incoming value associated with InnerUD. + Value *OuterUDTmpVal = nullptr; + for (PHINode &PN : EHCont->phis()) { + if (&PN == EHContLPad) { + int OuterUDIdx = PN.getBasicBlockIndex(OuterUD); + OuterUDTmpVal = PN.getIncomingValue(OuterUDIdx); + PN.setIncomingValue(OuterUDIdx, Clone); + } else + PN.setIncomingValue(PN.getBasicBlockIndex(OuterUD), + PN.getIncomingValueForBlock(InnerUD)); + } + + if (Instruction *OuterUDTmpInst = dyn_cast(OuterUDTmpVal)) { + // Remove the temporary value for the new detach's unwind. + assert(OuterUDTmpInst->hasNUses(0) && + "Unexpected uses of a detach-unwind temporary value."); + OuterUDTmpInst->eraseFromParent(); + } + + // Remove InnerUD from the PHI nodes in EHCont. Record the value of the + // EHCont landingpad that comes from InnerUD. + InnerUDLPad = EHContLPad; + for (PHINode &PN : EHCont->phis()) { + if (&PN == EHContLPad) + InnerUDLPad = PN.getIncomingValueForBlock(InnerUD); + PN.removeIncomingValue(InnerUD); + } + } + + // Replace the termination of InnerUD with a detached rethrow. 
Start by + // creating a block for the unreachable destination of the detached rethrow. + BasicBlock *NewUnreachable = + SplitBlock(InnerUD, InnerUD->getTerminator(), DT, LI); + NewUnreachable->setName(InnerUD->getName() + ".unreachable"); + + // Insert a detached rethrow to the end of InnerUD. NewUnreachable is the + // normal destination of this detached rethrow, and OuterUD is the unwind + // destination. + ReplaceInstWithInst( + InnerUD->getTerminator(), + InvokeInst::Create(Intrinsic::getDeclaration( + M, Intrinsic::detached_rethrow, {OrigLPadTy}), + NewUnreachable, OuterUD, {SyncReg, InnerUDLPad})); + + // Terminate NewUnreachable with an unreachable. + IRBuilder<> B(NewUnreachable->getTerminator()); + Instruction *UnreachableTerm = cast(B.CreateUnreachable()); + UnreachableTerm->setDebugLoc(NewUnreachable->getTerminator()->getDebugLoc()); + NewUnreachable->getTerminator()->eraseFromParent(); + + // Inform the dominator tree of the deleted edge + if (DT) + DT->deleteEdge(NewUnreachable, EHCont); + + return OuterUD; +} + +Loop *llvm::StripMineLoop(Loop *L, unsigned Count, bool AllowExpensiveTripCount, + bool UnrollRemainder, LoopInfo *LI, + ScalarEvolution *SE, DominatorTree *DT, + const TargetTransformInfo &TTI, AssumptionCache *AC, + TaskInfo *TI, OptimizationRemarkEmitter *ORE, + bool PreserveLCSSA, bool ParallelEpilog, + bool NeedNestedSync, Loop **RemainderLoop) { + Task *T = getTapirLoopForStripMining(L, *TI, ORE); + if (!T) + return nullptr; + + TapirLoopInfo TL(L, T); + + // TODO: Add support for loop peeling, i.e., using a prolog. + + // Use Scalar Evolution to compute the trip count. This allows more loops to + // be stripmined than relying on induction var simplification. + if (!SE) + return nullptr; + PredicatedScalarEvolution PSE(*SE, *L); + + TL.collectIVs(PSE, LSM_NAME, ORE); + + // If no primary induction was found, just bail. 
+ if (!TL.hasPrimaryInduction()) { + LLVM_DEBUG(dbgs() << "No primary induction variable found in loop."); + return nullptr; + } + PHINode *PrimaryInduction = TL.getPrimaryInduction().first; + LLVM_DEBUG(dbgs() << "\tPrimary induction " << *PrimaryInduction << "\n"); + + Value *TripCount = TL.getOrCreateTripCount(PSE, LSM_NAME, ORE); + if (!TripCount) { + LLVM_DEBUG(dbgs() << "Could not compute trip count.\n"); + if (ORE) + ORE->emit(TapirLoopInfo::createMissedAnalysis(LSM_NAME, "NoTripCount", L) + << "could not compute finite loop trip count."); + return nullptr; + } + + LLVM_DEBUG(dbgs() << "\tTrip count " << *TripCount << "\n"); + + // Fixup all external uses of the IVs. + for (auto &InductionEntry : *TL.getInductionVars()) + TL.fixupIVUsers(InductionEntry.first, InductionEntry.second, PSE); + + // High-level algorithm: Generate an epilog for the Tapir loop and insert it + // between the original latch and its exit. Then split the entry and reattach + // block of the loop body to build the serial inner loop. + + BasicBlock *Preheader = L->getLoopPreheader(); + BranchInst *PreheaderBR = cast(Preheader->getTerminator()); + BasicBlock *Latch = L->getLoopLatch(); + BasicBlock *Header = L->getHeader(); + BasicBlock *TaskEntry = T->getEntry(); + assert(isa(Header->getTerminator()) && + "Header not terminated by a detach."); + DetachInst *DI = cast(Header->getTerminator()); + assert(DI->getDetached() == TaskEntry && + "Task entry does not match block detached from header."); + BasicBlock *ParentEntry = T->getParentTask()->getEntry(); + BranchInst *LatchBR = cast(Latch->getTerminator()); + unsigned ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0; + BasicBlock *LatchExit = LatchBR->getSuccessor(ExitIndex); + + // We will use the increment of the primary induction variable to derive + // wrapping flags. + Instruction *PrimaryInc = + cast(PrimaryInduction->getIncomingValueForBlock(Latch)); + + // Get all uses of the primary induction variable in the task. 
+ SmallVector PrimaryInductionUsesInTask; + for (Use &U : PrimaryInduction->uses()) + if (Instruction *User = dyn_cast(U.getUser())) + if (T->encloses(User->getParent())) + PrimaryInductionUsesInTask.push_back(&U); + + // Only stripmine loops with a computable trip count, and the trip count needs + // to be an int value (allowing a pointer type is a TODO item). + // We calculate the backedge count by using getExitCount on the Latch block, + // which is proven to be the only exiting block in this loop. This is same as + // calculating getBackedgeTakenCount on the loop (which computes SCEV for all + // exiting blocks). + const SCEV *BECountSC = TL.getBackedgeTakenCount(PSE); + if (isa(BECountSC) || + !BECountSC->getType()->isIntegerTy()) { + LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n"); + return nullptr; + } + + unsigned BEWidth = + cast(TL.getWidestInductionType())->getBitWidth(); + + // Add 1 since the backedge count doesn't include the first loop iteration. + const SCEV *TripCountSC = TL.getExitCount(BECountSC, PSE); + if (isa(TripCountSC)) { + LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n"); + return nullptr; + } + + const DataLayout &DL = Header->getModule()->getDataLayout(); + SCEVExpander Expander(*SE, DL, "loop-stripmine"); + if (!AllowExpensiveTripCount && + Expander.isHighCostExpansion(TripCountSC, L, SCEVCheapExpansionBudget, + &TTI, PreheaderBR)) { + LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n"); + return nullptr; + } + + // This constraint lets us deal with an overflowing trip count easily; see the + // comment on ModVal below. 
+ if (Log2_32(Count) > BEWidth) { + LLVM_DEBUG( + dbgs() + << "Count failed constraint on overflow trip count calculation.\n"); + return nullptr; + } + + LLVM_DEBUG(dbgs() << "Stripmining loop using grainsize " << Count << "\n"); + using namespace ore; + ORE->emit([&]() { + return OptimizationRemark(LSM_NAME, "Stripmined", + L->getStartLoc(), L->getHeader()) + << "stripmined loop using count " + << NV("StripMineCount", Count); + }); + + // Loop structure is the following: + // + // Preheader + // Header + // ... + // Latch + // LatchExit + + // Insert the epilog remainder. + BasicBlock *NewPreheader; + BasicBlock *NewExit = nullptr; + BasicBlock *EpilogPreheader = nullptr; + { + // Split Preheader to insert a branch around loop for stripmining. + NewPreheader = SplitBlock(Preheader, Preheader->getTerminator(), DT, LI); + NewPreheader->setName(Preheader->getName() + ".new"); + // Split LatchExit to create phi nodes from branch above. + SmallVector Preds(predecessors(LatchExit)); + NewExit = SplitBlockPredecessors(LatchExit, Preds, ".strpm-lcssa", + DT, LI, nullptr, PreserveLCSSA); + // NewExit gets its DebugLoc from LatchExit, which is not part of the + // original Loop. + // Fix this by setting Loop's DebugLoc to NewExit. + auto *NewExitTerminator = NewExit->getTerminator(); + NewExitTerminator->setDebugLoc(Header->getTerminator()->getDebugLoc()); + // Split NewExit to insert epilog remainder loop. + EpilogPreheader = SplitBlock(NewExit, NewExitTerminator, DT, LI); + EpilogPreheader->setName(Header->getName() + ".epil.preheader"); + } + + // Calculate conditions for branch around loop for stripmining + // in epilog case and around prolog remainder loop in prolog case. 
+ // Compute the number of extra iterations required, which is: + // extra iterations = run-time trip count % loop stripmine factor + PreheaderBR = cast(Preheader->getTerminator()); + Value *BECount = Expander.expandCodeFor(BECountSC, BECountSC->getType(), + PreheaderBR); + + // Loop structure should be the following: + // Epilog + // + // Preheader + // *NewPreheader + // Header + // ... + // Latch + // *NewExit + // *EpilogPreheader + // LatchExit + + IRBuilder<> B(PreheaderBR); + Value *ModVal; + // Calculate ModVal = (BECount + 1) % Count. + // Note that TripCount is BECount + 1. + if (isPowerOf2_32(Count)) { + // When Count is power of 2 we don't BECount for epilog case. However we'll + // need it for a branch around stripmined loop for prolog case. + ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter"); + // 1. There are no iterations to be run in the prolog/epilog loop. + // OR + // 2. The addition computing TripCount overflowed. + // + // If (2) is true, we know that TripCount really is (1 << BEWidth) and so + // the number of iterations that remain to be run in the original loop is a + // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we + // explicitly check this above). + if (TL.isInclusiveRange()) + ModVal = B.CreateAdd(ModVal, ConstantInt::get(ModVal->getType(), 1)); + } else { + // As (BECount + 1) can potentially unsigned overflow we count + // (BECount % Count) + 1 which is overflow safe as BECount % Count < Count. + Value *ModValTmp = B.CreateURem(BECount, + ConstantInt::get(BECount->getType(), + Count)); + Value *ModValAdd = B.CreateAdd(ModValTmp, + ConstantInt::get(ModValTmp->getType(), 1)); + // At that point (BECount % Count) + 1 could be equal to Count. + // To handle this case we need to take mod by Count one more time. 
+ ModVal = B.CreateURem(ModValAdd, + ConstantInt::get(BECount->getType(), Count), + "xtraiter"); + } + Value *BranchVal = B.CreateICmpULT( + BECount, ConstantInt::get(BECount->getType(), + TL.isInclusiveRange() ? Count : Count - 1)); + BasicBlock *RemainderLoopBB = NewExit; + BasicBlock *StripminedLoopBB = NewPreheader; + // Branch to either remainder (extra iterations) loop or stripmined loop. + B.CreateCondBr(BranchVal, RemainderLoopBB, StripminedLoopBB); + PreheaderBR->eraseFromParent(); + if (DT) { + // if (UseEpilogRemainder) + DT->changeImmediateDominator(NewExit, Preheader); + // else + // DT->changeImmediateDominator(PrologExit, Preheader); + } + Function *F = Header->getParent(); + // Get an ordered list of blocks in the loop to help with the ordering of the + // cloned blocks in the prolog/epilog code + LoopBlocksDFS LoopBlocks(L); + LoopBlocks.perform(LI); + + // Collect extra blocks in the task that LoopInfo does not consider to be part + // of the loop, e.g., exception-handling code for the task. + SmallVector ExtraTaskBlocks; + SmallVector SharedEHTaskBlocks; + SmallPtrSet SharedEHBlockPreds; + { + SmallPtrSet Visited; + for (Task *SubT : depth_first(T)) { + for (Spindle *S : + depth_first>(SubT->getEntrySpindle())) { + // Only visit shared-eh spindles once a piece. + if (S->isSharedEH() && !Visited.insert(S).second) + continue; + + for (BasicBlock *BB : S->blocks()) { + // Skip blocks in the loop. + if (!L->contains(BB)) { + ExtraTaskBlocks.push_back(BB); + + if (!T->simplyEncloses(BB) && S->isSharedEH()) { + SharedEHTaskBlocks.push_back(BB); + if (S->getEntry() == BB) + for (BasicBlock *Pred : predecessors(BB)) + if (T->simplyEncloses(Pred)) + SharedEHBlockPreds.insert(Pred); + } + } + } + } + } + } + + SmallVector Reattaches; + SmallVector EHBlocksToClone; + SmallPtrSet EHBlockPreds; + SmallPtrSet InlinedLPads; + SmallVector DetachedRethrows; + // Analyze the original task for serialization. 
+ AnalyzeTaskForSerialization(T, Reattaches, EHBlocksToClone, EHBlockPreds, + InlinedLPads, DetachedRethrows); + bool NeedToInsertTaskFrame = taskContainsSync(T); + + // If this detach can throw, get the exceptional continuation of the detach + // and its associated landingpad value. + BasicBlock *EHCont = nullptr; + Value *EHContLPadVal = nullptr; + SmallVector UDPreds; + if (DI->hasUnwindDest()) { + EHCont = T->getEHContinuationSpindle()->getEntry(); + EHContLPadVal = T->getLPadValueInEHContinuationSpindle(); + getEHContPredecessors(EHCont, T, UDPreds, *TI); + } + + // For each extra loop iteration, create a copy of the loop's basic blocks + // and generate a condition that branches to the copy depending on the + // number of 'left over' iterations. + // + std::vector NewBlocks; + ValueToValueMapTy VMap; + + // TODO: For stripmine factor 2 remainder loop will have 1 iterations. + // Do not create 1 iteration loop. + // bool CreateRemainderLoop = (Count != 2); + bool CreateRemainderLoop = true; + + // Clone all the basic blocks in the loop. If Count is 2, we don't clone + // the loop, otherwise we create a cloned loop to execute the extra + // iterations. This function adds the appropriate CFG connections. + BasicBlock *InsertBot = LatchExit; + BasicBlock *InsertTop = EpilogPreheader; + *RemainderLoop = + CloneLoopBlocks(L, ModVal, CreateRemainderLoop, true, UnrollRemainder, + InsertTop, InsertBot, NewPreheader, NewBlocks, LoopBlocks, + ExtraTaskBlocks, SharedEHTaskBlocks, VMap, DT, LI); + + // Insert the cloned blocks into the function. + F->splice(InsertBot->getIterator(), &*F, NewBlocks[0]->getIterator(), + F->end()); + + // Loop structure should be the following: + // Epilog + // + // Preheader + // NewPreheader + // Header + // ... + // Latch + // NewExit + // EpilogPreheader + // EpilogHeader + // ... + // EpilogLatch + // LatchExit + + // Rewrite the cloned instruction operands to use the values created when the + // clone is created. 
+ for (BasicBlock *BB : NewBlocks) + for (Instruction &I : *BB) + RemapInstruction(&I, VMap, + RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + + // Serialize the cloned loop body to render the inner loop serial. + { + // Translate all the analysis for the new cloned task. + SmallVector ClonedReattaches; + for (Instruction *I : Reattaches) + ClonedReattaches.push_back(cast(VMap[I])); + SmallPtrSet ClonedEHBlockPreds; + for (BasicBlock *B : EHBlockPreds) + ClonedEHBlockPreds.insert(cast(VMap[B])); + SmallVector ClonedEHBlocks; + for (BasicBlock *B : EHBlocksToClone) + ClonedEHBlocks.push_back(cast(VMap[B])); + // Landing pads and detached-rethrow instructions may or may not have been + // cloned. + SmallPtrSet ClonedInlinedLPads; + for (LandingPadInst *LPad : InlinedLPads) { + if (VMap[LPad]) + ClonedInlinedLPads.insert(cast(VMap[LPad])); + else + ClonedInlinedLPads.insert(LPad); + } + SmallVector ClonedDetachedRethrows; + for (Instruction *DR : DetachedRethrows) { + if (VMap[DR]) + ClonedDetachedRethrows.push_back(cast(VMap[DR])); + else + ClonedDetachedRethrows.push_back(DR); + } + DetachInst *ClonedDI = cast(VMap[DI]); + // Serialize the new task. + SerializeDetach(ClonedDI, ParentEntry, EHCont, EHContLPadVal, + ClonedReattaches, &ClonedEHBlocks, &ClonedEHBlockPreds, + &ClonedInlinedLPads, &ClonedDetachedRethrows, + NeedToInsertTaskFrame, DT, LI); + } + + // Detach the stripmined loop. 
+ Value *SyncReg = DI->getSyncRegion(), *NewSyncReg; + BasicBlock *EpilogPred, *LoopDetEntry, *LoopReattach; + Module *M = F->getParent(); + if (ParallelEpilog) { + ORE->emit([&]() { + return OptimizationRemark(LSM_NAME, "ParallelEpil", + L->getStartLoc(), L->getHeader()) + << "allowing epilog to execute in parallel with stripmined " + << "loop"; + }); + BasicBlock *LoopDetach = SplitBlock(NewPreheader, + NewPreheader->getTerminator(), DT, LI); + LoopDetach->setName(NewPreheader->getName() + ".strpm.detachloop"); + { + SmallVector HeaderPreds; + for (BasicBlock *Pred : predecessors(Header)) + if (Pred != Latch) + HeaderPreds.push_back(Pred); + LoopDetEntry = + SplitBlockPredecessors(Header, HeaderPreds, ".strpm.detachloop.entry", + DT, LI, nullptr, PreserveLCSSA); + NewSyncReg = CallInst::Create( + Intrinsic::getDeclaration(M, Intrinsic::syncregion_start), {}, + &*LoopDetEntry->getFirstInsertionPt()); + NewSyncReg->setName(SyncReg->getName() + ".strpm.detachloop"); + } + LoopReattach = SplitEdge(Latch, NewExit, DT, LI); + LoopReattach->setName(Header->getName() + ".strpm.detachloop.reattach"); + + // Clone any shared-EH spindles in the stripmined loop to prevent tasks at + // different nesting levels from sharing an EH spindle. + if (!SharedEHTaskBlocks.empty()) + cloneEHBlocks(F, SharedEHTaskBlocks, SharedEHBlockPreds, ".strpm", + nullptr, nullptr, DT, LI); + + // Insert new detach instructions + if (DI->hasUnwindDest()) { + // Insert a detach instruction to detach the stripmined loop. We do this + // early to simplify the operation of nesting the exception-handling code + // in the task. + ReplaceInstWithInst(LoopDetach->getTerminator(), + DetachInst::Create(LoopDetEntry, NewExit, + EHCont, SyncReg)); + // Update the dominator tree to reflect LoopDetach as a new predecessor of + // EHCont. 
+ BasicBlock *OldIDom = DT->getNode(EHCont)->getIDom()->getBlock(); + DT->changeImmediateDominator( + EHCont, DT->findNearestCommonDominator(OldIDom, LoopDetach)); + // Update the PHIs in EHCont with temporary values from LoopDetach. These + // values will be fixed by NestDetachUnwindPredecessors. + for (PHINode &PN : EHCont->phis()) + PN.addIncoming(UndefValue::get(PN.getType()), LoopDetach); + + // Nest the exceptional code in the original task into the new task. + /* BasicBlock *OuterUD = */ NestDetachUnwindPredecessors( + EHCont, EHContLPadVal, UDPreds, LoopDetach, ".strpm", + ".strpm.detachloop.unwind", DI->getUnwindDest()->getLandingPadInst(), + SyncReg, M, DT, LI, nullptr, PreserveLCSSA); + + // Replace sync regions of existing detached-rethrows. + for (Instruction *I : DetachedRethrows) { + InvokeInst *II = cast(I); + II->setArgOperand(0, NewSyncReg); + } + } else { + // Insert a detach instruction to detach the stripmined loop. + ReplaceInstWithInst(LoopDetach->getTerminator(), + DetachInst::Create(LoopDetEntry, NewExit, SyncReg)); + LoopDetach->getTerminator()->setDebugLoc( + Header->getTerminator()->getDebugLoc()); + } + // Insert a reattach instruction after the detached stripmined loop. + ReplaceInstWithInst(LoopReattach->getTerminator(), + ReattachInst::Create(NewExit, SyncReg)); + LoopReattach->getTerminator()->setDebugLoc( + LoopDetach->getTerminator()->getDebugLoc()); + EpilogPred = LoopDetach; + } else { + NewSyncReg = SyncReg; + LoopReattach = NewExit; + LoopDetEntry = NewPreheader; + } + + // Get the set of new loop blocks + SetVector NewLoopBlocks; + { + LoopBlocksDFS NewLoopBlocksDFS(L); + NewLoopBlocksDFS.perform(LI); + LoopBlocksDFS::RPOIterator BlockBegin = NewLoopBlocksDFS.beginRPO(); + LoopBlocksDFS::RPOIterator BlockEnd = NewLoopBlocksDFS.endRPO(); + for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) + NewLoopBlocks.insert(*BB); + } + // Create structure in LI for new loop. 
+ Loop *ParentLoop = L->getParentLoop(); + Loop *NewLoop = LI->AllocateLoop(); + if (ParentLoop) + ParentLoop->replaceChildLoopWith(L, NewLoop); + else + LI->changeTopLevelLoop(L, NewLoop); + NewLoop->addChildLoop(L); + + // Move the detach/reattach instructions to surround the stripmined loop. + BasicBlock *NewHeader; + { + SmallVector HeaderPreds; + for (BasicBlock *Pred : predecessors(Header)) + if (Pred != Latch) + HeaderPreds.push_back(Pred); + NewHeader = + SplitBlockPredecessors(Header, HeaderPreds, ".strpm.outer", + DT, LI, nullptr, PreserveLCSSA); + } + BasicBlock *NewEntry = + SplitBlock(NewHeader, NewHeader->getTerminator(), DT, LI); + NewEntry->setName(TaskEntry->getName() + ".strpm.outer"); + SmallVector LoopReattachPreds{Latch}; + BasicBlock *NewReattB = + SplitBlockPredecessors(LoopReattach, LoopReattachPreds, "", DT, LI, + nullptr, PreserveLCSSA); + NewReattB->setName(Latch->getName() + ".reattach"); + BasicBlock *NewLatch = + SplitBlock(NewReattB, NewReattB->getTerminator(), DT, LI); + NewLatch->setName(Latch->getName() + ".strpm.outer"); + + // Move static allocas from TaskEntry into NewEntry. + MoveStaticAllocasInBlock(NewEntry, TaskEntry, Reattaches); + + // Insert a new detach instruction + BasicBlock *OrigUnwindDest = DI->getUnwindDest(); + if (OrigUnwindDest) { + ReplaceInstWithInst(NewHeader->getTerminator(), + DetachInst::Create(NewEntry, NewLatch, + OrigUnwindDest, NewSyncReg)); + // Update the PHI nodes in the unwind destination of the detach. + for (PHINode &PN : OrigUnwindDest->phis()) + PN.setIncomingBlock(PN.getBasicBlockIndex(Header), NewHeader); + + // Update DT. Walk the path of unique successors from the unwind + // destination to change the immediate dominators of these nodes. Continue + // updating until OrigDUBB equals the exceptional continuation or, as in the + // case of a parallel epilog, we reach a detached-rethrow. 
+ BasicBlock *OrigDUBB = OrigUnwindDest; + BasicBlock *NewDomCandidate = NewHeader; + if (ParallelEpilog && NeedNestedSync) + // We will insert a sync.unwind to OrigUnwindDest, which changes the + // dominator. + NewDomCandidate = + DT->findNearestCommonDominator(NewHeader, LoopReattach); + while (OrigDUBB && (OrigDUBB != EHCont)) { + BasicBlock *OldIDom = + DT->getNode(OrigDUBB)->getIDom()->getBlock(); + DT->changeImmediateDominator( + OrigDUBB, DT->findNearestCommonDominator(OldIDom, NewDomCandidate)); + // Get the next block along the path. If we reach the end of the path at + // a detached-rethrow, then getUniqueSuccessor() returns nullptr. + OrigDUBB = OrigDUBB->getUniqueSuccessor(); + } + // If OrigDUBB equals EHCont, then this is the last block we aim to update. + if (OrigDUBB == EHCont) { + BasicBlock *OldIDom = DT->getNode(EHCont)->getIDom()->getBlock(); + DT->changeImmediateDominator( + EHCont, DT->findNearestCommonDominator(OldIDom, NewDomCandidate)); + } + } else + ReplaceInstWithInst(NewHeader->getTerminator(), + DetachInst::Create(NewEntry, NewLatch, NewSyncReg)); + // Replace the old detach instruction with a branch + ReplaceInstWithInst(Header->getTerminator(), + BranchInst::Create(DI->getDetached())); + + // Replace the old reattach instructions with branches. Along the way, + // determine their common dominator. + BasicBlock *ReattachDom = nullptr; + for (Instruction *I : Reattaches) { + if (!ReattachDom) + ReattachDom = I->getParent(); + else + ReattachDom = DT->findNearestCommonDominator(ReattachDom, I->getParent()); + ReplaceInstWithInst(I, BranchInst::Create(Latch)); + } + assert(ReattachDom && "No reattach-dominator block found"); + // Insert a reattach at the end of NewReattB. + ReplaceInstWithInst(NewReattB->getTerminator(), + ReattachInst::Create(NewLatch, NewSyncReg)); + // Update the dominator tree, and determine predecessors of epilog. 
+ if (DT->dominates(Header, Latch)) + DT->changeImmediateDominator(Latch, ReattachDom); + if (ParallelEpilog) + DT->changeImmediateDominator(LoopReattach, NewLatch); + else + EpilogPred = NewLatch; + + // The block structure of the stripmined loop should now look like so: + // + // LoopDetEntry + // NewHeader (detach NewEntry, NewLatch) + // NewEntry + // Header + // TaskEntry + // ... + // Latch (br Header, NewReattB) + // NewReattB (reattach NewLatch) + // NewLatch (br LoopReattach) + // LoopReattach + + // Add check of stripmined loop count. + IRBuilder<> B2(LoopDetEntry->getTerminator()); + + // We compute the loop count of the outer loop using a UDiv by the power-of-2 + // count to ensure that ScalarEvolution can handle this outer loop once we're + // done. + // + // TODO: Generalize to handle non-power-of-2 counts. + assert(isPowerOf2_32(Count) && "Count is not a power of 2."); + Value *TestVal = B2.CreateUDiv(TripCount, + ConstantInt::get(TripCount->getType(), Count), + "stripiter"); + // Value *TestVal = B2.CreateSub(TripCount, ModVal, "stripiter", true, true); + + // Value *TestCmp = B2.CreateICmpUGT(TestVal, + // ConstantInt::get(TestVal->getType(), 0), + // TestVal->getName() + ".ncmp"); + // ReplaceInstWithInst(NewPreheader->getTerminator(), + // BranchInst::Create(Header, LatchExit, TestCmp)); + // DT->changeImmediateDominator(LatchExit, + // DT->findNearestCommonDominator(LatchExit, + // NewPreheader)); + + // Add new counter for new outer loop. + // + // We introduce a new primary induction variable, NewIdx, into the outer loop, + // which counts up to the outer-loop trip count from 0, stepping by 1. In + // contrast to counting down from the outer-loop trip count, this new variable + // ensures that future loop passes, including LoopSpawning, can process this + // outer loop when we're done. 
+ PHINode *NewIdx = PHINode::Create(TestVal->getType(), 2, "niter", + NewHeader->getFirstNonPHI()); + B2.SetInsertPoint(NewLatch->getTerminator()); + // Instruction *IdxSub = cast( + // B2.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), + // NewIdx->getName() + ".nsub")); + // IdxSub->copyIRFlags(PrimaryInc); + Instruction *IdxAdd = cast( + B2.CreateAdd(NewIdx, ConstantInt::get(NewIdx->getType(), 1), + NewIdx->getName() + ".nadd")); + IdxAdd->copyIRFlags(PrimaryInc); + + // NewIdx->addIncoming(TestVal, NewPreheader); + // NewIdx->addIncoming(IdxSub, NewLatch); + // Value *IdxCmp = B2.CreateIsNull(IdxSub, NewIdx->getName() + ".ncmp"); + NewIdx->addIncoming(ConstantInt::get(TestVal->getType(), 0), LoopDetEntry); + NewIdx->addIncoming(IdxAdd, NewLatch); + Value *IdxCmp = B2.CreateICmpEQ(IdxAdd, TestVal, + NewIdx->getName() + ".ncmp"); + ReplaceInstWithInst(NewLatch->getTerminator(), + BranchInst::Create(LoopReattach, NewHeader, IdxCmp)); + DT->changeImmediateDominator(NewLatch, NewHeader); + // The block structure of the stripmined loop should now look like so: + // + // LoopDetEntry + // NewHeader (detach NewEntry, NewLatch) + // NewEntry + // Header + // TaskEntry + // ... + // Latch (br Header, NewReattB) + // NewReattB (reattach NewLatch) + // NewLatch (br NewHeader, LoopReattach) + // LoopReattach + + // If necessary, add the nested sync right before LoopReattach. + if (ParallelEpilog && NeedNestedSync) { + BasicBlock *NewLoopReattach = + SplitBlock(LoopReattach, LoopReattach->getTerminator(), DT, LI); + BasicBlock *NestedSyncBlock = LoopReattach; + LoopReattach = NewLoopReattach; + NestedSyncBlock->setName(Header->getName() + ".strpm.detachloop.sync"); + ReplaceInstWithInst(NestedSyncBlock->getTerminator(), + SyncInst::Create(LoopReattach, NewSyncReg)); + if (!OrigUnwindDest && F->doesNotThrow()) { + // Insert a call to sync.unwind. 
+ CallInst *SyncUnwind = CallInst::Create( + Intrinsic::getDeclaration(M, Intrinsic::sync_unwind), { NewSyncReg }, + "", LoopReattach->getFirstNonPHIOrDbg()); + // If the Tapir loop has an unwind destination, change the sync.unwind to + // an invoke that unwinds to the cloned unwind destination. + if (OrigUnwindDest) { + BasicBlock *NewBB = + changeToInvokeAndSplitBasicBlock(SyncUnwind, OrigUnwindDest); + + // Update LI. + if (Loop *L = LI->getLoopFor(LoopReattach)) + L->addBasicBlockToLoop(NewBB, *LI); + + // Update DT: LoopReattach dominates Split, which dominates all other + // nodes previously dominated by LoopReattach. + if (DomTreeNode *OldNode = DT->getNode(LoopReattach)) { + std::vector Children(OldNode->begin(), OldNode->end()); + + DomTreeNode *NewNode = DT->addNewBlock(NewBB, LoopReattach); + for (DomTreeNode *I : Children) + DT->changeImmediateDominator(I, NewNode); + } + } + } + } + + // Fixup the LoopInfo for the new loop. + if (!ParentLoop) { + NewLoop->addBasicBlockToLoop(NewHeader, *LI); + NewLoop->addBasicBlockToLoop(NewEntry, *LI); + for (BasicBlock *BB : NewLoopBlocks) { + NewLoop->addBlockEntry(BB); + } + NewLoop->addBasicBlockToLoop(NewReattB, *LI); + NewLoop->addBasicBlockToLoop(NewLatch, *LI); + } else { + LI->changeLoopFor(NewHeader, NewLoop); + NewLoop->addBlockEntry(NewHeader); + LI->changeLoopFor(NewEntry, NewLoop); + NewLoop->addBlockEntry(NewEntry); + for (BasicBlock *BB : NewLoopBlocks) + NewLoop->addBlockEntry(BB); + LI->changeLoopFor(NewReattB, NewLoop); + NewLoop->addBlockEntry(NewReattB); + LI->changeLoopFor(NewLatch, NewLoop); + NewLoop->addBlockEntry(NewLatch); + } + // Update loop metadata + NewLoop->setLoopID(L->getLoopID()); + TapirLoopHints Hints(L); + Hints.clearHintsMetadata(); + + // Update all of the old PHI nodes + B2.SetInsertPoint(NewEntry->getTerminator()); + Instruction *CountVal = cast( + B2.CreateMul(ConstantInt::get(NewIdx->getType(), Count), + NewIdx)); + CountVal->copyIRFlags(PrimaryInduction); + for (auto 
&InductionEntry : *TL.getInductionVars()) { + PHINode *OrigPhi = InductionEntry.first; + const InductionDescriptor &II = InductionEntry.second; + if (II.getStep()->isZero()) + // Nothing to do for this Phi + continue; + // Get the new step value for this Phi. + Value *PhiCount = !II.getStep()->getType()->isIntegerTy() + ? B2.CreateCast(Instruction::SIToFP, CountVal, + II.getStep()->getType()) + : B2.CreateSExtOrTrunc(CountVal, II.getStep()->getType()); + Value *NewStart = emitTransformedIndex(B2, PhiCount, SE, DL, II); + + // Get the old increment instruction for this Phi + int Idx = OrigPhi->getBasicBlockIndex(NewEntry); + OrigPhi->setIncomingValue(Idx, NewStart); + } + + // Add new induction variable for inner loop. + PHINode *InnerIdx = PHINode::Create(PrimaryInduction->getType(), 2, + "inneriter", + Header->getFirstNonPHI()); + Value *InnerTestVal = ConstantInt::get(PrimaryInduction->getType(), Count); + B2.SetInsertPoint(LatchBR); + Instruction *InnerSub = cast( + B2.CreateSub(InnerIdx, ConstantInt::get(InnerIdx->getType(), 1), + InnerIdx->getName() + ".nsub")); + InnerSub->copyIRFlags(PrimaryInc); + // Instruction *InnerAdd = cast( + // B2.CreateAdd(InnerIdx, ConstantInt::get(InnerIdx->getType(), 1), + // InnerIdx->getName() + ".nadd")); + // InnerAdd->copyIRFlags(PrimaryInc); + Value *InnerCmp; + if (LatchBR->getSuccessor(0) == Header) + InnerCmp = B2.CreateIsNotNull(InnerSub, InnerIdx->getName() + ".ncmp"); + else + InnerCmp = B2.CreateIsNull(InnerSub, InnerIdx->getName() + ".ncmp"); + InnerIdx->addIncoming(InnerTestVal, NewEntry); + InnerIdx->addIncoming(InnerSub, Latch); + // if (LatchBR->getSuccessor(0) == Header) + // InnerCmp = B2.CreateICmpNE(InnerAdd, InnerTestVal, + // InnerIdx->getName() + ".ncmp"); + // else + // InnerCmp = B2.CreateICmpEQ(InnerAdd, InnerTestVal, + // InnerIdx->getName() + ".ncmp"); + // InnerIdx->addIncoming(ConstantInt::get(InnerIdx->getType(), 0), NewEntry); + // InnerIdx->addIncoming(InnerAdd, Latch); + 
LatchBR->setCondition(InnerCmp); + + // Connect the epilog code to the original loop and update the PHI functions. + B2.SetInsertPoint(EpilogPreheader->getTerminator()); + + // Compute the start of the epilog iterations. We use a divide and multiply + // by the power-of-2 count to simplify the SCEV's of the induction variables + // for later analysis passes. + // Value *EpilStartIter = B2.CreateSub(TripCount, ModVal); + Value *EpilStartIter = + B2.CreateMul(B2.CreateUDiv(TripCount, + ConstantInt::get(TripCount->getType(), Count)), + ConstantInt::get(TripCount->getType(), Count)); + if (Instruction *ESIInst = dyn_cast(EpilStartIter)) + ESIInst->copyIRFlags(PrimaryInc); + ConnectEpilog(TL, EpilStartIter, ModVal, EpilogPred, LoopReattach, NewExit, + LatchExit, Preheader, EpilogPreheader, VMap, DT, LI, SE, DL, + PreserveLCSSA); + + // If this loop is nested, then the loop stripminer changes the code in the + // any of its parent loops, so the Scalar Evolution pass needs to be run + // again. + SE->forgetTopmostLoop(L); + + // FIXME: Optionally unroll remainder loop + // + // if (RemainderLoop && UnrollRemainder) { + // LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n"); + // UnrollLoop(RemainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1, + // /*Force*/ false, /*AllowRuntime*/ false, + // /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true, + // /*PreserveOnlyFirst*/ false, /*TripMultiple*/ 1, + // /*PeelCount*/ 0, /*UnrollRemainder*/ false, LI, SE, DT, AC, + // /*TI*/ nullptr, /*ORE*/ nullptr, /*PreserveLCSSA*/ true); + // } + + // Record that the remainder loop was derived from a Tapir loop. + (*RemainderLoop)->setDerivedFromTapirLoop(); + + // At this point, the code is well formed. We now simplify the new loops, + // doing constant propagation and dead code elimination as we go. 
+ simplifyLoopAfterStripMine(L, /*SimplifyIVs*/ true, LI, SE, DT, TTI, AC); + simplifyLoopAfterStripMine(NewLoop, /*SimplifyIVs*/ true, LI, SE, DT, TTI, + AC); + simplifyLoopAfterStripMine(*RemainderLoop, /*SimplifyIVs*/ true, LI, SE, DT, + TTI, AC); + +#ifndef NDEBUG + DT->verify(); + LI->verify(*DT); +#endif + + // Record that the old loop was derived from a Tapir loop. + L->setDerivedFromTapirLoop(); + + // Update TaskInfo manually using the updated DT. + if (TI) + // FIXME: Recalculating TaskInfo for the whole function is wasteful. + // Optimize this routine in the future. + TI->recalculate(*F, *DT); + + return NewLoop; +} diff --git a/llvm/lib/Transforms/Tapir/LoopStripMinePass.cpp b/llvm/lib/Transforms/Tapir/LoopStripMinePass.cpp new file mode 100644 index 00000000000000..2d55d6387eaba7 --- /dev/null +++ b/llvm/lib/Transforms/Tapir/LoopStripMinePass.cpp @@ -0,0 +1,454 @@ +//===- LoopStripMinePass.cpp - Loop strip-mining pass ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a pass to perform Tapir loop strip-mining. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Tapir/LoopStripMinePass.h" +#include "llvm/ADT/PriorityWorklist.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TapirTaskInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/WorkSpanAnalysis.h" +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/InstructionCost.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Tapir.h" +#include "llvm/Transforms/Tapir/LoopStripMine.h" +#include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/TapirUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "loop-stripmine" + +cl::opt llvm::EnableTapirLoopStripmine( + "stripmine-loops", cl::init(true), cl::Hidden, + cl::desc("Run the Tapir Loop stripmining pass")); + +static cl::opt AllowParallelEpilog( + "allow-parallel-epilog", cl::Hidden, cl::init(true), + cl::desc("Allow stripmined Tapir loops to execute their epilogs in parallel.")); + +static cl::opt IncludeNestedSync( + "include-nested-sync", cl::Hidden, cl::init(true), + cl::desc("If the epilog is allowed to execute in parallel, include a sync " + "instruction in the nested task.")); + +static cl::opt RequireParallelEpilog( + "require-parallel-epilog", cl::Hidden, cl::init(false), + cl::desc("Require stripmined Tapir loops to execute 
their epilogs in " + "parallel. Intended for debugging.")); + +/// Create an analysis remark that explains why stripmining failed +/// +/// \p RemarkName is the identifier for the remark. If \p I is passed it is an +/// instruction that prevents vectorization. Otherwise \p TheLoop is used for +/// the location of the remark. \return the remark object that can be streamed +/// to. +static OptimizationRemarkAnalysis +createMissedAnalysis(StringRef RemarkName, const Loop *TheLoop, + Instruction *I = nullptr) { + const Value *CodeRegion = TheLoop->getHeader(); + DebugLoc DL = TheLoop->getStartLoc(); + + if (I) { + CodeRegion = I->getParent(); + // If there is no debug location attached to the instruction, revert back to + // using the loop's. + if (I->getDebugLoc()) + DL = I->getDebugLoc(); + } + + OptimizationRemarkAnalysis R(DEBUG_TYPE, RemarkName, DL, CodeRegion); + R << "loop not stripmined: "; + return R; +} + + +/// Approximate the work of the body of the loop L. Returns several relevant +/// properties of loop L via by-reference arguments. +static InstructionCost ApproximateLoopCost( + const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, + bool &Convergent, bool &IsRecursive, bool &UnknownSize, + const TargetTransformInfo &TTI, LoopInfo *LI, ScalarEvolution &SE, + const SmallPtrSetImpl &EphValues, + TargetLibraryInfo *TLI) { + + WSCost LoopCost; + estimateLoopCost(LoopCost, L, LI, &SE, TTI, TLI, EphValues); + + // Exclude calls to builtins when counting the calls. This assumes that all + // builtin functions are cheap. 
+ NumCalls = LoopCost.Metrics.NumCalls - LoopCost.Metrics.NumBuiltinCalls; + NotDuplicatable = LoopCost.Metrics.notDuplicatable; + Convergent = LoopCost.Metrics.convergent; + IsRecursive = LoopCost.Metrics.isRecursive; + UnknownSize = LoopCost.UnknownCost; + + return LoopCost.Work; +} + +static bool tryToStripMineLoop( + Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, + const TargetTransformInfo &TTI, AssumptionCache &AC, TaskInfo *TI, + OptimizationRemarkEmitter &ORE, TargetLibraryInfo *TLI, bool PreserveLCSSA, + std::optional ProvidedCount) { + Task *T = getTaskIfTapirLoopStructure(L, TI); + if (!T) + return false; + TapirLoopHints Hints(L); + + if (TM_Disable == hasLoopStripmineTransformation(L)) + return false; + + LLVM_DEBUG(dbgs() << "Loop Strip Mine: F[" + << L->getHeader()->getParent()->getName() << "] Loop %" + << L->getHeader()->getName() << "\n"); + + if (!L->isLoopSimplifyForm()) { + LLVM_DEBUG( + dbgs() << " Not stripmining loop which is not in loop-simplify " + "form.\n"); + return false; + } + bool StripMiningRequested = + (hasLoopStripmineTransformation(L) == TM_ForcedByUser); + TargetTransformInfo::StripMiningPreferences SMP = + gatherStripMiningPreferences(L, SE, TTI, ProvidedCount); + + unsigned NumCalls = 0; + bool NotDuplicatable = false; + bool Convergent = false; + bool IsRecursive = false; + bool UnknownSize = false; + + SmallPtrSet EphValues; + CodeMetrics::collectEphemeralValues(L, &AC, EphValues); + + InstructionCost LoopCost = + ApproximateLoopCost(L, NumCalls, NotDuplicatable, Convergent, IsRecursive, + UnknownSize, TTI, LI, SE, EphValues, TLI); + // Determine the iteration count of the eventual stripmined the loop. + bool explicitCount = computeStripMineCount(L, TTI, LoopCost, SMP); + + // If the loop size is unknown, then we cannot compute a stripmining count for + // it. 
+ if (!explicitCount && UnknownSize) { + LLVM_DEBUG(dbgs() << " Not stripmining loop with unknown size.\n"); + ORE.emit(createMissedAnalysis("UnknownSize", L) + << "Cannot stripmine loop with unknown size."); + return false; + } + + // If the loop size is enormous, then we might want to use a stripmining count + // of 1 for it. + LLVM_DEBUG(dbgs() << " Loop Cost = " << LoopCost << "\n"); + if (!explicitCount && InstructionCost::getMax() == LoopCost) { + LLVM_DEBUG(dbgs() << " Not stripmining loop with very large size.\n"); + if (Hints.getGrainsize() == 1) + return false; + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "HugeLoop", + L->getStartLoc(), L->getHeader()) + << "using grainsize 1 for huge loop"; + }); + Hints.setAlreadyStripMined(); + return true; + } + + // If the loop is recursive, set the stripmine factor to be 1. + if (!explicitCount && IsRecursive) { + LLVM_DEBUG(dbgs() << " Not stripmining loop that recursively calls the " + << "containing function.\n"); + if (Hints.getGrainsize() == 1) + return false; + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "RecursiveCalls", + L->getStartLoc(), L->getHeader()) + << "using grainsize 1 for loop with recursive calls"; + }); + Hints.setAlreadyStripMined(); + return true; + } + + // TODO: We can stripmine loops if the stripmined version does not require a + // prolog or epilog. + if (NotDuplicatable) { + LLVM_DEBUG(dbgs() << " Not stripmining loop which contains " + << "non-duplicatable instructions.\n"); + ORE.emit(createMissedAnalysis("NotDuplicatable", L) + << "Cannot stripmine loop with non-duplicatable instructions."); + return false; + } + + // If the loop contains a convergent operation, then the control flow + // introduced between the stripmined loop and epilog is unsafe -- it adds a + // control-flow dependency to the convergent operation. 
+ if (Convergent) { + LLVM_DEBUG(dbgs() << " Skipping loop with convergent operations.\n"); + ORE.emit(createMissedAnalysis("Convergent", L) + << "Cannot stripmine loop with convergent instructions."); + return false; + } + + // If the loop contains potentially expensive function calls, then we don't + // want to stripmine it. + if (NumCalls > 0 && !explicitCount && !StripMiningRequested) { + LLVM_DEBUG(dbgs() << " Skipping loop with expensive function calls.\n"); + ORE.emit(createMissedAnalysis("ExpensiveCalls", L) + << "Not stripmining loop with potentially expensive calls."); + return false; + } + + // Make sure the count is a power of 2. + if (!isPowerOf2_32(SMP.Count)) + SMP.Count = NextPowerOf2(SMP.Count); + if (SMP.Count < 2) { + if (Hints.getGrainsize() == 1) + return false; + ORE.emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "LargeLoop", + L->getStartLoc(), L->getHeader()) + << "using grainsize 1 for large loop"; + }); + Hints.setAlreadyStripMined(); + return true; + } + + // Find a constant trip count if available + unsigned ConstTripCount = getConstTripCount(L, SE); + + // Stripmining factor (Count) must be less or equal to TripCount. + if (ConstTripCount && SMP.Count >= ConstTripCount) { + ORE.emit(createMissedAnalysis("FullStripMine", L) + << "Stripmining count larger than loop trip count."); + ORE.emit(DiagnosticInfoOptimizationFailure( + DEBUG_TYPE, "UnprofitableParallelLoop", + L->getStartLoc(), L->getHeader()) + << "Parallel loop does not appear profitable to parallelize."); + return false; + } + + // When is it worthwhile to allow the epilog to run in parallel with the + // stripmined loop? We expect the epilog to perform G/2 iterations on + // average, where G is the selected grainsize. Our goal is to ensure that + // these G/2 iterations offset the cost of an additional detach. 
+ // Mathematically, this means + // + // (G/2) * S + d <= (1 + \eps) * G/2 * S , + // + // where S is the work of one loop iteration, d is the cost of a detach, and + // \eps is a sufficiently small constant, e.g., 1/C for a coarsening factor C. + // We assume that the choice of G is chosen such that G * \eps <= 1, which is + // true for the automatic computation of G aimed at ensuring the stripmined + // loop performs at most a (1 + \eps) factor more work than its serial + // projection. Solving the above equation thus shows that the epilog should + // be allowed to run in parallel when S >= 2 * d. We check for this case and + // encode the result in ParallelEpilog. + Instruction *DetachI = L->getHeader()->getTerminator(); + bool ParallelEpilog = + RequireParallelEpilog || + (AllowParallelEpilog && + ((SMP.Count < SMP.DefaultCoarseningFactor) || + (2 * TTI.getInstructionCost(DetachI, + TargetTransformInfo::TCK_SizeAndLatency)) <= + LoopCost)); + + // Some parallel runtimes, such as Cilk, require nested parallel tasks to be + // synchronized. + bool NeedNestedSync = IncludeNestedSync; + if (!NeedNestedSync && TLI) + NeedNestedSync = (TLI->getTapirTarget() == TapirTargetID::Cilk || + TLI->getTapirTarget() == TapirTargetID::OpenCilk); + + // Save loop properties before it is transformed. + MDNode *OrigLoopID = L->getLoopID(); + + // Stripmine the loop + Loop *RemainderLoop = nullptr; + Loop *NewLoop = StripMineLoop(L, SMP.Count, SMP.AllowExpensiveTripCount, + SMP.UnrollRemainder, LI, &SE, &DT, TTI, &AC, TI, + &ORE, PreserveLCSSA, ParallelEpilog, + NeedNestedSync, &RemainderLoop); + if (!NewLoop) + return false; + + // Copy metadata to remainder loop + if (RemainderLoop && OrigLoopID) { + // Optional RemainderLoopID = makeFollowupLoopID( + // OrigLoopID, {}, "tapir.loop"); + MDNode *NewRemainderLoopID = + CopyNonTapirLoopMetadata(RemainderLoop->getLoopID(), OrigLoopID); + RemainderLoop->setLoopID(NewRemainderLoopID); + } + + // Mark the new loop as stripmined. 
+ TapirLoopHints NewHints(NewLoop); + NewHints.setAlreadyStripMined(); + + return true; +} + +namespace { + +class LoopStripMine : public LoopPass { +public: + static char ID; // Pass ID, replacement for typeid + + std::optional ProvidedCount; + + LoopStripMine(std::optional Count = std::nullopt) + : LoopPass(ID), ProvidedCount(Count) { + initializeLoopStripMinePass(*PassRegistry::getPassRegistry()); + } + + bool runOnLoop(Loop *L, LPPassManager &LPM) override { + if (skipLoop(L)) + return false; + + Function &F = *L->getHeader()->getParent(); + + auto &TLI = getAnalysis().getTLI(F); + auto &DT = getAnalysis().getDomTree(); + LoopInfo *LI = &getAnalysis().getLoopInfo(); + TaskInfo *TI = &getAnalysis().getTaskInfo(); + ScalarEvolution &SE = getAnalysis().getSE(); + const TargetTransformInfo &TTI = + getAnalysis().getTTI(F); + auto &AC = getAnalysis().getAssumptionCache(F); + // For the old PM, we can't use OptimizationRemarkEmitter as an analysis + // pass. Function analyses need to be preserved across loop transformations + // but ORE cannot be preserved (see comment before the pass definition). + OptimizationRemarkEmitter ORE(&F); + bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID); + + return tryToStripMineLoop(L, DT, LI, SE, TTI, AC, TI, ORE, &TLI, + PreserveLCSSA, ProvidedCount); + } + + /// This transformation requires natural loop information & requires that + /// loop preheaders be inserted into the CFG... 
+ void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + getLoopAnalysisUsage(AU); + } +}; + +} // end anonymous namespace + +char LoopStripMine::ID = 0; + +INITIALIZE_PASS_BEGIN(LoopStripMine, "loop-stripmine", "Stripmine Tapir loops", + false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(LoopPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(LoopStripMine, "loop-stripmine", "Stripmine Tapir loops", + false, false) + +Pass *llvm::createLoopStripMinePass(int Count) { + // TODO: It would make more sense for this function to take the optionals + // directly, but that's dangerous since it would silently break out of tree + // callers. + return new LoopStripMine(Count == -1 ? std::nullopt + : std::optional(Count)); +} + +PreservedAnalyses LoopStripMinePass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &TLI = AM.getResult(F); + auto &SE = AM.getResult(F); + auto &LI = AM.getResult(F); + auto &TTI = AM.getResult(F); + auto &DT = AM.getResult(F); + auto &AC = AM.getResult(F); + auto &TI = AM.getResult(F); + auto &ORE = AM.getResult(F); + + LoopAnalysisManager *LAM = nullptr; + if (auto *LAMProxy = AM.getCachedResult(F)) + LAM = &LAMProxy->getManager(); + + // const ModuleAnalysisManager &MAM = + // AM.getResult(F).getManager(); + // ProfileSummaryInfo *PSI = + // MAM.getCachedResult(*F.getParent()); + + bool Changed = false; + + // The stripminer requires loops to be in simplified form, and also needs + // LCSSA. Since simplification may add new inner loops, it has to run before + // the legality and profitability checks. This means running the loop + // stripminer will simplify all loops, regardless of whether anything end up + // being stripmined. 
+ for (auto &L : LI) { + Changed |= simplifyLoop(L, &DT, &LI, &SE, &AC, nullptr, + /* PreserveLCSSA */ false); + Changed |= formLCSSARecursively(*L, DT, &LI, &SE); + } + + SmallPriorityWorklist Worklist; + appendLoopsToWorklist(LI, Worklist); + + while (!Worklist.empty()) { + // Because the LoopInfo stores the loops in RPO, we walk the worklist from + // back to front so that we work forward across the CFG, which for + // stripmining is only needed to get optimization remarks emitted in a + // forward order. + Loop &L = *Worklist.pop_back_val(); +#ifndef NDEBUG + Loop *ParentL = L.getParentLoop(); +#endif + + // // Check if the profile summary indicates that the profiled application + // // has a huge working set size, in which case we disable peeling to avoid + // // bloating it further. + // if (PSI && PSI->hasHugeWorkingSetSize()) + // AllowPeeling = false; + std::string LoopName = std::string(L.getName()); + bool LoopChanged = + tryToStripMineLoop(&L, DT, &LI, SE, TTI, AC, &TI, ORE, &TLI, + /*PreserveLCSSA*/ true, /*Count*/ std::nullopt); + Changed |= LoopChanged; + + // The parent must not be damaged by stripmining! +#ifndef NDEBUG + if (LoopChanged && ParentL) + ParentL->verifyLoop(); +#endif + + // Clear any cached analysis results for L if we removed it completely. + if (LAM && LoopChanged) + LAM->clear(L, LoopName); + } + + if (!Changed) + return PreservedAnalyses::all(); + + return getLoopPassPreservedAnalyses(); +} diff --git a/llvm/lib/Transforms/Tapir/LoweringUtils.cpp b/llvm/lib/Transforms/Tapir/LoweringUtils.cpp new file mode 100644 index 00000000000000..319083f81e1ed9 --- /dev/null +++ b/llvm/lib/Transforms/Tapir/LoweringUtils.cpp @@ -0,0 +1,1275 @@ +//===- LoweringUtils.cpp - Utility functions for lowering Tapir -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements several utility functions for lowering Tapir. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/TapirTaskInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Support/Timer.h" +#include "llvm/Transforms/IPO/FunctionAttrs.h" +#include "llvm/Transforms/Tapir/CilkABI.h" +#include "llvm/Transforms/Tapir/LambdaABI.h" +#include "llvm/Transforms/Tapir/OMPTaskABI.h" +#include "llvm/Transforms/Tapir/OpenCilkABI.h" +#include "llvm/Transforms/Tapir/Outline.h" +#include "llvm/Transforms/Tapir/QthreadsABI.h" +#include "llvm/Transforms/Tapir/SerialABI.h" +#include "llvm/Transforms/Tapir/TapirLoopInfo.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/TapirUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "tapirlowering" + +static const char TimerGroupName[] = DEBUG_TYPE; +static const char TimerGroupDescription[] = "Tapir lowering"; + +TapirTarget *llvm::getTapirTargetFromID(Module &M, TapirTargetID ID) { + switch (ID) { + case TapirTargetID::None: + return nullptr; + case TapirTargetID::Serial: + return new SerialABI(M); + case TapirTargetID::Cilk: + return new CilkABI(M); + case TapirTargetID::Cheetah: + case TapirTargetID::OpenCilk: + return new OpenCilkABI(M); + case TapirTargetID::Lambda: + return new LambdaABI(M); + case TapirTargetID::OMPTask: + return new OMPTaskABI(M); + case TapirTargetID::Qthreads: + return new QthreadsABI(M); + default: + llvm_unreachable("Invalid TapirTargetID"); + } +} + +//----------------------------------------------------------------------------// +// Lowering utilities for Tapir 
tasks. + +/// Helper function to find the inputs and outputs to task T, based only the +/// blocks in T and no subtask of T. +static void +findTaskInputsOutputs(const Task *T, ValueSet &Inputs, ValueSet &Outputs, + const DominatorTree &DT) { + NamedRegionTimer NRT("findTaskInputsOutputs", "Find task inputs and outputs", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + // Get the sync region for this task's detach, so we can filter it out of this + // task's inputs. + const Value *SyncRegion = nullptr; + SmallPtrSet UnwindPHIs; + if (DetachInst *DI = T->getDetach()) { + SyncRegion = DI->getSyncRegion(); + // Ignore PHIs in the unwind destination of the detach. + if (DI->hasUnwindDest()) + UnwindPHIs.insert(DI->getUnwindDest()); + // Get the PHI nodes that directly or indirectly use the landing pad of the + // unwind destination of this task's detach. + getDetachUnwindPHIUses(DI, UnwindPHIs); + } + + for (Spindle *S : depth_first>(T->getEntrySpindle())) { + LLVM_DEBUG(dbgs() << "Examining spindle for inputs/outputs: " << *S + << "\n"); + for (BasicBlock *BB : S->blocks()) { + // Skip basic blocks that are successors of detached rethrows. They're + // dead anyway. + if (isSuccessorOfDetachedRethrow(BB) || isPlaceholderSuccessor(BB)) + continue; + + // If a used value is defined outside the region, it's an input. If an + // instruction is used outside the region, it's an output. + for (Instruction &II : *BB) { + // Examine all operands of this instruction. + for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE; + ++OI) { + + // If the operand of I is defined in the same basic block as I, then + // it's not an input. + if (Instruction *OP = dyn_cast(*OI)) + if (OP->getParent() == BB) + continue; + + // PHI nodes in the entry block of a shared-EH exit will be + // rewritten in any cloned helper, so we skip operands of these PHI + // nodes for blocks not in this task. 
+ if (S->isSharedEH() && S->isEntry(BB)) + if (PHINode *PN = dyn_cast(&II)) { + LLVM_DEBUG(dbgs() + << "\tPHI node in shared-EH spindle: " << *PN << "\n"); + if (!T->simplyEncloses(PN->getIncomingBlock(*OI))) { + LLVM_DEBUG(dbgs() << "skipping\n"); + continue; + } + } + // If the operand is the sync region of this task's detach, skip it. + if (SyncRegion == *OI) + continue; + // If this operand is defined in the parent, it's an input. + if (T->definedInParent(*OI)) + Inputs.insert(*OI); + } + // Examine all uses of this instruction + for (User *U : II.users()) { + // If we find a live use outside of the task, it's an output. + if (Instruction *I = dyn_cast(U)) { + // Skip uses in PHI nodes that depend on the unwind landing pad of + // the detach. + if (UnwindPHIs.count(I->getParent())) + continue; + if (!T->encloses(I->getParent()) && + DT.isReachableFromEntry(I->getParent())) + Outputs.insert(&II); + } + } + } + } + } +} + +/// Determine the inputs for all tasks in this function. Returns a map from +/// tasks to their inputs. +/// +/// Aggregating all of this work into a single routine allows us to avoid +/// redundant traversals of basic blocks in nested tasks. +TaskValueSetMap llvm::findAllTaskInputs(Function &F, const DominatorTree &DT, + const TaskInfo &TI) { + TaskValueSetMap TaskInputs; + for (Task *T : post_order(TI.getRootTask())) { + // Skip the root task + if (T->isRootTask()) break; + + LLVM_DEBUG(dbgs() << "Finding inputs/outputs for task@" + << T->getEntry()->getName() << "\n"); + ValueSet Inputs, Outputs; + // Check all inputs of subtasks to determine if they're inputs to this task. 
+ for (Task *SubT : T->subtasks()) { + LLVM_DEBUG(dbgs() << "\tsubtask @ " << SubT->getEntry()->getName() + << "\n"); + + if (TaskInputs.count(SubT)) + for (Value *V : TaskInputs[SubT]) + if (T->definedInParent(V)) + Inputs.insert(V); + } + + LLVM_DEBUG({ + dbgs() << "Subtask Inputs:\n"; + for (Value *V : Inputs) + dbgs() << "\t" << *V << "\n"; + dbgs() << "Subtask Outputs:\n"; + for (Value *V : Outputs) + dbgs() << "\t" << *V << "\n"; + }); + assert(Outputs.empty() && "Task should have no outputs."); + + // Find additional inputs and outputs of task T by examining blocks in T and + // not in any subtask of T. + findTaskInputsOutputs(T, Inputs, Outputs, DT); + + LLVM_DEBUG({ + dbgs() << "Inputs:\n"; + for (Value *V : Inputs) + dbgs() << "\t" << *V << "\n"; + dbgs() << "Outputs:\n"; + for (Value *V : Outputs) + dbgs() << "\t" << *V << "\n"; + }); + assert(Outputs.empty() && "Task should have no outputs."); + + // Map the computed inputs to this task. + TaskInputs[T] = Inputs; + } + return TaskInputs; +} + +// Helper function to check if a value is defined outside of a given spindle. +static bool definedOutsideTaskFrame(const Value *V, const Spindle *TF, + const TaskInfo &TI) { + // Arguments are always defined outside of spindles. + if (isa(V)) + return true; + + // If V is an instruction, check if TFSpindles contains it. + if (const Instruction *I = dyn_cast(V)) + return !taskFrameContains(TF, I->getParent(), TI); + + return false; +} + +/// Get the set of inputs for the given task T, accounting for the taskframe of +/// T, if it exists. 
+void llvm::getTaskFrameInputsOutputs(TFValueSetMap &TFInputs, + TFValueSetMap &TFOutputs, + const Spindle &TF, + const ValueSet *TaskInputs, + const TaskInfo &TI, + const DominatorTree &DT) { + NamedRegionTimer NRT("getTaskFrameInputsOutputs", + "Find taskframe inputs and outputs", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + + const Task *T = TF.getTaskFromTaskFrame(); + if (T) + LLVM_DEBUG(dbgs() << "getTaskFrameInputsOutputs: task@" + << T->getEntry()->getName() << "\n"); + else + LLVM_DEBUG(dbgs() << "getTaskFrameInputsOutputs: taskframe spindle@" + << TF.getEntry()->getName() << "\n"); + + // Check the taskframe spindles for definitions of inputs to T. + if (TaskInputs) + for (Value *V : *TaskInputs) + if (definedOutsideTaskFrame(V, &TF, TI)) + TFInputs[&TF].insert(V); + + // Get inputs from child taskframes. + for (Spindle *SubTF : TF.subtaskframes()) + for (Value *V : TFInputs[SubTF]) + if (definedOutsideTaskFrame(V, &TF, TI)) + TFInputs[&TF].insert(V); + + Value *TFCreate = T ? T->getTaskFrameUsed() : TF.getTaskFrameCreate(); + // Get inputs and outputs of the taskframe. + for (Spindle *S : TF.taskframe_spindles()) { + // Skip taskframe spindles within the task itself. + if (T && T->contains(S)) + continue; + + // Skip spindles that are placeholders. + if (isPlaceholderSuccessor(S->getEntry())) + continue; + + for (BasicBlock *BB : S->blocks()) { + for (Instruction &I : *BB) { + // Ignore certain instructions from consideration: the taskframe.create + // intrinsic for this taskframe, the detach instruction that spawns T, + // and the landingpad value in T's EH continuation. + if ((TFCreate == &I) || isa(&I) || + (T && T->getLPadValueInEHContinuationSpindle() == &I)) + continue; + + // Examine all operands of this instruction + for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; + ++OI) { + + // If the operand of I is defined in the same basic block as I, then + // it's not an input. 
+ if (Instruction *OP = dyn_cast(*OI)) + if (OP->getParent() == BB) + continue; + + // Some canonicalization methods, e.g., loop canonicalization, will + // introduce a basic block after a detached-rethrow that branches to + // the successor of the EHContinuation entry. As a result, we can get + // PHI nodes that use the landingpad of a detached-rethrow. These + // PHI-node inputs will be rewritten anyway, so skip them. + if (isa(I)) + if (Instruction *OP = dyn_cast(*OI)) { + if (isa(*OP) && T && T->encloses(OP->getParent())) + if (isSuccessorOfDetachedRethrow(OP->getParent())) + continue; + // Also ignore PHI nodes in shared-eh spindles. + if (T && S->isSharedEH()) + continue; + } + + // Skip detached-rethrow calls in shared-eh spindles. + if (T && S->isSharedEH()) + if (isDetachedRethrow(&I)) + continue; + + // TODO: Add a test to exclude landingpads from detached-rethrows? + LLVM_DEBUG({ + if (Instruction *OP = dyn_cast(*OI)) { + assert(!(T && T->encloses(OP->getParent())) && + "TaskFrame uses value defined in task."); + } + }); + // If this operand is not defined outside of the taskframe, then it's + // an input. + if (definedOutsideTaskFrame(*OI, &TF, TI)) + TFInputs[&TF].insert(*OI); + } + // Examine all users of this instruction. + for (User *U : I.users()) { + // If we find a live use outside of the task, it's an output. + if (Instruction *UI = dyn_cast(U)) { + if (definedOutsideTaskFrame(UI, &TF, TI) && + DT.isReachableFromEntry(UI->getParent())) + TFOutputs[&TF].insert(&I); + } + } + } + } + } +} + +/// Determine the inputs for all taskframes in this function. Returns a map +/// from tasks to their inputs. +/// +/// Aggregating all of this work into a single routine allows us to avoid +/// redundant traversals of basic blocks in nested tasks. 
+void llvm::findAllTaskFrameInputs( + TFValueSetMap &TFInputs, TFValueSetMap &TFOutputs, + const SmallVectorImpl &AllTaskFrames, Function &F, + const DominatorTree &DT, TaskInfo &TI) { + // Determine the inputs for all tasks. + TaskValueSetMap TaskInputs = findAllTaskInputs(F, DT, TI); + + for (Spindle *TF : AllTaskFrames) { + Task *T = TF->getTaskFromTaskFrame(); + + // Update the inputs to account for the taskframe. + getTaskFrameInputsOutputs(TFInputs, TFOutputs, *TF, + T ? &TaskInputs[T] : nullptr, TI, DT); + + LLVM_DEBUG({ + dbgs() << "TFInputs:\n"; + for (Value *V : TFInputs[TF]) + dbgs() << "\t" << *V << "\n"; + dbgs() << "TFOutputs:\n"; + for (Value *V : TFOutputs[TF]) + dbgs() << "\t" << *V << "\n"; + }); + } +} + +/// Create a structure for storing all arguments to a task. +/// +/// NOTE: This function inserts the struct for task arguments in the same +/// location as the Reference compiler and other compilers that lower parallel +/// constructs in the front end. This location is NOT the correct place, +/// however, for handling tasks that are spawned inside of a serial loop. +std::pair +llvm::createTaskArgsStruct(const ValueSet &Inputs, Task *T, + Instruction *StorePt, Instruction *LoadPt, + bool staticStruct, ValueToValueMapTy &InputsMap, + Loop *TapirL) { + assert(T && T->getParentTask() && "Expected spawned task."); + SmallPtrSet TaskFrameBlocks; + if (Spindle *TFCreateSpindle = T->getTaskFrameCreateSpindle()) { + // Collect taskframe blocks + for (Spindle *S : TFCreateSpindle->taskframe_spindles()) { + // Skip spindles contained in the task. + if (T->contains(S)) + continue; + // Skip placeholder spindles. 
+ if (isPlaceholderSuccessor(S->getEntry())) + continue; + + for (BasicBlock *B : S->blocks()) + TaskFrameBlocks.insert(B); + } + } + assert((T->encloses(LoadPt->getParent()) || + TaskFrameBlocks.contains(LoadPt->getParent()) || + (TapirL && LoadPt->getParent() == TapirL->getHeader())) && + "Loads of struct arguments must be inside task."); + assert(!T->encloses(StorePt->getParent()) && + !TaskFrameBlocks.contains(StorePt->getParent()) && + "Store of struct arguments must be outside task."); + assert(T->getParentTask()->encloses(StorePt->getParent()) && + "Store of struct arguments expected to be in parent task."); + SmallVector InputsToSort; + { + for (Value *V : Inputs) + InputsToSort.push_back(V); + Function *F = T->getEntry()->getParent(); + const DataLayout &DL = F->getParent()->getDataLayout(); + std::sort(InputsToSort.begin(), InputsToSort.end(), + [&DL](const Value *A, const Value *B) { + return DL.getTypeSizeInBits(A->getType()) > + DL.getTypeSizeInBits(B->getType()); + }); + } + + // Get vector of struct inputs and their types. + SmallVector StructInputs; + SmallVector StructIT; + for (Value *V : InputsToSort) { + StructInputs.push_back(V); + StructIT.push_back(V->getType()); + } + + // Create an alloca for this struct in the parent task's entry block. + Instruction *ArgsStart = StorePt; + IRBuilder<> B(StorePt); + // TODO: Add lifetime intrinsics for this allocation. + AllocaInst *Closure; + StructType *ST = StructType::get(T->getEntry()->getContext(), StructIT); + LLVM_DEBUG(dbgs() << "Closure struct type " << *ST << "\n"); + if (staticStruct) { + Spindle *ParentTF = T->getEntrySpindle()->getTaskFrameParent(); + BasicBlock *AllocaInsertBlk = + ParentTF ? ParentTF->getEntry() : T->getParentTask()->getEntry(); + Value *TFCreate = ParentTF ? ParentTF->getTaskFrameCreate() : nullptr; + IRBuilder<> Builder(TFCreate + ? 
&*++cast(TFCreate)->getIterator() + : &*AllocaInsertBlk->getFirstInsertionPt()); + Closure = Builder.CreateAlloca(ST); + // Store arguments into the structure + if (!StructInputs.empty()) + ArgsStart = B.CreateStore(StructInputs[0], + B.CreateConstGEP2_32(ST, Closure, 0, 0)); + for (unsigned i = 1; i < StructInputs.size(); ++i) + B.CreateStore(StructInputs[i], B.CreateConstGEP2_32(ST, Closure, 0, i)); + } else { + // Add code to store values into struct immediately before detach. + Closure = B.CreateAlloca(ST); + ArgsStart = Closure; + // Store arguments into the structure + for (unsigned i = 0; i < StructInputs.size(); ++i) + B.CreateStore(StructInputs[i], B.CreateConstGEP2_32(ST, Closure, 0, i)); + } + + // Add code to load values from struct in task entry and use those loaded + // values. + IRBuilder<> B2(LoadPt); + for (unsigned i = 0; i < StructInputs.size(); ++i) { + auto STGEP = cast(B2.CreateConstGEP2_32(ST, Closure, 0, i)); + auto STLoad = B2.CreateLoad(StructIT[i], STGEP); + InputsMap[StructInputs[i]] = STLoad; + + // Update all uses of the struct inputs in the loop body. + auto UI = StructInputs[i]->use_begin(), E = StructInputs[i]->use_end(); + for (; UI != E;) { + Use &U = *UI; + ++UI; + auto *Usr = dyn_cast(U.getUser()); + if (!Usr) + continue; + if ((!T->encloses(Usr->getParent()) && + !TaskFrameBlocks.contains(Usr->getParent()) && + (!TapirL || (Usr->getParent() != TapirL->getHeader() && + Usr->getParent() != TapirL->getLoopLatch())))) + continue; + U.set(STLoad); + } + } + + return std::make_pair(Closure, ArgsStart); +} + +/// Organize the set \p Inputs of values in \p F into a set \p Fixed of values +/// that can be used as inputs to a helper function. +void llvm::fixupInputSet(Function &F, const ValueSet &Inputs, ValueSet &Fixed) { + // Scan for any sret parameters in TaskInputs and add them first. These + // parameters must appear first or second in the prototype of the Helper + // function. 
+ Value *SRetInput = nullptr; + if (F.hasStructRetAttr()) { + Function::arg_iterator ArgIter = F.arg_begin(); + if (F.hasParamAttribute(0, Attribute::StructRet)) + if (Inputs.count(&*ArgIter)) + SRetInput = &*ArgIter; + if (F.hasParamAttribute(1, Attribute::StructRet)) { + ++ArgIter; + if (Inputs.count(&*ArgIter)) + SRetInput = &*ArgIter; + } + } + if (SRetInput) { + LLVM_DEBUG(dbgs() << "sret input " << *SRetInput << "\n"); + Fixed.insert(SRetInput); + } + + // Sort the inputs to the task with largest arguments first, in order to + // improve packing or arguments in memory. + SmallVector InputsToSort; + for (Value *V : Inputs) + if (V != SRetInput) + InputsToSort.push_back(V); + LLVM_DEBUG({ + dbgs() << "After sorting:\n"; + for (Value *V : InputsToSort) + dbgs() << "\t" << *V << "\n"; + }); + const DataLayout &DL = F.getParent()->getDataLayout(); + std::sort(InputsToSort.begin(), InputsToSort.end(), + [&DL](const Value *A, const Value *B) { + return DL.getTypeSizeInBits(A->getType()) > + DL.getTypeSizeInBits(B->getType()); + }); + + // Add the remaining inputs. + for (Value *V : InputsToSort) + if (!Fixed.count(V)) + Fixed.insert(V); +} + +/// Organize the inputs to task \p T, given in \p TaskInputs, to create an +/// appropriate set of inputs, \p HelperInputs, to pass to the outlined +/// function for \p T. 
+/// Organize the inputs to task \p T, given in \p TaskInputs, to create an
+/// appropriate set of inputs, \p HelperArgs, to pass to the outlined function
+/// for \p T.  Returns the instruction at which stores of arguments begin (the
+/// argument-struct store start when an argument struct is used, otherwise
+/// \p StorePt).
+Instruction *llvm::fixupHelperInputs(
+    Function &F, Task *T, ValueSet &TaskInputs, ValueSet &HelperArgs,
+    Instruction *StorePt, Instruction *LoadPt,
+    TapirTarget::ArgStructMode useArgStruct,
+    ValueToValueMapTy &InputsMap, Loop *TapirL) {
+  if (TapirTarget::ArgStructMode::None != useArgStruct) {
+    // Pack all task inputs into a single argument struct; the helper then
+    // takes just the struct pointer.
+    std::pair<AllocaInst *, Instruction *> ArgsStructInfo =
+        createTaskArgsStruct(TaskInputs, T, StorePt, LoadPt,
+                             TapirTarget::ArgStructMode::Static == useArgStruct,
+                             InputsMap, TapirL);
+    HelperArgs.insert(ArgsStructInfo.first);
+    return ArgsStructInfo.second;
+  }
+
+  // No argument struct: pass the (sorted, sret-first) inputs directly.
+  fixupInputSet(F, TaskInputs, HelperArgs);
+  return StorePt;
+}
+
+/// Returns true if BasicBlock \p B is the immediate successor of only
+/// detached-rethrow instructions.
+///
+/// NOTE(review): a block with no predecessors vacuously satisfies this check;
+/// callers appear to rely on that for dead blocks, but confirm upstream.
+bool llvm::isSuccessorOfDetachedRethrow(const BasicBlock *B) {
+  for (const BasicBlock *Pred : predecessors(B))
+    if (!isDetachedRethrow(Pred->getTerminator()))
+      return false;
+  return true;
+}
+
+/// Collect the set of blocks in task \p T.  All blocks enclosed by \p T will
+/// be pushed onto \p TaskBlocks.  The set of blocks terminated by reattaches
+/// from \p T are added to \p ReattachBlocks.  The set of blocks terminated by
+/// detached-rethrow instructions are added to \p TaskResumeBlocks.  The set of
+/// entry points to exception-handling blocks shared by \p T and other tasks in
+/// the same function are added to \p SharedEHEntries.
+void llvm::getTaskBlocks(Task *T, std::vector<BasicBlock *> &TaskBlocks,
+                         SmallPtrSetImpl<BasicBlock *> &ReattachBlocks,
+                         SmallPtrSetImpl<BasicBlock *> &TaskResumeBlocks,
+                         SmallPtrSetImpl<BasicBlock *> &SharedEHEntries,
+                         const DominatorTree *DT) {
+  NamedRegionTimer NRT("getTaskBlocks", "Get task blocks", TimerGroupName,
+                       TimerGroupDescription, TimePassesIsEnabled);
+  // Spindles belonging to taskframes created inside T are handled separately
+  // below, so mark them for exclusion from the main walk.
+  SmallPtrSet<Spindle *, 4> SpindlesToExclude;
+  for (Spindle *TFSpindle : T->taskframe_creates())
+    for (Spindle *S : TFSpindle->taskframe_spindles())
+      SpindlesToExclude.insert(S);
+
+  // Add taskframe-spindle blocks.
+ if (Spindle *TFCreateSpindle = T->getTaskFrameCreateSpindle()) { + for (Spindle *S : TFCreateSpindle->taskframe_spindles()) { + if (T->contains(S)) + continue; + + // Skip spindles that are placeholders. + if (isPlaceholderSuccessor(S->getEntry())) + continue; + + LLVM_DEBUG(dbgs() << "Adding blocks in taskframe spindle " << *S << "\n"); + assert(!SpindlesToExclude.count(S) && + "Taskframe spindle marked for exclusion."); + + if (T->getEHContinuationSpindle() == S) + SharedEHEntries.insert(S->getEntry()); + else { + // Some canonicalization methods, e.g., loop canonicalization, will + // introduce a basic block after a detached-rethrow that branches to the + // successor of the EHContinuation entry. + for (BasicBlock *Pred : predecessors(S->getEntry())) + if (isSuccessorOfDetachedRethrow(Pred)) + SharedEHEntries.insert(S->getEntry()); + } + + for (BasicBlock *B : S->blocks()) { + LLVM_DEBUG(dbgs() << "Adding task block " << B->getName() << "\n"); + TaskBlocks.push_back(B); + + if (isTaskFrameResume(B->getTerminator())) + TaskResumeBlocks.insert(B); + } + } + } + + // Record the predecessor spindles of the EH continuation, if there is one. + Spindle *EHContinuation = T->getEHContinuationSpindle(); + SmallPtrSet EHContPred; + if (EHContinuation) + for (Spindle *Pred : predecessors(EHContinuation)) + EHContPred.insert(Pred); + + // Add the spindles in the task proper. + for (Spindle *S : depth_first>(T->getEntrySpindle())) { + if (SpindlesToExclude.count(S)) + continue; + + LLVM_DEBUG(dbgs() << "Adding task blocks in spindle " << *S << "\n"); + + // Record the entry blocks of any shared-EH spindles. + if (S->isSharedEH()) + SharedEHEntries.insert(S->getEntry()); + + // At -O0, the always-inliner can create blocks in the predecessor spindles + // of the EH continuation that are not reachable from the entry. These + // blocks are not cloned, but we mark them as shared EH entries so that + // outlining can correct any PHI nodes in those blocks. 
+ if (EHContPred.count(S)) + for (BasicBlock *B : S->blocks()) + for (BasicBlock *Pred : predecessors(B)) + if (!DT->isReachableFromEntry(Pred)) { + SharedEHEntries.insert(B); + break; + } + + for (BasicBlock *B : S->blocks()) { + // Skip basic blocks that are successors of detached rethrows. They're + // dead anyway. + if (isSuccessorOfDetachedRethrow(B) || isPlaceholderSuccessor(B)) + continue; + + LLVM_DEBUG(dbgs() << "Adding task block " << B->getName() << "\n"); + TaskBlocks.push_back(B); + + // Record the blocks terminated by reattaches and detached rethrows. + if (isa(B->getTerminator())) + ReattachBlocks.insert(B); + if (isDetachedRethrow(B->getTerminator())) + TaskResumeBlocks.insert(B); + } + } +} + +/// Outlines the content of task \p T in function \p F into a new helper +/// function. The parameter \p Inputs specified the inputs to the helper +/// function. The map \p VMap is updated with the mapping of instructions in +/// \p T to instructions in the new helper function. +Function *llvm::createHelperForTask( + Function &F, Task *T, ValueSet &Args, Module *DestM, + ValueToValueMapTy &VMap, Type *ReturnType, OutlineAnalysis &OA) { + // Collect all basic blocks in this task. + std::vector TaskBlocks; + // Reattach instructions and detached rethrows in this task might need special + // handling. + SmallPtrSet ReattachBlocks; + SmallPtrSet TaskResumeBlocks; + // Entry blocks of shared-EH spindles may contain PHI nodes that need to be + // rewritten in the cloned helper. + SmallPtrSet SharedEHEntries; + getTaskBlocks(T, TaskBlocks, ReattachBlocks, TaskResumeBlocks, + SharedEHEntries, &OA.DT); + + SmallVector Returns; // Ignore returns cloned. 
+ ValueSet Outputs; + DetachInst *DI = T->getDetach(); + + BasicBlock *Header = T->getEntry(); + BasicBlock *Entry = DI->getParent(); + if (Spindle *TaskFrameCreate = T->getTaskFrameCreateSpindle()) { + Header = TaskFrameCreate->getEntry(); + Entry = Header->getSinglePredecessor(); + } + + Twine NameSuffix = ".otd" + Twine(T->getTaskDepth()); + Function *Helper; + { + NamedRegionTimer NRT("CreateHelper", "Create helper function", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + std::unique_ptr Mat = + std::make_unique( + dyn_cast(DI->getSyncRegion())); + Helper = CreateHelper( + Args, Outputs, TaskBlocks, Header, Entry, DI->getContinue(), VMap, + DestM, F.getSubprogram() != nullptr, Returns, NameSuffix.str(), + &ReattachBlocks, &TaskResumeBlocks, &SharedEHEntries, nullptr, nullptr, + ReturnType, nullptr, nullptr, Mat.get()); + } + assert(Returns.empty() && "Returns cloned when cloning detached CFG."); + + // Add alignment assumptions to arguments of helper, based on alignment of + // values in old function. + AddAlignmentAssumptions(&F, Args, VMap, DI, &OA.AC, &OA.DT); + + // Move allocas in the newly cloned detached CFG to the entry block of the + // helper. + { + NamedRegionTimer NRT("MoveAllocas", "Move allocas in cloned helper", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + // Collect the end instructions of the task. + SmallVector TaskEnds; + for (BasicBlock *EndBlock : ReattachBlocks) + TaskEnds.push_back(cast(VMap[EndBlock])->getTerminator()); + for (BasicBlock *EndBlock : TaskResumeBlocks) + TaskEnds.push_back(cast(VMap[EndBlock])->getTerminator()); + + // Move allocas in cloned detached block to entry of helper function. + BasicBlock *ClonedDetachedBlock = cast(VMap[T->getEntry()]); + MoveStaticAllocasInBlock(&Helper->getEntryBlock(), ClonedDetachedBlock, + TaskEnds); + + // If this task uses a taskframe, move allocas in cloned taskframe entry to + // entry of helper function. 
+ if (Spindle *TFCreate = T->getTaskFrameCreateSpindle()) { + BasicBlock *ClonedTFEntry = cast(VMap[TFCreate->getEntry()]); + MoveStaticAllocasInBlock(&Helper->getEntryBlock(), ClonedTFEntry, + TaskEnds); + } + + // We do not need to add new llvm.stacksave/llvm.stackrestore intrinsics, + // because calling and returning from the helper will automatically manage + // the stack appropriately. + } + + // Convert the cloned detach into an unconditional branch. We do this + // conversion here in part to prevent the cloned task from being reprocessed. + if (VMap[DI]) { + NamedRegionTimer NRT("serializeClone", "Serialize cloned Tapir task", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + DetachInst *ClonedDI = cast(VMap[DI]); + BasicBlock *ClonedDetacher = ClonedDI->getParent(); + BasicBlock *ClonedContinue = ClonedDI->getContinue(); + ClonedContinue->removePredecessor(ClonedDetacher); + BranchInst *DetachRepl = BranchInst::Create(ClonedDI->getDetached()); + ReplaceInstWithInst(ClonedDI, DetachRepl); + VMap[DI] = DetachRepl; + } + + Helper->setMemoryEffects(computeFunctionBodyMemoryAccess(*Helper, OA.AA)); + + return Helper; +} + +/// Helper function to unlink task T's exception-handling blocks from T's +/// parent. +static void unlinkTaskEHFromParent(Task *T) { + DetachInst *DI = T->getDetach(); + + SmallPtrSet UnwindPHIs; + if (DI->hasUnwindDest()) + // Get PHIs in the unwind destination of the detach. + UnwindPHIs.insert(DI->getUnwindDest()); + // Get the PHI's that use the landing pad of the detach's unwind. + getDetachUnwindPHIUses(DI, UnwindPHIs); + + SmallVector ToRemove; + // Look through PHI's that use the landing pad of the detach's unwind, and + // update those PHI's to not refer to task T. + for (BasicBlock *BB : UnwindPHIs) { + for (BasicBlock *Pred : predecessors(BB)) { + // Ignore the shared-EH spindles in T, because those might be used by + // other subtasks of T's parent. 
The shared-EH spindles tracked by T's + // parent will be handled once all subtasks of T's parent have been + // processed. + if (T->simplyEncloses(Pred) && !T->encloses(BB) && + T->getParentTask()->encloses(BB)) { + // Update the PHI nodes in BB. + BB->removePredecessor(Pred); + // Remove the edge from Pred to BB. + IRBuilder<> B(Pred->getTerminator()); + Instruction *Unreach = B.CreateUnreachable(); + Unreach->setDebugLoc(Pred->getTerminator()->getDebugLoc()); + ToRemove.push_back(Pred->getTerminator()); + } + } + } + + // Remove the terminators we no longer need. + for (Instruction *I : ToRemove) + I->eraseFromParent(); +} + +static BasicBlock *getTaskFrameContinue(Spindle *TF) { + Value *TFCreate = TF->getTaskFrameCreate(); + for (User *U : TFCreate->users()) { + if (IntrinsicInst *UI = dyn_cast(U)) { + if (Intrinsic::taskframe_end == UI->getIntrinsicID()) + return UI->getParent()->getSingleSuccessor(); + } + } + return nullptr; +} + +/// Outlines the content of taskframe \p TF in function \p F into a new helper +/// function. The parameter \p Inputs specified the inputs to the helper +/// function. The map \p VMap is updated with the mapping of instructions in \p +/// TF to instructions in the new helper function. +Function *llvm::createHelperForTaskFrame( + Function &F, Spindle *TF, ValueSet &Args, Module *DestM, + ValueToValueMapTy &VMap, Type *ReturnType, OutlineAnalysis &OA) { + // Collect all basic blocks in this task. + std::vector TaskBlocks; + // Reattach instructions and detached rethrows in this task might need special + // handling. + SmallPtrSet TFEndBlocks; + SmallPtrSet TFResumeBlocks; + // Entry blocks of shared-EH spindles may contain PHI nodes that need to be + // rewritten in the cloned helper. 
+ SmallPtrSet SharedEHEntries; + { + NamedRegionTimer NRT("getTaskFrameBlocks", "Get taskframe blocks", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + // Get taskframe blocks + for (Spindle *S : TF->taskframe_spindles()) { + // Skip spindles that are placeholders. + if (isPlaceholderSuccessor(S->getEntry())) + continue; + + LLVM_DEBUG(dbgs() << "Adding blocks in taskframe spindle " << *S << "\n"); + + // Some canonicalization methods, e.g., loop canonicalization, will + // introduce a basic block after a detached-rethrow that branches to the + // successor of the EHContinuation entry. + for (BasicBlock *Pred : predecessors(S->getEntry())) { + assert(!endsTaskFrame(Pred, TF->getTaskFrameCreate()) && + "Taskframe spindle after taskframe.end"); + if (isDetachedRethrow(Pred->getTerminator())) + SharedEHEntries.insert(S->getEntry()); + if (isSuccessorOfDetachedRethrow(Pred)) + SharedEHEntries.insert(S->getEntry()); + } + + // Terminate landingpads might be shared between a taskframe and its parent. + // It's safe to clone these blocks, but we need to be careful about PHI + // nodes. + if (S != TF) { + for (Spindle *PredS : predecessors(S)) { + if (!TF->taskFrameContains(PredS)) { + LLVM_DEBUG( + dbgs() + << "Taskframe spindle has predecessor outside of taskframe: " + << *S << "\n"); + SharedEHEntries.insert(S->getEntry()); + break; + } + } + } + + for (BasicBlock *B : S->blocks()) { + LLVM_DEBUG(dbgs() << "Adding taskframe block " << B->getName() << "\n"); + TaskBlocks.push_back(B); + + // Record any blocks that end the taskframe. + if (endsTaskFrame(B)) { + LLVM_DEBUG(dbgs() << "Recording taskframe.end block " << B->getName() + << "\n"); + TFEndBlocks.insert(B); + } + if (isTaskFrameResume(B->getTerminator())) { + LLVM_DEBUG(dbgs() << "Recording taskframe.resume block " << B->getName() + << "\n"); + TFResumeBlocks.insert(B); + } + + // Terminate landingpads might be shared between a taskframe and its + // parent. 
It's safe to clone these blocks, but we need to be careful + // about PHI nodes. + if ((B != S->getEntry()) && B->isLandingPad()) { + for (BasicBlock *Pred : predecessors(B)) { + if (!S->contains(Pred)) { + LLVM_DEBUG(dbgs() << "Block within taskframe spindle has " + "predecessor outside of spindle.\n"); + SharedEHEntries.insert(B); + } + } + } + } + } + } // end timed region + + SmallVector Returns; // Ignore returns cloned. + ValueSet Outputs; + Value *TFCreate = TF->getTaskFrameCreate(); + + BasicBlock *Header = TF->getEntry(); + BasicBlock *Entry = Header->getSinglePredecessor(); + BasicBlock *Continue = getTaskFrameContinue(TF); + assert(Continue && "Task frame lacks a continuation for outlining."); + + Twine NameSuffix = ".otf" + Twine(TF->getTaskFrameDepth()); + Function *Helper; + { + NamedRegionTimer NRT("CreateHelper", "Create helper function", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + std::unique_ptr Mat = + std::make_unique(); + Helper = CreateHelper(Args, Outputs, TaskBlocks, Header, Entry, Continue, + VMap, DestM, F.getSubprogram() != nullptr, Returns, + NameSuffix.str(), &TFEndBlocks, &TFResumeBlocks, + &SharedEHEntries, nullptr, nullptr, ReturnType, + nullptr, nullptr, Mat.get()); + } // end timed region + assert(Returns.empty() && "Returns cloned when cloning detached CFG."); + + // Add alignment assumptions to arguments of helper, based on alignment of + // values in old function. + AddAlignmentAssumptions(&F, Args, VMap, &Header->front(), &OA.AC, &OA.DT); + + // Move allocas in the newly cloned detached CFG to the entry block of the + // helper. + { + NamedRegionTimer NRT("MoveAllocas", "Move allocas in cloned helper", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + // Collect the end instructions of the task. 
+ SmallVector TaskEnds; + for (BasicBlock *EndBlock : TFEndBlocks) + TaskEnds.push_back(cast(VMap[EndBlock])->getTerminator()); + for (BasicBlock *EndBlock : TFResumeBlocks) + TaskEnds.push_back(cast(VMap[EndBlock])->getTerminator()); + + // Move allocas in cloned taskframe entry block to entry of helper function. + BasicBlock *ClonedTFEntry = cast(VMap[Header]); + MoveStaticAllocasInBlock(&Helper->getEntryBlock(), ClonedTFEntry, + TaskEnds); + + // We do not need to add new llvm.stacksave/llvm.stackrestore intrinsics, + // because calling and returning from the helper will automatically manage + // the stack appropriately. + } // end timed region + + // Remove the cloned taskframe.end intrinsics, to prevent the cloned taskframe + // from being reprocessed. + if (VMap[TFCreate]) { + NamedRegionTimer NRT("serializeClone", "Serialize cloned Tapir task", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + SmallVector TFEndsToRemove; + for (BasicBlock *EndBlock : TFEndBlocks) { + BasicBlock *ClonedEndBlock = cast(VMap[EndBlock]); + if (Instruction *Prev = ClonedEndBlock->getTerminator()->getPrevNode()) + if (isTapirIntrinsic(Intrinsic::taskframe_end, Prev)) + TFEndsToRemove.push_back(Prev); + } + for (Instruction *ClonedTFEnd : TFEndsToRemove) + ClonedTFEnd->eraseFromParent(); + } + + Helper->setMemoryEffects(computeFunctionBodyMemoryAccess(*Helper, OA.AA)); + + return Helper; +} + +/// Outlines a taskframe \p TF into a helper function that accepts the inputs \p +/// Inputs. The map \p VMap is updated with the mapping of instructions in \p +/// TF to instructions in the new helper function. Information about the helper +/// function is returned as a TaskOutlineInfo structure. 
+TaskOutlineInfo llvm::outlineTaskFrame( + Spindle *TF, ValueSet &Inputs, SmallVectorImpl &HelperInputs, + Module *DestM, ValueToValueMapTy &VMap, + TapirTarget::ArgStructMode useArgStruct, Type *ReturnType, + ValueToValueMapTy &InputMap, OutlineAnalysis &OA) { + if (Task *T = TF->getTaskFromTaskFrame()) + return outlineTask(T, Inputs, HelperInputs, DestM, VMap, useArgStruct, + ReturnType, InputMap, OA); + + Function &F = *TF->getEntry()->getParent(); + BasicBlock *Entry = TF->getEntry(); + + Instruction *StorePt = Entry->getSinglePredecessor()->getTerminator(); + // Find the corresponding taskframe.resume, if one exists. + BasicBlock *Unwind = getTaskFrameResumeDest(TF->getTaskFrameCreate()); + BasicBlock *Continue = getTaskFrameContinue(TF); + + // Convert the inputs of the task to inputs to the helper. + ValueSet HelperArgs; + // TODO: Consider supporting arg structs for arbitrary outlined taskframes. + fixupInputSet(F, Inputs, HelperArgs); + Instruction *ArgsStart = StorePt; + + for (Value *V : HelperArgs) + HelperInputs.push_back(V); + + // Clone the blocks into a helper function. + Function *Helper = createHelperForTaskFrame(F, TF, HelperArgs, DestM, VMap, + ReturnType, OA); + Instruction *ClonedTF = cast(VMap[TF->getTaskFrameCreate()]); + return TaskOutlineInfo(Helper, Entry, nullptr, ClonedTF, Inputs, + ArgsStart, StorePt, Continue, Unwind); +} + +/// Replaces the spawned task \p T, with associated TaskOutlineInfo \p Out, with +/// a call or invoke to the outlined helper function created for \p T. +Instruction *llvm::replaceTaskFrameWithCallToOutline( + Spindle *TF, TaskOutlineInfo &Out, SmallVectorImpl &OutlineInputs) { + if (Task *T = TF->getTaskFromTaskFrame()) + // Remove any dependencies from T's exception-handling code to T's parent. 
+ unlinkTaskEHFromParent(T); + + Instruction *ToReplace = Out.ReplCall; + BasicBlock *TFResumeBB = nullptr; + if (Value *TFCreate = TF->getTaskFrameCreate()) + if (Instruction *TFResume = getTaskFrameResume(TFCreate)) + TFResumeBB = TFResume->getParent(); + + // Update PHI nodes in entry of taskframe. + TF->getEntry()->removePredecessor(ToReplace->getParent()); + + // Add call to new helper function in original function. + if (!Out.ReplUnwind) { + // Common case. Insert a call to the outline immediately before the detach. + CallInst *TopCall; + // Create call instruction. + IRBuilder<> Builder(Out.ReplCall); + TopCall = Builder.CreateCall(Out.Outline, OutlineInputs); + // Use a fast calling convention for the outline. + TopCall->setCallingConv(Out.Outline->getCallingConv()); + TopCall->setDebugLoc(ToReplace->getDebugLoc()); + if (Out.Outline->doesNotThrow()) + TopCall->setDoesNotThrow(); + // Replace the detach with an unconditional branch to its continuation. + ReplaceInstWithInst(ToReplace, BranchInst::Create(Out.ReplRet)); + return TopCall; + } else { + // The detach might catch an exception from the task. Replace the detach + // with an invoke of the outline. + InvokeInst *TopCall; + // Create invoke instruction. The ordinary return of the invoke is the + // detach's continuation, and the unwind return is the detach's unwind. + TopCall = InvokeInst::Create(Out.Outline, Out.ReplRet, Out.ReplUnwind, + OutlineInputs, "", ToReplace->getParent()); + if (TFResumeBB) { + // Update PHI nodes in the unwind destination of TFResumeBB. + for (PHINode &PN : Out.ReplUnwind->phis()) + PN.replaceIncomingBlockWith(TFResumeBB, ToReplace->getParent()); + // Replace the terminator of TFResumeBB with an unreachable. + IRBuilder<> B(TFResumeBB->getTerminator()); + B.CreateUnreachable()->setDebugLoc( + TFResumeBB->getTerminator()->getDebugLoc()); + TFResumeBB->getTerminator()->eraseFromParent(); + } + // Use a fast calling convention for the outline. 
+ TopCall->setCallingConv(Out.Outline->getCallingConv()); + TopCall->setDebugLoc(ToReplace->getDebugLoc()); + // Remove the detach. The invoke serves as a replacement terminator. + ToReplace->eraseFromParent(); + return TopCall; + } +} + +/// Outlines a task \p T into a helper function that accepts the inputs \p +/// Inputs. The map \p VMap is updated with the mapping of instructions in \p T +/// to instructions in the new helper function. Information about the helper +/// function is returned as a TaskOutlineInfo structure. +TaskOutlineInfo llvm::outlineTask( + Task *T, ValueSet &Inputs, SmallVectorImpl &HelperInputs, + Module *DestM, ValueToValueMapTy &VMap, + TapirTarget::ArgStructMode useArgStruct, Type *ReturnType, + ValueToValueMapTy &InputMap, OutlineAnalysis &OA) { + assert(!T->isRootTask() && "Cannot outline the root task."); + Function &F = *T->getEntry()->getParent(); + DetachInst *DI = T->getDetach(); + Value *TFCreate = T->getTaskFrameUsed(); + + Instruction *LoadPt = T->getEntry()->getFirstNonPHIOrDbgOrLifetime(); + Instruction *StorePt = DI; + BasicBlock *Unwind = DI->getUnwindDest(); + if (Spindle *TaskFrameCreate = T->getTaskFrameCreateSpindle()) { + // If this task uses a taskframe, determine load and store points based on + // taskframe intrinsics. + LoadPt = &*++TaskFrameCreate->getEntry()->begin(); + StorePt = + TaskFrameCreate->getEntry()->getSinglePredecessor()->getTerminator(); + // Ensure debug information on StorePt + if (!StorePt->getDebugLoc()) + StorePt->setDebugLoc(DI->getDebugLoc()); + if (Unwind) + // Find the corresponding taskframe.resume. + Unwind = getTaskFrameResumeDest(T->getTaskFrameUsed()); + } + + // Convert the inputs of the task to inputs to the helper. + ValueSet HelperArgs; + Instruction *ArgsStart = fixupHelperInputs(F, T, Inputs, HelperArgs, StorePt, + LoadPt, useArgStruct, InputMap); + for (Value *V : HelperArgs) + HelperInputs.push_back(V); + + // Clone the blocks into a helper function. 
+ Function *Helper = createHelperForTask(F, T, HelperArgs, DestM, VMap, + ReturnType, OA); + Value *ClonedTFCreate = TFCreate ? VMap[TFCreate] : nullptr; + return TaskOutlineInfo(Helper, T->getEntry(), + dyn_cast_or_null(VMap[DI]), + dyn_cast_or_null(ClonedTFCreate), Inputs, + ArgsStart, StorePt, DI->getContinue(), Unwind); +} + +//----------------------------------------------------------------------------// +// Methods for lowering Tapir loops + +/// Returns true if the value \p V is defined outside the set \p Blocks of basic +/// blocks in a function. +static bool definedOutsideBlocks(const Value *V, + SmallPtrSetImpl &Blocks) { + if (isa(V)) return true; + if (const Instruction *I = dyn_cast(V)) + return !Blocks.count(I->getParent()); + return false; +} + +/// Returns true if the value V used inside the body of Tapir loop L is defined +/// outside of L. +static bool taskInputDefinedOutsideLoop(const Value *V, const Loop *L) { + if (isa(V)) + return true; + + const BasicBlock *Header = L->getHeader(); + const BasicBlock *Latch = L->getLoopLatch(); + if (const Instruction *I = dyn_cast(V)) + if ((Header != I->getParent()) && (Latch != I->getParent())) + return true; + return false; +} + +/// Given a Tapir loop \p TL and the set of inputs to the task inside that loop, +/// returns the set of inputs for the Tapir loop itself. +ValueSet llvm::getTapirLoopInputs(TapirLoopInfo *TL, ValueSet &TaskInputs) { + Loop *L = TL->getLoop(); + Task *T = TL->getTask(); + ValueSet LoopInputs; + + for (Value *V : TaskInputs) + if (taskInputDefinedOutsideLoop(V, L)) + LoopInputs.insert(V); + + const Value *SyncRegion = T->getDetach()->getSyncRegion(); + + SmallPtrSet BlocksToCheck; + BlocksToCheck.insert(L->getHeader()); + BlocksToCheck.insert(L->getLoopLatch()); + for (BasicBlock *BB : BlocksToCheck) { + for (Instruction &II : *BB) { + // Skip the condition of this loop, since we will process that specially. 
+ if (TL->getCondition() == &II) continue; + // Examine all operands of this instruction. + for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE; + ++OI) { + // If the operand is the sync region of this task's detach, skip it. + if (SyncRegion == *OI) + continue; + LLVM_DEBUG({ + if (Instruction *OP = dyn_cast(*OI)) + assert(!T->encloses(OP->getParent()) && + "Loop control uses value defined in body task."); + }); + // If this operand is not defined in the header or latch, it's an input. + if (definedOutsideBlocks(*OI, BlocksToCheck)) + LoopInputs.insert(*OI); + } + } + } + + return LoopInputs; +} + +/// Replaces the Tapir loop \p TL, with associated TaskOutlineInfo \p Out, with +/// a call or invoke to the outlined helper function created for \p TL. +Instruction *llvm::replaceLoopWithCallToOutline( + TapirLoopInfo *TL, TaskOutlineInfo &Out, + SmallVectorImpl &OutlineInputs) { + // Remove any dependencies from the detach unwind of T code to T's parent. + unlinkTaskEHFromParent(TL->getTask()); + + LLVM_DEBUG({ + dbgs() << "Creating call with arguments:\n"; + for (Value *V : OutlineInputs) + dbgs() << "\t" << *V << "\n"; + }); + + Loop *L = TL->getLoop(); + // Add call to new helper function in original function. + if (!Out.ReplUnwind) { + // Common case. Insert a call to the outline immediately before the detach. + CallInst *TopCall; + // Create call instruction. + IRBuilder<> Builder(Out.ReplCall); + TopCall = Builder.CreateCall(Out.Outline, OutlineInputs); + // Use a fast calling convention for the outline. + TopCall->setCallingConv(Out.Outline->getCallingConv()); + TopCall->setDebugLoc(TL->getDebugLoc()); + if (Out.Outline->doesNotThrow()) + TopCall->setDoesNotThrow(); + // Replace the loop with an unconditional branch to its exit. + L->getHeader()->removePredecessor(Out.ReplCall->getParent()); + ReplaceInstWithInst(Out.ReplCall, BranchInst::Create(Out.ReplRet)); + return TopCall; + } else { + // The detach might catch an exception from the task. 
Replace the detach + // with an invoke of the outline. + InvokeInst *TopCall; + + // Create invoke instruction. The ordinary return of the invoke is the + // detach's continuation, and the unwind return is the detach's unwind. + TopCall = InvokeInst::Create(Out.Outline, Out.ReplRet, Out.ReplUnwind, + OutlineInputs); + // Use a fast calling convention for the outline. + TopCall->setCallingConv(Out.Outline->getCallingConv()); + TopCall->setDebugLoc(TL->getDebugLoc()); + // Replace the loop with the invoke. + L->getHeader()->removePredecessor(Out.ReplCall->getParent()); + ReplaceInstWithInst(Out.ReplCall, TopCall); + // Add invoke parent as a predecessor for all Phi nodes in ReplUnwind. + for (PHINode &Phi : Out.ReplUnwind->phis()) + Phi.addIncoming(Phi.getIncomingValueForBlock(L->getHeader()), + TopCall->getParent()); + return TopCall; + } +} + +bool TapirTarget::shouldProcessFunction(const Function &F) const { + if (F.getName() == "main") + return true; + + if (canDetach(&F)) + return true; + + for (const Instruction &I : instructions(&F)) + if (const IntrinsicInst *II = dyn_cast(&I)) { + switch (II->getIntrinsicID()) { + case Intrinsic::hyper_lookup: + case Intrinsic::reducer_register: + case Intrinsic::reducer_unregister: + case Intrinsic::tapir_loop_grainsize: + case Intrinsic::task_frameaddress: + case Intrinsic::tapir_runtime_start: + case Intrinsic::tapir_runtime_end: + return true; + default: + break; + } + } + + return false; +} + +void TapirTarget::lowerTaskFrameAddrCall(CallInst *TaskFrameAddrCall) { + // By default, replace calls to task_frameaddress with ordinary calls to the + // frameaddress intrinsic. + TaskFrameAddrCall->setCalledFunction(Intrinsic::getDeclaration( + &M, Intrinsic::frameaddress, PointerType::getInt8PtrTy(M.getContext()))); +} + +void TapirTarget::lowerTapirRTCalls(SmallVectorImpl &TapirRTCalls, + Function &F, BasicBlock *TFEntry) { + // By default, do nothing with tapir_runtime_{start,end} calls. 
+ return; +} + +/// Process the Tapir instructions in an ordinary (non-spawning and not spawned) +/// function \p F directly. +bool TapirTarget::processOrdinaryFunction(Function &F, BasicBlock *TFEntry) { + // By default, do no special processing for ordinary functions. Instead, the + // function will be processed using TapirToTargetImpl::processSimpleABI(). + return false; +} diff --git a/llvm/lib/Transforms/Tapir/OMPTaskABI.cpp b/llvm/lib/Transforms/Tapir/OMPTaskABI.cpp new file mode 100644 index 00000000000000..32c59ef532c544 --- /dev/null +++ b/llvm/lib/Transforms/Tapir/OMPTaskABI.cpp @@ -0,0 +1,597 @@ +//===- OMPTaskABI.cpp - Interface to the OpenMP task runtime -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the OMP Task ABI to convert Tapir instructions to calls +// into kmpc task runtime calls. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Tapir/OMPTaskABI.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/TapirTaskInfo.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Linker/Linker.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ModRef.h" +#include "llvm/Transforms/Tapir/Outline.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/EscapeEnumerator.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/TapirUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "omptaskabi" + +extern cl::opt DebugABICalls; + +static cl::opt ClRuntimeBCPath( + "omp-bc-path", cl::init(""), + cl::desc("Path to the bitcode file for the runtime ABI"), + cl::Hidden); + +static const StringRef StackFrameName = "__rts_sf"; + +namespace { + +// Custom DiagnosticInfo for linking the Lambda ABI bitcode file. 
+class OMPTaskABILinkDiagnosticInfo : public DiagnosticInfo { + const Module *SrcM; + const Twine &Msg; + +public: + OMPTaskABILinkDiagnosticInfo(DiagnosticSeverity Severity, const Module *SrcM, + const Twine &Msg) + : DiagnosticInfo(DK_Lowering, Severity), SrcM(SrcM), Msg(Msg) {} + void print(DiagnosticPrinter &DP) const override { + DP << "linking module '" << SrcM->getModuleIdentifier() << "': " << Msg; + } +}; + +// Custom DiagnosticHandler to handle diagnostics arising when linking the +// Lambda ABI bitcode file. +class OMPTaskABIDiagnosticHandler final : public DiagnosticHandler { + const Module *SrcM; + DiagnosticHandler *OrigHandler; + +public: + OMPTaskABIDiagnosticHandler(const Module *SrcM, + DiagnosticHandler *OrigHandler) + : SrcM(SrcM), OrigHandler(OrigHandler) {} + + bool handleDiagnostics(const DiagnosticInfo &DI) override { + if (DI.getKind() != DK_Linker) + return OrigHandler->handleDiagnostics(DI); + + std::string MsgStorage; + { + raw_string_ostream Stream(MsgStorage); + DiagnosticPrinterRawOStream DP(Stream); + DI.print(DP); + } + return OrigHandler->handleDiagnostics( + OMPTaskABILinkDiagnosticInfo(DI.getSeverity(), SrcM, MsgStorage)); + } +}; + +// Structure recording information about runtime ABI functions. +struct RTSFnDesc { + StringRef FnName; + FunctionType *FnType; + FunctionCallee &FnCallee; +}; +} // namespace + +// void OMPTaskABI::setOptions(const TapirTargetOptions &Options) { +// if (!isa(Options)) +// return; + +// const OMPTaskABIOptions &OptionsCast = cast(Options); + +// // Get the path to the runtime bitcode file. +// RuntimeBCPath = OptionsCast.getRuntimeBCPath(); +// } + +void OMPTaskABI::prepareModule() { + LLVMContext &C = M.getContext(); + const DataLayout &DL = DestM.getDataLayout(); + Type *Int8Ty = Type::getInt8Ty(C); + Type *Int16Ty = Type::getInt16Ty(C); + Type *Int32Ty = Type::getInt32Ty(C); + Type *Int64Ty = Type::getInt64Ty(C); + + // If a runtime bitcode path is given via the command line, use it. 
+ if ("" != ClRuntimeBCPath) + RuntimeBCPath = ClRuntimeBCPath; + + if ("" == RuntimeBCPath) { + C.emitError("OMPTaskABI: No bitcode ABI file given."); + return; + } + + LLVM_DEBUG(dbgs() << "Using external bitcode file for Lambda ABI: " + << RuntimeBCPath << "\n"); + SMDiagnostic SMD; + + // Parse the bitcode file. This call imports structure definitions, but not + // function definitions. + if (std::unique_ptr ExternalModule = + parseIRFile(RuntimeBCPath, SMD, C)) { + // Get the original DiagnosticHandler for this context. + std::unique_ptr OrigDiagHandler = + C.getDiagnosticHandler(); + + // Setup an OMPTaskABIDiagnosticHandler for this context, to handle + // diagnostics that arise from linking ExternalModule. + C.setDiagnosticHandler(std::make_unique( + ExternalModule.get(), OrigDiagHandler.get())); + + // Link the external module into the current module, copying over global + // values. + // + // TODO: Consider restructuring the import process to use + // Linker::Flags::LinkOnlyNeeded to copy over only the necessary contents + // from the external module. + bool Fail = Linker::linkModules( + M, std::move(ExternalModule), Linker::Flags::None, + [](Module &M, const StringSet<> &GVS) { + for (StringRef GVName : GVS.keys()) { + LLVM_DEBUG(dbgs() << "Linking global value " << GVName << "\n"); + if (Function *Fn = M.getFunction(GVName)) { + if (!Fn->isDeclaration() && !Fn->hasComdat()) + // We set the function's linkage as available_externally, so + // that subsequent optimizations can remove these definitions + // from the module. We don't want this module redefining any of + // these symbols, even if they aren't inlined, because the + // Lambda runtime library will provide those definitions later. 
+ Fn->setLinkage(Function::AvailableExternallyLinkage); + } else if (GlobalVariable *G = M.getGlobalVariable(GVName)) { + if (!G->isDeclaration() && !G->hasComdat()) + G->setLinkage(GlobalValue::AvailableExternallyLinkage); + } + } + }); + if (Fail) + C.emitError("OMPTaskABI: Failed to link bitcode ABI file: " + + Twine(RuntimeBCPath)); + + // Restore the original DiagnosticHandler for this context. + C.setDiagnosticHandler(std::move(OrigDiagHandler)); + } else { + C.emitError("OMPTaskABI: Failed to parse bitcode ABI file: " + + Twine(RuntimeBCPath)); + } + + // Get or create local definitions of RTS structure types. + const char *StackFrameName = "struct.__rts_stack_frame"; + StackFrameTy = StructType::lookupOrCreate(C, StackFrameName); + + const char *TaskTyName = "struct.kmp_task"; + TaskTy = StructType::lookupOrCreate(C, TaskTyName); + + PointerType *StackFramePtrTy = PointerType::getUnqual(StackFrameTy); + Type *VoidTy = Type::getVoidTy(C); + Type *VoidPtrTy = Type::getInt8PtrTy(C); + + // Define the types of the RTS functions. 
+ FunctionType *RTSFnTy = FunctionType::get(VoidTy, {StackFramePtrTy}, false); + SpawnBodyFnArgTy = VoidPtrTy; + Type *IntPtrTy = DL.getIntPtrType(C); + SpawnBodyFnArgSizeTy = IntPtrTy; + SpawnBodyFnTy = FunctionType::get(Int32Ty, {Int32Ty, VoidPtrTy}, false); + FunctionType *ArgsFromTaskFnTy = + FunctionType::get(VoidPtrTy, {VoidPtrTy, IntPtrTy}, false); + FunctionType *SpawnFnTy = + FunctionType::get(VoidTy, + {StackFramePtrTy, PointerType::getUnqual(SpawnBodyFnTy), + SpawnBodyFnArgTy, SpawnBodyFnArgSizeTy, IntPtrTy}, + false); + FunctionType *Grainsize8FnTy = FunctionType::get(Int8Ty, {Int8Ty}, false); + FunctionType *Grainsize16FnTy = FunctionType::get(Int16Ty, {Int16Ty}, false); + FunctionType *Grainsize32FnTy = FunctionType::get(Int32Ty, {Int32Ty}, false); + FunctionType *Grainsize64FnTy = FunctionType::get(Int64Ty, {Int64Ty}, false); + FunctionType *WorkerInfoTy = FunctionType::get(Int32Ty, {}, false); + + // Create an array of RTS functions, with their associated types and + // FunctionCallee member variables in the OMPTaskABI class. + RTSFnDesc RTSFunctions[] = { + {"__rts_enter_frame", RTSFnTy, RTSEnterFrame}, + {"__rts_get_args_from_task", ArgsFromTaskFnTy, RTSGetArgsFromTask}, + {"__rts_spawn", SpawnFnTy, RTSSpawn}, + {"__rts_sync", RTSFnTy, RTSSync}, + {"__rts_sync_nothrow", RTSFnTy, RTSSyncNoThrow}, + {"__rts_loop_grainsize_8", Grainsize8FnTy, RTSLoopGrainsize8}, + {"__rts_loop_grainsize_16", Grainsize16FnTy, RTSLoopGrainsize16}, + {"__rts_loop_grainsize_32", Grainsize32FnTy, RTSLoopGrainsize32}, + {"__rts_loop_grainsize_64", Grainsize64FnTy, RTSLoopGrainsize64}, + {"__rts_get_num_workers", WorkerInfoTy, RTSGetNumWorkers}, + {"__rts_get_worker_id", WorkerInfoTy, RTSGetWorkerID}, + }; + + // Add attributes to internalized functions. 
+ for (RTSFnDesc FnDesc : RTSFunctions) { + assert(!FnDesc.FnCallee && "Redefining RTS function"); + FnDesc.FnCallee = M.getOrInsertFunction(FnDesc.FnName, FnDesc.FnType); + assert(isa(FnDesc.FnCallee.getCallee()) && + "Runtime function is not a function"); + Function *Fn = cast(FnDesc.FnCallee.getCallee()); + + Fn->setDoesNotThrow(); + + // Unless we're debugging, mark the function as always_inline. This + // attribute is required for some functions, but is helpful for all + // functions. + if (!DebugABICalls) + Fn->addFnAttr(Attribute::AlwaysInline); + else + Fn->removeFnAttr(Attribute::AlwaysInline); + + if (Fn->getName() == "__rts_get_num_workers" || + Fn->getName() == "__rts_get_worker_id") { + Fn->setLinkage(Function::InternalLinkage); + } + } + + // If no valid bitcode file was found fill in the missing pieces. + // An error should have been emitted already unless the user + // set DebugABICalls. + + if (StackFrameTy->isOpaque()) { + // Create a dummy __rts_stack_frame structure + StackFrameTy->setBody(Int64Ty); + } + // Create declarations of all RTS functions, and add basic attributes to those + // declarations. + for (RTSFnDesc FnDesc : RTSFunctions) { + if (FnDesc.FnCallee) + continue; + FnDesc.FnCallee = M.getOrInsertFunction(FnDesc.FnName, FnDesc.FnType); + assert(isa(FnDesc.FnCallee.getCallee()) && + "RTS function is not a function"); + Function *Fn = cast(FnDesc.FnCallee.getCallee()); + + Fn->setDoesNotThrow(); + } +} + +void OMPTaskABI::addHelperAttributes(Function &Helper) { + // We'll be creating a new helper function, and we want to inline this helper + // function into that one to reduce overheads. + Helper.addFnAttr(Attribute::AlwaysInline); + // If the helper uses an argument structure, then it is not a write-only + // function. 
+ if (getArgStructMode() != ArgStructMode::None) { + Helper.removeFnAttr(Attribute::WriteOnly); + Helper.setMemoryEffects( + MemoryEffects(MemoryEffects::Location::Other, ModRefInfo::ModRef)); + } + // Note that the address of the helper is unimportant. + Helper.setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + + // The helper is internal to this module. We use internal linkage, rather + // than private linkage, so that tools can still reference the helper + // function. + Helper.setLinkage(GlobalValue::InternalLinkage); +} + +// Check whether the allocation of a __rts_stack_frame can be inserted after +// instruction \p I. +static bool skipInstruction(const Instruction &I) { + if (isa(I)) + return true; + + if (isa(I)) + return true; + + if (const IntrinsicInst *II = dyn_cast(&I)) { + // Skip simple intrinsics + switch(II->getIntrinsicID()) { + case Intrinsic::annotation: + case Intrinsic::assume: + case Intrinsic::sideeffect: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: + case Intrinsic::is_constant: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + case Intrinsic::experimental_gc_result: + case Intrinsic::experimental_gc_relocate: + case Intrinsic::experimental_noalias_scope_decl: + case Intrinsic::syncregion_start: + case Intrinsic::taskframe_create: + return true; + default: + return false; + } + } + + return false; +} + +// Scan the basic block \p B to find a point to insert the allocation of a +// __rts_stack_frame. +static Instruction *getStackFrameInsertPt(BasicBlock &B) { + BasicBlock::iterator BI(B.getFirstInsertionPt()); + BasicBlock::const_iterator BE(B.end()); + + // Scan the basic block for the first instruction we should not skip. 
+ while (BI != BE) { + if (!skipInstruction(*BI)) { + return &*BI; + } + ++BI; + } + + // We reached the end of the basic block; return the terminator. + return B.getTerminator(); +} + +/// Create the __rts_stack_frame for the spawning function. +Value *OMPTaskABI::CreateStackFrame(Function &F) { + const DataLayout &DL = F.getParent()->getDataLayout(); + Type *SFTy = StackFrameTy; + + IRBuilder<> B(getStackFrameInsertPt(F.getEntryBlock())); + AllocaInst *SF = B.CreateAlloca(SFTy, DL.getAllocaAddrSpace(), + /*ArraySize*/ nullptr, + /*Name*/ StackFrameName); + + SF->setAlignment(StackFrameAlign); + + return SF; +} + +Value* OMPTaskABI::GetOrCreateStackFrame(Function &F) { + if (DetachCtxToStackFrame.count(&F)) + return DetachCtxToStackFrame[&F]; + + Value *SF = CreateStackFrame(F); + DetachCtxToStackFrame[&F] = SF; + + return SF; +} + +// Insert a call in Function F to __rts_enter_frame to initialize the +// __rts_stack_frame in F. If TaskFrameCreate is nonnull, the call to +// __rts_enter_frame is inserted at TaskFrameCreate. +CallInst *OMPTaskABI::InsertStackFramePush(Function &F, + Instruction *TaskFrameCreate, + bool Helper) { + Instruction *SF = cast(GetOrCreateStackFrame(F)); + + BasicBlock::iterator InsertPt = ++SF->getIterator(); + IRBuilder<> B(&(F.getEntryBlock()), InsertPt); + if (TaskFrameCreate) + B.SetInsertPoint(TaskFrameCreate); + if (!B.getCurrentDebugLocation()) { + // Try to find debug information later in this block for the ABI call. + BasicBlock::iterator BI = B.GetInsertPoint(); + BasicBlock::const_iterator BE(B.GetInsertBlock()->end()); + while (BI != BE) { + if (DebugLoc Loc = BI->getDebugLoc()) { + B.SetCurrentDebugLocation(Loc); + break; + } + ++BI; + } + } + + Value *Args[1] = {SF}; + return B.CreateCall(RTSEnterFrame, Args); +} + +// Insert a call in Function F to pop the stack frame. 
+// +// PromoteCallsToInvokes dictates whether call instructions that can throw are +// promoted to invoke instructions prior to inserting the epilogue-function +// calls. +void OMPTaskABI::InsertStackFramePop(Function &F, bool PromoteCallsToInvokes, + bool InsertPauseFrame, bool Helper) {} + +/// Lower a call to get the grainsize of a Tapir loop. +Value *OMPTaskABI::lowerGrainsizeCall(CallInst *GrainsizeCall) { + Value *Limit = GrainsizeCall->getArgOperand(0); + IRBuilder<> Builder(GrainsizeCall); + + // Select the appropriate __rts_grainsize function, based on the type. + FunctionCallee RTSGrainsizeCall; + if (GrainsizeCall->getType()->isIntegerTy(8)) + RTSGrainsizeCall = RTSLoopGrainsize8; + else if (GrainsizeCall->getType()->isIntegerTy(16)) + RTSGrainsizeCall = RTSLoopGrainsize16; + else if (GrainsizeCall->getType()->isIntegerTy(32)) + RTSGrainsizeCall = RTSLoopGrainsize32; + else if (GrainsizeCall->getType()->isIntegerTy(64)) + RTSGrainsizeCall = RTSLoopGrainsize64; + else + llvm_unreachable("No RTSGrainsize call matches type for Tapir loop."); + + Value *Grainsize = Builder.CreateCall(RTSGrainsizeCall, Limit); + + // Replace uses of grainsize intrinsic call with this grainsize value. + GrainsizeCall->replaceAllUsesWith(Grainsize); + return Grainsize; +} + +// Lower a sync instruction SI. +void OMPTaskABI::lowerSync(SyncInst &SI) { + Function &Fn = *SI.getFunction(); + if (!DetachCtxToStackFrame[&Fn]) + // If we have not created a stackframe for this function, then we don't need + // to handle the sync. + return; + + Value *SF = GetOrCreateStackFrame(Fn); + Value *Args[] = { SF }; + assert(Args[0] && "sync used in function without frame!"); + + Instruction *SyncUnwind = nullptr; + BasicBlock *SyncCont = SI.getSuccessor(0); + BasicBlock *SyncUnwindDest = nullptr; + // Determine whether a sync.unwind immediately follows SI. 
+ if (InvokeInst *II = + dyn_cast(SyncCont->getFirstNonPHIOrDbgOrLifetime())) { + if (isSyncUnwind(II)) { + SyncUnwind = II; + SyncCont = II->getNormalDest(); + SyncUnwindDest = II->getUnwindDest(); + } + } + + CallBase *CB; + if (!SyncUnwindDest) { + if (Fn.doesNotThrow()) + CB = CallInst::Create(RTSSyncNoThrow, Args, "", + /*insert before*/ &SI); + else + CB = CallInst::Create(RTSSync, Args, "", /*insert before*/ &SI); + + BranchInst::Create(SyncCont, CB->getParent()); + } else { + CB = InvokeInst::Create(RTSSync, SyncCont, SyncUnwindDest, Args, "", + /*insert before*/ &SI); + for (PHINode &PN : SyncCont->phis()) + PN.addIncoming(PN.getIncomingValueForBlock(SyncUnwind->getParent()), + SI.getParent()); + for (PHINode &PN : SyncUnwindDest->phis()) + PN.addIncoming(PN.getIncomingValueForBlock(SyncUnwind->getParent()), + SI.getParent()); + } + CB->setDebugLoc(SI.getDebugLoc()); + SI.eraseFromParent(); + + // Mark this function as stealable. + Fn.addFnAttr(Attribute::Stealable); +} + +bool OMPTaskABI::preProcessFunction(Function &F, TaskInfo &TI, + bool ProcessingTapirLoops) { + return false; +} +void OMPTaskABI::postProcessFunction(Function &F, bool ProcessingTapirLoops) {} +void OMPTaskABI::postProcessHelper(Function &F) {} + +void OMPTaskABI::preProcessOutlinedTask(Function &F, Instruction *DetachPt, + Instruction *TaskFrameCreate, + bool IsSpawner, BasicBlock *TFEntry) { + if (IsSpawner) + InsertStackFramePush(F, TaskFrameCreate, /*Helper*/ true); +} + +void OMPTaskABI::postProcessOutlinedTask(Function &F, Instruction *DetachPt, + Instruction *TaskFrameCreate, + bool IsSpawner, BasicBlock *TFEntry) { + if (IsSpawner) + InsertStackFramePop(F, /*PromoteCallsToInvokes*/ true, + /*InsertPauseFrame*/ true, /*Helper*/ true); +} + +void OMPTaskABI::preProcessRootSpawner(Function &F, BasicBlock *TFEntry) { + InsertStackFramePush(F); +} + +void OMPTaskABI::postProcessRootSpawner(Function &F, BasicBlock *TFEntry) { + InsertStackFramePop(F, /*PromoteCallsToInvokes*/ false, + 
/*InsertPauseFrame*/ false, /*Helper*/ false); +} + +void OMPTaskABI::processSubTaskCall(TaskOutlineInfo &TOI, DominatorTree &DT) { + const DataLayout &DL = DestM.getDataLayout(); + CallBase *ReplCall = cast(TOI.ReplCall); + Function *Helper = TOI.Outline; + + Function &F = *ReplCall->getFunction(); + Value *SF = DetachCtxToStackFrame[&F]; + assert(SF && "No frame found for spawning task"); + + // Create OMP function helper to match required interface. + LLVMContext &C = M.getContext(); + Function *OMPTask = + Function::Create(SpawnBodyFnTy, GlobalValue::InternalLinkage, + "omp_task." + Helper->getName(), &M); + + { + Function *NewFunc = OMPTask; + Function *OldFunc = Helper; + + // Copy all attributes other than those stored in the AttributeSet. We need + // to remap the parameter indices of the AttributeSet. + AttributeList NewAttrs = NewFunc->getAttributes(); + NewFunc->copyAttributesFrom(OldFunc); + NewFunc->setAttributes(NewAttrs); + + SmallVector NewArgAttrs(NewFunc->arg_size()); + AttributeList OldAttrs = OldFunc->getAttributes(); + + NewFunc->setAttributes( + AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttrs(), + OldAttrs.getRetAttrs(), NewArgAttrs)); + } + + // Get the alignment of the helper arguments. The bitcode-ABI functions may + // use the alignment to align the shared variables in the storage allocated by + // the OpenMP runtime, especially to accommodate vector arguments. + AllocaInst *ArgAlloca = cast(ReplCall->getArgOperand(0)); + uint64_t Alignment = + DL.getPrefTypeAlign(ArgAlloca->getAllocatedType()).value(); + + { + // Populate the OMP function helper. + BasicBlock *OMPTaskBB = BasicBlock::Create(C, "entry", OMPTask); + IRBuilder<> IRB(ReturnInst::Create( + C, Constant::getNullValue(Type::getInt32Ty(C)), OMPTaskBB)); + // Get the helper arguments from the task structure. 
+ Value *ArgsFromTask = IRB.CreateCall( + RTSGetArgsFromTask, {OMPTask->getArg(1), IRB.getInt64(Alignment)}); + Value *ArgsCast = IRB.CreateBitOrPointerCast( + ArgsFromTask, ArgAlloca->getType()); + // Insert call to helper in OMP function helper. + CallInst *Call = IRB.CreateCall(ReplCall->getCalledFunction(), {ArgsCast}); + Call->setCallingConv(ReplCall->getCallingConv()); + } + + // Replace the original call to the helper with a call to __rts_spawn. + IRBuilder<> B(ReplCall); + Value *ArgCast = B.CreateBitOrPointerCast(ArgAlloca, SpawnBodyFnArgTy); + auto ArgSize = ArgAlloca->getAllocationSizeInBits(DL); + assert(ArgSize && + "Could not determine size of compiler-generated ArgStruct."); + Value *ArgSizeVal = ConstantInt::get(SpawnBodyFnArgSizeTy, *ArgSize / 8); + + if (InvokeInst *II = dyn_cast(ReplCall)) { + B.CreateInvoke(RTSSpawn, II->getNormalDest(), II->getUnwindDest(), + {SF, OMPTask, ArgCast, ArgSizeVal, B.getInt64(Alignment)}); + } else { + B.CreateCall(RTSSpawn, + {SF, OMPTask, ArgCast, ArgSizeVal, B.getInt64(Alignment)}); + } + + ReplCall->eraseFromParent(); +} diff --git a/llvm/lib/Transforms/Tapir/OpenCilkABI.cpp b/llvm/lib/Transforms/Tapir/OpenCilkABI.cpp new file mode 100644 index 00000000000000..f60b202b755d4a --- /dev/null +++ b/llvm/lib/Transforms/Tapir/OpenCilkABI.cpp @@ -0,0 +1,1144 @@ +//===- OpenCilkABI.cpp - Interface to the OpenCilk runtime system------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the OpenCilk ABI to convert Tapir instructions to calls +// into the OpenCilk runtime system. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Tapir/OpenCilkABI.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/TapirTaskInfo.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Linker/Linker.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ModRef.h" +#include "llvm/Transforms/Tapir/CilkRTSCilkFor.h" +#include "llvm/Transforms/Tapir/Outline.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/EscapeEnumerator.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/TapirUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "opencilk" + +extern cl::opt DebugABICalls; + +static cl::opt UseOpenCilkRuntimeBC( + "use-opencilk-runtime-bc", cl::init(true), + cl::desc("Use a bitcode file for the OpenCilk runtime ABI"), cl::Hidden); +static cl::opt ClOpenCilkRuntimeBCPath( + "opencilk-runtime-bc-path", cl::init(""), + cl::desc("Path to the bitcode file for the OpenCilk runtime ABI"), + cl::Hidden); + +#define CILKRTS_FUNC(name) Get__cilkrts_##name() + +static const StringRef StackFrameName = "__cilkrts_sf"; + +OpenCilkABI::OpenCilkABI(Module &M) : TapirTarget(M) {} + +// Helper function to fix the implementation of __cilk_sync. In particular, +// this fixup ensures that __cilk_sync, and specific __cilkrts method calls +// therein, appear that they may throw an exception. Since the bitcode-ABI file +// is built from C code, it won't necessarily be marked appropriately for +// exception handling. 
+static void fixCilkSyncFn(Module &M, Function *Fn) { + Fn->removeFnAttr(Attribute::NoUnwind); + Function *ExceptionRaiseFn = M.getFunction("__cilkrts_check_exception_raise"); + Function *ExceptionResumeFn = M.getFunction("__cilkrts_check_exception_resume"); + for (Instruction &I : instructions(Fn)) + if (CallBase *CB = dyn_cast(&I)) + if (CB->getCalledFunction() == ExceptionRaiseFn || + CB->getCalledFunction() == ExceptionResumeFn) + CB->removeFnAttr(Attribute::NoUnwind); +} + +namespace { + +// Custom DiagnosticInfo for linking the OpenCilk ABI bitcode file. +class OpenCilkABILinkDiagnosticInfo : public DiagnosticInfo { + const Module *SrcM; + const Twine &Msg; + +public: + OpenCilkABILinkDiagnosticInfo(DiagnosticSeverity Severity, const Module *SrcM, + const Twine &Msg) + : DiagnosticInfo(DK_Lowering, Severity), SrcM(SrcM), Msg(Msg) {} + void print(DiagnosticPrinter &DP) const override { + DP << "linking module '" << SrcM->getModuleIdentifier() << "': " << Msg; + } +}; + +// Custom DiagnosticHandler to handle diagnostics arising when linking the +// OpenCilk ABI bitcode file. +class OpenCilkABIDiagnosticHandler final : public DiagnosticHandler { + const Module *SrcM; + DiagnosticHandler *OrigHandler; + +public: + OpenCilkABIDiagnosticHandler(const Module *SrcM, + DiagnosticHandler *OrigHandler) + : SrcM(SrcM), OrigHandler(OrigHandler) {} + + bool handleDiagnostics(const DiagnosticInfo &DI) override { + if (DI.getKind() != DK_Linker) + return OrigHandler->handleDiagnostics(DI); + + std::string MsgStorage; + { + raw_string_ostream Stream(MsgStorage); + DiagnosticPrinterRawOStream DP(Stream); + DI.print(DP); + } + return OrigHandler->handleDiagnostics( + OpenCilkABILinkDiagnosticInfo(DI.getSeverity(), SrcM, MsgStorage)); + } +}; + +// Structure recording information about Cilk ABI functions. 
+struct CilkRTSFnDesc { + StringRef FnName; + FunctionType *FnType; + FunctionCallee &FnCallee; +}; + +} // namespace + +void OpenCilkABI::setOptions(const TapirTargetOptions &Options) { + if (!isa(Options)) + return; + + const OpenCilkABIOptions &OptionsCast = cast(Options); + + // Get the path to the runtime bitcode file. + RuntimeBCPath = OptionsCast.getRuntimeBCPath(); +} + +void OpenCilkABI::prepareModule() { + LLVMContext &C = M.getContext(); + Type *Int8Ty = Type::getInt8Ty(C); + Type *Int16Ty = Type::getInt16Ty(C); + Type *Int32Ty = Type::getInt32Ty(C); + Type *Int64Ty = Type::getInt64Ty(C); + + if (UseOpenCilkRuntimeBC) { + // If a runtime bitcode path is given via the command line, use it. + if ("" != ClOpenCilkRuntimeBCPath) + RuntimeBCPath = ClOpenCilkRuntimeBCPath; + + if ("" == RuntimeBCPath) + C.emitError("OpenCilkABI: No OpenCilk bitcode ABI file given."); + + LLVM_DEBUG(dbgs() << "Using external bitcode file for OpenCilk ABI: " + << RuntimeBCPath << "\n"); + SMDiagnostic SMD; + + // Parse the bitcode file. This call imports structure definitions, but not + // function definitions. + if (std::unique_ptr ExternalModule = + parseIRFile(RuntimeBCPath, SMD, C)) { + // Get the original DiagnosticHandler for this context. + std::unique_ptr OrigDiagHandler = + C.getDiagnosticHandler(); + + // Setup an OpenCilkABIDiagnosticHandler for this context, to handle + // diagnostics that arise from linking ExternalModule. + C.setDiagnosticHandler(std::make_unique( + ExternalModule.get(), OrigDiagHandler.get())); + + // Link the external module into the current module, copying over global + // values. + // + // TODO: Consider restructuring the import process to use + // Linker::Flags::LinkOnlyNeeded to copy over only the necessary contents + // from the external module. 
+ bool Fail = Linker::linkModules( + M, std::move(ExternalModule), Linker::Flags::None, + [](Module &M, const StringSet<> &GVS) { + for (StringRef GVName : GVS.keys()) { + LLVM_DEBUG(dbgs() << "Linking global value " << GVName << "\n"); + if (Function *Fn = M.getFunction(GVName)) { + if (!Fn->isDeclaration()) + // We set the function's linkage as available_externally, so + // that subsequent optimizations can remove these definitions + // from the module. We don't want this module redefining any of + // these symbols, even if they aren't inlined, because the + // OpenCilk runtime library will provide those definitions + // later. + Fn->setLinkage(Function::AvailableExternallyLinkage); + } else if (GlobalVariable *G = M.getGlobalVariable(GVName)) { + if (!G->isDeclaration()) + G->setLinkage(GlobalValue::AvailableExternallyLinkage); + } + } + }); + if (Fail) + C.emitError("OpenCilkABI: Failed to link bitcode ABI file: " + + Twine(RuntimeBCPath)); + + // Restore the original DiagnosticHandler for this context. + C.setDiagnosticHandler(std::move(OrigDiagHandler)); + } else { + C.emitError("OpenCilkABI: Failed to parse bitcode ABI file: " + + Twine(RuntimeBCPath)); + } + } + + // Get or create local definitions of Cilk RTS structure types. + const char *StackFrameName = "struct.__cilkrts_stack_frame"; + StackFrameTy = StructType::lookupOrCreate(C, StackFrameName); + WorkerTy = StructType::lookupOrCreate(C, "struct.__cilkrts_worker"); + + PointerType *StackFramePtrTy = PointerType::getUnqual(StackFrameTy); + Type *VoidTy = Type::getVoidTy(C); + Type *VoidPtrTy = Type::getInt8PtrTy(C); + + // Define the types of the CilkRTS functions. 
+ FunctionType *CilkRTSFnTy = + FunctionType::get(VoidTy, {StackFramePtrTy}, false); + FunctionType *CilkPrepareSpawnFnTy = + FunctionType::get(Int32Ty, {StackFramePtrTy}, false); + FunctionType *CilkRTSEnterLandingpadFnTy = + FunctionType::get(VoidTy, {StackFramePtrTy, Int32Ty}, false); + FunctionType *CilkRTSPauseFrameFnTy = FunctionType::get( + VoidTy, {StackFramePtrTy, PointerType::getInt8PtrTy(C)}, false); + FunctionType *Grainsize8FnTy = FunctionType::get(Int8Ty, {Int8Ty}, false); + FunctionType *Grainsize16FnTy = FunctionType::get(Int16Ty, {Int16Ty}, false); + FunctionType *Grainsize32FnTy = FunctionType::get(Int32Ty, {Int32Ty}, false); + FunctionType *Grainsize64FnTy = FunctionType::get(Int64Ty, {Int64Ty}, false); + FunctionType *LookupTy = FunctionType::get( + VoidPtrTy, {VoidPtrTy, Int64Ty, VoidPtrTy, VoidPtrTy}, false); + FunctionType *UnregTy = FunctionType::get(VoidTy, {VoidPtrTy}, false); + FunctionType *Reg32Ty = + FunctionType::get(VoidTy, {VoidPtrTy, Int32Ty, VoidPtrTy, + VoidPtrTy}, false); + FunctionType *Reg64Ty = + FunctionType::get(VoidTy, {VoidPtrTy, Int64Ty, VoidPtrTy, + VoidPtrTy}, false); + + // Create an array of CilkRTS functions, with their associated types and + // FunctionCallee member variables in the OpenCilkABI class. 
+ CilkRTSFnDesc CilkRTSFunctions[] = { + {"__cilkrts_enter_frame", CilkRTSFnTy, CilkRTSEnterFrame}, + {"__cilkrts_enter_frame_helper", CilkRTSFnTy, CilkRTSEnterFrameHelper}, + {"__cilkrts_detach", CilkRTSFnTy, CilkRTSDetach}, + {"__cilkrts_leave_frame", CilkRTSFnTy, CilkRTSLeaveFrame}, + {"__cilkrts_leave_frame_helper", CilkRTSFnTy, CilkRTSLeaveFrameHelper}, + {"__cilk_prepare_spawn", CilkPrepareSpawnFnTy, CilkPrepareSpawn}, + {"__cilk_sync", CilkRTSFnTy, CilkSync}, + {"__cilk_sync_nothrow", CilkRTSFnTy, CilkSyncNoThrow}, + {"__cilk_parent_epilogue", CilkRTSFnTy, CilkParentEpilogue}, + {"__cilk_helper_epilogue", CilkRTSFnTy, CilkHelperEpilogue}, + {"__cilkrts_enter_landingpad", CilkRTSEnterLandingpadFnTy, + CilkRTSEnterLandingpad}, + {"__cilkrts_pause_frame", CilkRTSPauseFrameFnTy, CilkRTSPauseFrame}, + {"__cilk_helper_epilogue_exn", CilkRTSPauseFrameFnTy, + CilkHelperEpilogueExn}, + {"__cilkrts_cilk_for_grainsize_8", Grainsize8FnTy, + CilkRTSCilkForGrainsize8}, + {"__cilkrts_cilk_for_grainsize_16", Grainsize16FnTy, + CilkRTSCilkForGrainsize16}, + {"__cilkrts_cilk_for_grainsize_32", Grainsize32FnTy, + CilkRTSCilkForGrainsize32}, + {"__cilkrts_cilk_for_grainsize_64", Grainsize64FnTy, + CilkRTSCilkForGrainsize64}, + {"__cilkrts_reducer_lookup", LookupTy, CilkRTSReducerLookup}, + {"__cilkrts_reducer_register_32", Reg32Ty, CilkRTSReducerRegister32}, + {"__cilkrts_reducer_register_64", Reg64Ty, CilkRTSReducerRegister64}, + {"__cilkrts_reducer_unregister", UnregTy, CilkRTSReducerUnregister}, + }; + + if (UseOpenCilkRuntimeBC) { + // Add attributes to internalized functions. 
+ for (CilkRTSFnDesc FnDesc : CilkRTSFunctions) { + assert(!FnDesc.FnCallee && "Redefining Cilk function"); + FnDesc.FnCallee = M.getOrInsertFunction(FnDesc.FnName, FnDesc.FnType); + assert(isa(FnDesc.FnCallee.getCallee()) && + "Cilk runtime function is not a function"); + Function *Fn = cast(FnDesc.FnCallee.getCallee()); + + // Because __cilk_sync is a C function that can throw an exception, update + // its attributes specially. No other CilkRTS functions can throw an + // exception. + if ("__cilk_sync" == FnDesc.FnName) + fixCilkSyncFn(M, Fn); + else + Fn->setDoesNotThrow(); + + // Unless we're debugging, mark the function as always_inline. This + // attribute is required for some functions, but is helpful for all + // functions. + if (!DebugABICalls) + Fn->addFnAttr(Attribute::AlwaysInline); + else + Fn->removeFnAttr(Attribute::AlwaysInline); + } + if (GlobalVariable *AlignVar = + M.getGlobalVariable("__cilkrts_stack_frame_align", true)) { + StackFrameAlign = AlignVar->getAlign(); + // Mark this variable with private linkage, to avoid linker failures when + // compiling with no optimizations. + AlignVar->setLinkage(GlobalValue::PrivateLinkage); + } + } else if (!DebugABICalls) { + // The OpenCilkABI target requires the use of a bitcode ABI file to generate + // correct code. + C.emitError( + "OpenCilkABI: Bitcode ABI file required for correct code generation."); + } + + // If no valid bitcode file was found fill in the missing pieces. + // An error should have been emitted already unless the user + // set DebugABICalls. + + if (StackFrameTy->isOpaque()) { + // Create a dummy __cilkrts_stack_frame structure + StackFrameTy->setBody(Int64Ty); + } + // Create declarations of all CilkRTS functions, and add basic attributes to + // those declarations. 
+ for (CilkRTSFnDesc FnDesc : CilkRTSFunctions) { + if (FnDesc.FnCallee) + continue; + FnDesc.FnCallee = M.getOrInsertFunction(FnDesc.FnName, FnDesc.FnType); + assert(isa(FnDesc.FnCallee.getCallee()) && + "Cilk function is not a function"); + Function *Fn = cast(FnDesc.FnCallee.getCallee()); + + // Mark all CilkRTS functions nounwind, except for __cilk_sync. + if ("__cilk_sync" == FnDesc.FnName) + Fn->removeFnAttr(Attribute::NoUnwind); + else + Fn->setDoesNotThrow(); + } +} + +void OpenCilkABI::addHelperAttributes(Function &Helper) { + // Use a fast calling convention for the helper. + Helper.setCallingConv(CallingConv::Fast); + // Inlining the helper function is not legal. + Helper.removeFnAttr(Attribute::AlwaysInline); + Helper.addFnAttr(Attribute::NoInline); + // If the helper uses an argument structure, then it is not a write-only + // function. + if (getArgStructMode() != ArgStructMode::None) { + Helper.removeFnAttr(Attribute::WriteOnly); + Helper.setMemoryEffects( + MemoryEffects(MemoryEffects::Location::Other, ModRefInfo::ModRef)); + } + // Note that the address of the helper is unimportant. + Helper.setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + + // The helper is internal to this module. We use internal linkage, rather + // than private linkage, so that tools can still reference the helper + // function. + Helper.setLinkage(GlobalValue::InternalLinkage); +} + +void OpenCilkABI::remapAfterOutlining(BasicBlock *TFEntry, + ValueToValueMapTy &VMap) { + if (TapirRTCalls[TFEntry].empty()) + return; + + // Update the set of tapir.runtime.{start,end} intrinsics in the taskframe + // rooted at TFEntry to process. + SmallVector OldTapirRTCalls(TapirRTCalls[TFEntry]); + TapirRTCalls[TFEntry].clear(); + for (IntrinsicInst *II : OldTapirRTCalls) + TapirRTCalls[TFEntry].push_back(cast(VMap[II])); +} + +// Check whether the allocation of a __cilkrts_stack_frame can be inserted after +// instruction \p I. 
+static bool skipInstruction(const Instruction &I) { + if (isa(I)) + return true; + + if (isa(I)) + return true; + + if (const IntrinsicInst *II = dyn_cast(&I)) { + // Skip simple intrinsics + switch(II->getIntrinsicID()) { + case Intrinsic::annotation: + case Intrinsic::assume: + case Intrinsic::sideeffect: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: + case Intrinsic::is_constant: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + case Intrinsic::experimental_gc_result: + case Intrinsic::experimental_gc_relocate: + case Intrinsic::experimental_noalias_scope_decl: + case Intrinsic::syncregion_start: + case Intrinsic::taskframe_create: + return true; + default: + return false; + } + } + + return false; +} + +// Scan the basic block \p B to find a point to insert the allocation of a +// __cilkrts_stack_frame. +static Instruction *getStackFrameInsertPt(BasicBlock &B) { + BasicBlock::iterator BI(B.getFirstInsertionPt()); + BasicBlock::const_iterator BE(B.end()); + + // Scan the basic block for the first instruction we should not skip. + while (BI != BE) { + if (!skipInstruction(*BI)) { + return &*BI; + } + ++BI; + } + + // We reached the end of the basic block; return the terminator. + return B.getTerminator(); +} + +/// Create the __cilkrts_stack_frame for the spawning function. 
+Value *OpenCilkABI::CreateStackFrame(Function &F) { + const DataLayout &DL = F.getParent()->getDataLayout(); + Type *SFTy = StackFrameTy; + + IRBuilder<> B(getStackFrameInsertPt(F.getEntryBlock())); + AllocaInst *SF = B.CreateAlloca(SFTy, DL.getAllocaAddrSpace(), + /*ArraySize*/ nullptr, + /*Name*/ StackFrameName); + if (StackFrameAlign) + SF->setAlignment(StackFrameAlign.valueOrOne()); + + return SF; +} + +Value* OpenCilkABI::GetOrCreateCilkStackFrame(Function &F) { + if (DetachCtxToStackFrame.count(&F)) + return DetachCtxToStackFrame[&F]; + + Value *SF = CreateStackFrame(F); + DetachCtxToStackFrame[&F] = SF; + + return SF; +} + +// Insert a call in Function F to __cilkrts_detach at DetachPt, which must be +// after the allocation of the __cilkrts_stack_frame in F. +void OpenCilkABI::InsertDetach(Function &F, Instruction *DetachPt) { + Instruction *SF = cast(GetOrCreateCilkStackFrame(F)); + assert(SF && "No Cilk stack frame for Cilk function."); + Value *Args[1] = {SF}; + + // Scan function to see if it detaches. + LLVM_DEBUG({ + bool SimpleHelper = !canDetach(&F); + if (!SimpleHelper) + dbgs() << "NOTE: Detachable helper function itself detaches.\n"; + }); + + // Call __cilkrts_detach + IRBuilder<> IRB(DetachPt); + IRB.CreateCall(CILKRTS_FUNC(detach), Args); +} + +// Insert a call in Function F to __cilkrts_enter_frame{_helper} to initialize +// the __cilkrts_stack_frame in F. If TaskFrameCreate is nonnull, the call to +// __cilkrts_enter_frame{_helper} is inserted at TaskFramecreate. +CallInst *OpenCilkABI::InsertStackFramePush(Function &F, + Instruction *TaskFrameCreate, + bool Helper) { + Instruction *SF = cast(GetOrCreateCilkStackFrame(F)); + + BasicBlock::iterator InsertPt = ++SF->getIterator(); + IRBuilder<> B(&(F.getEntryBlock()), InsertPt); + if (TaskFrameCreate) + B.SetInsertPoint(TaskFrameCreate); + if (!B.getCurrentDebugLocation()) { + // Try to find debug information later in this block for the ABI call. 
+ BasicBlock::iterator BI = B.GetInsertPoint(); + BasicBlock::const_iterator BE(B.GetInsertBlock()->end()); + while (BI != BE) { + if (DebugLoc Loc = BI->getDebugLoc()) { + B.SetCurrentDebugLocation(Loc); + break; + } + ++BI; + } + + // Next, try to find debug information earlier in this block. + if (!B.getCurrentDebugLocation()) { + BI = B.GetInsertPoint(); + BasicBlock::const_iterator BB(B.GetInsertBlock()->begin()); + while (BI != BB) { + --BI; + if (DebugLoc Loc = BI->getDebugLoc()) { + B.SetCurrentDebugLocation(Loc); + break; + } + } + } + } + + Value *Args[1] = {SF}; + if (Helper) + return B.CreateCall(CILKRTS_FUNC(enter_frame_helper), Args); + else + return B.CreateCall(CILKRTS_FUNC(enter_frame), Args); +} + +// Insert a call in Function F to the appropriate epilogue function. +// +// - A call to __cilk_parent_epilogue() is inserted at a return from a +// spawning function. +// +// - A call to __cilk_helper_epilogue() is inserted at a return from a +// spawn-helper function. +// +// - A call to __cilk_helper_epiluge_exn() is inserted at a resume from a +// spawn-helper function. +// +// PromoteCallsToInvokes dictates whether call instructions that can throw are +// promoted to invoke instructions prior to inserting the epilogue-function +// calls. +void OpenCilkABI::InsertStackFramePop(Function &F, bool PromoteCallsToInvokes, + bool InsertPauseFrame, bool Helper) { + Value *SF = GetOrCreateCilkStackFrame(F); + SmallPtrSet Returns; + SmallPtrSet Resumes; + + // Add eh cleanup that returns control to the runtime + EscapeEnumerator EE(F, "cilkrts_cleanup", PromoteCallsToInvokes); + while (IRBuilder<> *Builder = EE.Next()) { + if (ResumeInst *RI = dyn_cast(Builder->GetInsertPoint())) { + if (!RI->getDebugLoc()) + // Attempt to set the debug location of this resume to match one of the + // preceeding terminators. 
+ for (const BasicBlock *Pred : predecessors(RI->getParent())) + if (const DebugLoc &Loc = Pred->getTerminator()->getDebugLoc()) { + RI->setDebugLoc(Loc); + break; + } + Resumes.insert(RI); + } + else if (ReturnInst *RI = dyn_cast(Builder->GetInsertPoint())) + Returns.insert(RI); + } + + for (ReturnInst *RI : Returns) { + if (Helper) { + CallInst::Create(GetCilkHelperEpilogueFn(), {SF}, "", RI) + ->setDebugLoc(RI->getDebugLoc()); + } else { + CallInst::Create(GetCilkParentEpilogueFn(), {SF}, "", RI) + ->setDebugLoc(RI->getDebugLoc()); + } + } + for (ResumeInst *RI : Resumes) { + if (InsertPauseFrame) { + Value *Exn = ExtractValueInst::Create(RI->getValue(), {0}, "", RI); + // If throwing an exception, pass the exception object to the epilogue + // function. + CallInst::Create(GetCilkHelperEpilogueExnFn(), {SF, Exn}, "", RI) + ->setDebugLoc(RI->getDebugLoc()); + } + } +} + +// Lower any calls to tapir.runtime.{start,end} that need to be processed. +void OpenCilkABI::LowerTapirRTCalls(Function &F, BasicBlock *TFEntry) { + Instruction *SF = cast(GetOrCreateCilkStackFrame(F)); + for (IntrinsicInst *II : TapirRTCalls[TFEntry]) { + IRBuilder<> Builder(II); + if (Intrinsic::tapir_runtime_start == II->getIntrinsicID()) { + // Lower calls to tapir.runtime.start to __cilkrts_enter_frame. + Builder.CreateCall(CILKRTS_FUNC(enter_frame), {SF}); + + // Find all tapir.runtime.ends that use this tapir.runtime.start, and + // lower them to calls to __cilk_parent_epilogue. + for (Use &U : II->uses()) + if (IntrinsicInst *UII = dyn_cast(U.getUser())) + if (Intrinsic::tapir_runtime_end == UII->getIntrinsicID()) { + Builder.SetInsertPoint(UII); + Builder.CreateCall(GetCilkParentEpilogueFn(), {SF}); + } + } + } +} + +void OpenCilkABI::MarkSpawner(Function &F) { + // If the spawner F might throw, then we mark F with the Cilk personality + // function, which ensures that the Cilk stack frame of F is properly unwound. 
+ if (!F.doesNotThrow()) { + LLVMContext &C = M.getContext(); + // Get the type of the Cilk personality function the same way that clang and + // EscapeEnumerator get the type of a personality function. + Function *Personality = cast( + M.getOrInsertFunction("__cilk_personality_v0", + FunctionType::get(Type::getInt32Ty(C), true)) + .getCallee()); + F.setPersonalityFn(Personality); + } + + // Mark this function as stealable. + F.addFnAttr(Attribute::Stealable); + F.setMemoryEffects( + MemoryEffects(MemoryEffects::Location::Other, ModRefInfo::ModRef)); +} + +/// Lower a call to get the grainsize of a Tapir loop. +Value *OpenCilkABI::lowerGrainsizeCall(CallInst *GrainsizeCall) { + Value *Limit = GrainsizeCall->getArgOperand(0); + IRBuilder<> Builder(GrainsizeCall); + + // Select the appropriate __cilkrts_grainsize function, based on the type. + FunctionCallee CilkRTSGrainsizeCall; + if (GrainsizeCall->getType()->isIntegerTy(8)) + CilkRTSGrainsizeCall = CILKRTS_FUNC(cilk_for_grainsize_8); + else if (GrainsizeCall->getType()->isIntegerTy(16)) + CilkRTSGrainsizeCall = CILKRTS_FUNC(cilk_for_grainsize_16); + else if (GrainsizeCall->getType()->isIntegerTy(32)) + CilkRTSGrainsizeCall = CILKRTS_FUNC(cilk_for_grainsize_32); + else if (GrainsizeCall->getType()->isIntegerTy(64)) + CilkRTSGrainsizeCall = CILKRTS_FUNC(cilk_for_grainsize_64); + else + llvm_unreachable("No CilkRTSGrainsize call matches type for Tapir loop."); + + Value *Grainsize = Builder.CreateCall(CilkRTSGrainsizeCall, Limit); + + // Replace uses of grainsize intrinsic call with this grainsize value. + GrainsizeCall->replaceAllUsesWith(Grainsize); + return Grainsize; +} + +BasicBlock *OpenCilkABI::GetDefaultSyncLandingpad(Function &F, Value *SF, + DebugLoc Loc) { + // Return an existing default sync landingpad, if there is one. + if (DefaultSyncLandingpad.count(&F)) + return cast(DefaultSyncLandingpad[&F]); + + // Create a default cleanup landingpad block. 
+ LLVMContext &C = F.getContext(); + const Twine Name = "default_sync_lpad"; + BasicBlock *CleanupBB = BasicBlock::Create(C, Name, &F); + Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C)); + + IRBuilder<> Builder(CleanupBB); + Builder.SetCurrentDebugLocation(Loc); + LandingPadInst *LPad = Builder.CreateLandingPad(ExnTy, 1, Name + ".lpad"); + LPad->setCleanup(true); + // Insert a call to __cilkrts_enter_landingpad. + Value *Sel = Builder.CreateExtractValue(LPad, {1}, "sel"); + Value *CilkLPadArgs[] = {SF, Sel}; + Builder.CreateCall(CILKRTS_FUNC(enter_landingpad), CilkLPadArgs, ""); + // Insert a resume. + Builder.CreateResume(LPad); + + DefaultSyncLandingpad[&F] = CleanupBB; + + return CleanupBB; +} + +// Lower a sync instruction SI. +void OpenCilkABI::lowerSync(SyncInst &SI) { + Function &Fn = *SI.getFunction(); + if (!DetachCtxToStackFrame[&Fn]) + // If we have not created a stackframe for this function, then we don't need + // to handle the sync. + return; + + Value *SF = GetOrCreateCilkStackFrame(Fn); + Value *Args[] = { SF }; + assert(Args[0] && "sync used in function without frame!"); + + Instruction *SyncUnwind = nullptr; + BasicBlock *SyncCont = SI.getSuccessor(0); + BasicBlock *SyncUnwindDest = nullptr; + // Determine whether a sync.unwind immediately follows SI. + if (InvokeInst *II = + dyn_cast(SyncCont->getFirstNonPHIOrDbgOrLifetime())) { + if (isSyncUnwind(II)) { + SyncUnwind = II; + SyncCont = II->getNormalDest(); + SyncUnwindDest = II->getUnwindDest(); + } + } else if (CallBase *CB = dyn_cast( + SyncCont->getFirstNonPHIOrDbgOrLifetime())) { + if (isSyncUnwind(CB)) + SyncUnwind = CB; + } + + CallBase *CB; + if (!SyncUnwindDest) { + if (Fn.doesNotThrow()) { + // This function doesn't throw any exceptions, so use the no-throw version + // of cilk_sync. 
+ CB = CallInst::Create(GetCilkSyncNoThrowFn(), Args, "", + /*insert before*/ &SI); + BranchInst::Create(SyncCont, CB->getParent()); + } else if (SyncUnwind) { + // The presence of the sync.unwind indicates that the sync might rethrow + // an exception, but there isn't a landingpad associated with the sync. + + // Get the default sync-landingpad block to use instead, creating it if + // necessary. + BasicBlock *DefaultSyncLandingpad = + GetDefaultSyncLandingpad(Fn, SF, SI.getDebugLoc()); + + // Invoke __cilk_sync, using DefaultSyncLandingpad as the unwind + // destination. + CB = InvokeInst::Create(GetCilkSyncFn(), SyncCont, DefaultSyncLandingpad, + Args, "", + /*insert before*/ &SI); + } else { + // TODO: This case shouldn't be reachable. Check whether it is reachable. + CB = CallInst::Create(GetCilkSyncFn(), Args, "", /*insert before*/ &SI); + BranchInst::Create(SyncCont, CB->getParent()); + } + } else { + CB = InvokeInst::Create(GetCilkSyncFn(), SyncCont, SyncUnwindDest, Args, "", + /*insert before*/ &SI); + for (PHINode &PN : SyncCont->phis()) + PN.addIncoming(PN.getIncomingValueForBlock(SyncUnwind->getParent()), + SI.getParent()); + for (PHINode &PN : SyncUnwindDest->phis()) + PN.addIncoming(PN.getIncomingValueForBlock(SyncUnwind->getParent()), + SI.getParent()); + } + CB->setDebugLoc(SI.getDebugLoc()); + SI.eraseFromParent(); + + // Remember to inline this call later. + CallsToInline.insert(CB); + + // Mark this function as stealable. + Fn.addFnAttr(Attribute::Stealable); +} + +void OpenCilkABI::preProcessOutlinedTask(Function &F, Instruction *DetachPt, + Instruction *TaskFrameCreate, + bool IsSpawner, BasicBlock *TFEntry) { + // If the outlined task F itself performs spawns, set up F to support stealing + // continuations. + if (IsSpawner) + MarkSpawner(F); + + CallInst *EnterFrame = + InsertStackFramePush(F, TaskFrameCreate, /*Helper*/ true); + InsertDetach(F, (DetachPt ? 
DetachPt : &*(++EnterFrame->getIterator()))); +} + +void OpenCilkABI::postProcessOutlinedTask(Function &F, Instruction *DetachPt, + Instruction *TaskFrameCreate, + bool IsSpawner, BasicBlock *TFEntry) { + // Because F is a spawned task, we want to insert landingpads for all calls + // that can throw, so we can pop the stackframe correctly if they do throw. + // In particular, popping the stackframe of a spawned task may discover that + // the parent was stolen, in which case we want to save the exception for + // later reduction. + InsertStackFramePop(F, /*PromoteCallsToInvokes*/ true, + /*InsertPauseFrame*/ true, /*Helper*/ true); + + // TODO: If F is itself a spawner, see if we need to ensure that the Cilk + // personality function does not pop an already-popped frame. We might be + // able to do this by checking if sf->call_parent == NULL before performing a + // pop in the personality function. +} + +void OpenCilkABI::preProcessRootSpawner(Function &F, BasicBlock *TFEntry) { + MarkSpawner(F); + if (TapirRTCalls[TFEntry].empty()) { + InsertStackFramePush(F); + } else { + LowerTapirRTCalls(F, TFEntry); + } + Value *SF = DetachCtxToStackFrame[&F]; + for (BasicBlock &BB : F) { + if (BB.isLandingPad()) { + LandingPadInst *LPad = BB.getLandingPadInst(); + Instruction *InsertPt = &*BB.getFirstInsertionPt(); + IRBuilder<> Builder(InsertPt); + // Try to find debug information for the ABI call. First check the + // landing pad. 
+ if (!Builder.getCurrentDebugLocation()) + Builder.SetCurrentDebugLocation(LPad->getDebugLoc()); + // Next, check later in the block + if (!Builder.getCurrentDebugLocation()) { + BasicBlock::iterator BI = Builder.GetInsertPoint(); + BasicBlock::const_iterator BE(Builder.GetInsertBlock()->end()); + while (BI != BE) { + if (DebugLoc Loc = BI->getDebugLoc()) { + Builder.SetCurrentDebugLocation(Loc); + break; + } + ++BI; + } + } + + Value *Sel = Builder.CreateExtractValue(LPad, 1, "sel"); + Builder.CreateCall(CILKRTS_FUNC(enter_landingpad), {SF, Sel}); + } + } +} + +void OpenCilkABI::postProcessRootSpawner(Function &F, BasicBlock *TFEntry) { + // F is a root spawner, not itself a spawned task. We don't need to promote + // calls to invokes, since the Cilk personality function will take care of + // popping the frame if no landingpad exists for a given call. + if (TapirRTCalls[TFEntry].empty()) + InsertStackFramePop(F, /*PromoteCallsToInvokes*/ false, + /*InsertPauseFrame*/ false, /*Helper*/ false); +} + +void OpenCilkABI::processSubTaskCall(TaskOutlineInfo &TOI, DominatorTree &DT) { + Instruction *ReplStart = TOI.ReplStart; + Instruction *ReplCall = TOI.ReplCall; + + Function &F = *ReplCall->getFunction(); + Value *SF = DetachCtxToStackFrame[&F]; + assert(SF && "No frame found for spawning task"); + + // Split the basic block containing the detach replacement just before the + // start of the detach-replacement instructions. + BasicBlock *DetBlock = ReplStart->getParent(); + BasicBlock *CallBlock = SplitBlock(DetBlock, ReplStart, &DT); + + // Emit a __cilk_spawn_prepare at the end of the block preceding the split-off + // detach replacement. + Instruction *SpawnPt = DetBlock->getTerminator(); + IRBuilder<> B(SpawnPt); + CallBase *SpawnPrepCall = B.CreateCall(GetCilkPrepareSpawnFn(), {SF}); + + // Remember to inline this call later. + CallsToInline.insert(SpawnPrepCall); + + // Get the ordinary continuation of the detach. 
+ BasicBlock *CallCont; + if (InvokeInst *II = dyn_cast(ReplCall)) + CallCont = II->getNormalDest(); + else // isa(CallSite) + CallCont = CallBlock->getSingleSuccessor(); + + // Insert a conditional branch, based on the result of the + // __cilk_spawn_prepare, to either the detach replacement or the continuation. + Value *SpawnPrepRes = B.CreateICmpEQ( + SpawnPrepCall, ConstantInt::get(SpawnPrepCall->getType(), 0)); + B.CreateCondBr(SpawnPrepRes, CallBlock, CallCont); + for (PHINode &PN : CallCont->phis()) + PN.addIncoming(PN.getIncomingValueForBlock(CallBlock), DetBlock); + + SpawnPt->eraseFromParent(); +} + +// Helper function to inline calls to compiler-generated Cilk runtime functions +// when possible. This inlining is necessary to properly implement some Cilk +// runtime "calls," such as __cilk_sync(). +static inline void inlineCilkFunctions( + Function &F, SmallPtrSetImpl &CallsToInline) { + for (CallBase *CB : CallsToInline) { + InlineFunctionInfo IFI; + InlineFunction(*CB, IFI); + } + CallsToInline.clear(); +} + +// For the taskframe at \p TFEntry containing blocks \p TFBlocks, find all +// outermost tapir.runtime.{start,end} intrinsics, which are not enclosed +// between other tapir.runtime.{start,end} intrinsics in this traksframe. +// Furthermore, record and successor taskframes in \p SuccessorTFs that are not +// enclosed between tapir.runtime.{start,end} intrinsics. +static bool findOutermostTapirRTCallsForTaskFrame( + SmallVectorImpl &TapirRTCalls, BasicBlock *TFEntry, + SmallPtrSetImpl &TFBlocks, + SmallPtrSetImpl &SuccessorTFs, TaskInfo &TI) { + SmallVector Worklist; + SmallPtrSet Visited; + Worklist.push_back(TFEntry->begin()); + + while (!Worklist.empty()) { + BasicBlock::iterator Iter = Worklist.pop_back_val(); + BasicBlock *BB = Iter->getParent(); + + bool FoundTapirRTStart = false; + bool FoundTapirRTEnd = false; + SmallVector EndIters; + // Scan the BB for tapir_runtime calls. 
+ for (BasicBlock::iterator It = Iter, E = BB->end(); It != E; ++It) { + Instruction *I = &*It; + if (isTapirIntrinsic(Intrinsic::tapir_runtime_start, I)) { + FoundTapirRTStart = true; + TapirRTCalls.push_back(cast(I)); + // Examine corresponding tapir_runtime_end intrinsics to find blocks + // from which to continue search. + for (Use &U : I->uses()) { + if (Instruction *UI = dyn_cast(U.getUser())) { + FoundTapirRTEnd = true; + BasicBlock *EndBB = UI->getParent(); + assert(TFBlocks.count(EndBB) && "tapir_runtime_end not in same " + "taskframe as tapir_runtime_begin"); + EndIters.push_back(++UI->getIterator()); + } + } + + if (FoundTapirRTEnd) + // We found a tapir_runtime_begin in this block, so stop searching. + break; + } + } + + // If we didn't find a tapir_runtime_start in this block, treat this block + // as an end block, so we examine its direct successors. + if (!FoundTapirRTStart) + EndIters.push_back(BB->getTerminator()->getIterator()); + + // Examine all end blocks to 1) check if a spawn occurs, and 2) add + // successors within the taskframe for further search. + for (BasicBlock::iterator Iter : EndIters) { + if (isa(*Iter)) { + // We found a spawn terminating a block in this taskframe. This spawn + // is not contained between outermost tapir_runtime_{start,end} calls in + // the taskframe. Therefore, we should fall back to default behavior + // for inserting enter_frame and leave_frame calls for this taskframe. + TapirRTCalls.clear(); + return true; + } + + BasicBlock *EndBB = Iter->getParent(); + if (EndBB->getTerminator() != &*Iter) { + Worklist.push_back(Iter); + continue; + } + + // Add the successors of this block for further search. + for (BasicBlock *Succ : successors(EndBB)) { + if (TFBlocks.count(Succ) && Visited.insert(Succ).second) + // For successors within the taskframe, add them to the search. + Worklist.push_back(Succ->begin()); + else { + // For successors in other taskframes, add the subtaskframe for + // processing. 
+ Spindle *SuccSpindle = TI.getSpindleFor(Succ); + if (SuccSpindle->getTaskFrameCreate()) + SuccessorTFs.insert(SuccSpindle); + } + } + } + } + + return false; +} + +// Find all tapir.runtime.{start,end} intrinsics to process for the taskframe +// rooted at spindle \p TaskFrame and any subtaskframes thereof. +void OpenCilkABI::GetTapirRTCalls(Spindle *TaskFrame, bool IsRootTask, + TaskInfo &TI) { + BasicBlock *TFEntry = TaskFrame->getEntry(); + SmallPtrSet TFBlocks; + SmallVector SubTFs; + if (IsRootTask) { + // We have to compute the effective taskframe blocks for the root task, + // since these blocks are not automatically identified by TapirTaskInfo. + // + // Note: We could generalize TapirTaskInfo to compute these taskframe blocks + // directly, but this computation seems to be the only place that set of + // blocks is needed. + SmallPtrSet ExcludedSpindles; + // Exclude all spindles in unassociated taskframes under the root task. + for (Spindle *TFRoot : TI.getRootTask()->taskframe_roots()) { + if (!TFRoot->getTaskFromTaskFrame()) + SubTFs.push_back(TFRoot); + for (Spindle *TFSpindle : depth_first>(TFRoot)) { + if (TFSpindle->getTaskFromTaskFrame()) + continue; + + for (Spindle *S : TFSpindle->taskframe_spindles()) + ExcludedSpindles.insert(S); + } + } + + // Iterate over the spindles in the root task, and add all spindle blocks to + // TFBlocks as long as those blocks don't belong to a nested taskframe. + for (Spindle *S : + depth_first>(TI.getRootTask()->getEntrySpindle())) { + if (ExcludedSpindles.count(S)) + continue; + + TFBlocks.insert(S->block_begin(), S->block_end()); + } + } else { + // Add all blocks in all spindles associated with this taskframe. + for (Spindle *S : TaskFrame->taskframe_spindles()) + TFBlocks.insert(S->block_begin(), S->block_end()); + + for (Spindle *SubTF : TaskFrame->subtaskframes()) + if (!SubTF->getTaskFromTaskFrame()) + SubTFs.push_back(SubTF); + } + + // Find the outermost tapir_runtime_{start,end} calls in this taskframe. 
+ // Record in SuccessorTFs any subtaskframes that are not enclosed in + // tapir.runtime.{start,end} intrinsics. + SmallPtrSet SuccessorTFs; + bool TaskFrameSpawns = findOutermostTapirRTCallsForTaskFrame( + TapirRTCalls[TFEntry], TFEntry, TFBlocks, SuccessorTFs, TI); + + // If this taskframe spawns outside of tapir_runtime_{start,end} pairs, then + // the taskframe will start/end the runtime when executed. Hence there's no + // need to evaluate subtaskframes. + if (TaskFrameSpawns) + return; + + // Process subtaskframes recursively. + for (Spindle *SubTF : SubTFs) { + // Skip any subtaskframes that are already enclosed in + // tapir.runtime.{start,end} intrinsics. + if (!SuccessorTFs.count(SubTF)) + continue; + + // Skip any taskframes that are associated with subtasks. + assert(!SubTF->getTaskFromTaskFrame() && + "Should not be processing spawned taskframes."); + + GetTapirRTCalls(SubTF, false, TI); + } +} + +bool OpenCilkABI::preProcessFunction(Function &F, TaskInfo &TI, + bool ProcessingTapirLoops) { + if (ProcessingTapirLoops) + // Don't do any preprocessing when outlining Tapir loops. + return false; + + // Find all Tapir-runtime calls in this function that may be translated to + // enter_frame/leave_frame calls. + GetTapirRTCalls(TI.getRootTask()->getEntrySpindle(), true, TI); + + if (!TI.isSerial() || TapirRTCalls[&F.getEntryBlock()].empty()) + return false; + + MarkSpawner(F); + LowerTapirRTCalls(F, &F.getEntryBlock()); + return false; +} + +void OpenCilkABI::postProcessFunction(Function &F, bool ProcessingTapirLoops) { + if (ProcessingTapirLoops) + // Don't do any postprocessing when outlining Tapir loops. + return; + + if (!DebugABICalls) + inlineCilkFunctions(F, CallsToInline); +} + +/// Process the Tapir instructions in an ordinary (non-spawning and not spawned) +/// function \p F directly. 
+bool OpenCilkABI::processOrdinaryFunction(Function &F, BasicBlock *TFEntry) { + // Get the simple Tapir instructions to process, including syncs and + // loop-grainsize calls. + SmallVector GrainsizeCalls; + SmallVector TaskFrameAddrCalls; + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + // Record calls to get Tapir-loop grainsizes. + if (IntrinsicInst *II = dyn_cast(&I)) + if (Intrinsic::tapir_loop_grainsize == II->getIntrinsicID()) + GrainsizeCalls.push_back(II); + + // Record calls to task_frameaddr intrinsics. + if (IntrinsicInst *II = dyn_cast(&I)) + if (Intrinsic::task_frameaddress == II->getIntrinsicID()) + TaskFrameAddrCalls.push_back(II); + } + } + + // Lower simple Tapir instructions in this function. Collect the set of + // helper functions generated by this process. + bool Changed = false; + + // Lower calls to get Tapir-loop grainsizes. + while (!GrainsizeCalls.empty()) { + CallInst *GrainsizeCall = GrainsizeCalls.pop_back_val(); + LLVM_DEBUG(dbgs() << "Lowering grainsize call " << *GrainsizeCall << "\n"); + lowerGrainsizeCall(GrainsizeCall); + Changed = true; + } + + // Lower calls to task_frameaddr intrinsics. + while (!TaskFrameAddrCalls.empty()) { + CallInst *TaskFrameAddrCall = TaskFrameAddrCalls.pop_back_val(); + LLVM_DEBUG(dbgs() << "Lowering task_frameaddr call " << *TaskFrameAddrCall + << "\n"); + lowerTaskFrameAddrCall(TaskFrameAddrCall); + Changed = true; + } + + // If any calls to tapir.runtime.{start,end} were found in this taskframe that + // need processing, lower them now. 
+ if (!TapirRTCalls[TFEntry].empty()) { + LowerTapirRTCalls(F, TFEntry); + Changed = true; + } + + return Changed; +} + +void OpenCilkABI::postProcessHelper(Function &F) {} + +LoopOutlineProcessor *OpenCilkABI::getLoopOutlineProcessor( + const TapirLoopInfo *TL) const { + if (UseRuntimeCilkFor) + return new RuntimeCilkFor(M); + return nullptr; +} + +void OpenCilkABI::lowerReducerOperation(CallBase *CI) { + FunctionCallee Fn = nullptr; + const Function *Called = CI->getCalledFunction(); + assert(Called); + Intrinsic::ID ID = Called->getIntrinsicID(); + switch (ID) { + default: + llvm_unreachable("unexpected reducer intrinsic"); + case Intrinsic::hyper_lookup: + Fn = Get__cilkrts_reducer_lookup(); + break; + case Intrinsic::reducer_register: { + const Type *SizeType = CI->getArgOperand(1)->getType(); + assert(isa(SizeType)); + Fn = Get__cilkrts_reducer_register(SizeType->getIntegerBitWidth()); + assert(Fn); + break; + } + case Intrinsic::reducer_unregister: + Fn = Get__cilkrts_reducer_unregister(); + break; + } + CI->setCalledFunction(Fn); +} diff --git a/llvm/lib/Transforms/Tapir/Outline.cpp b/llvm/lib/Transforms/Tapir/Outline.cpp new file mode 100644 index 00000000000000..952a72f82be7b0 --- /dev/null +++ b/llvm/lib/Transforms/Tapir/Outline.cpp @@ -0,0 +1,567 @@ +//===- TapirOutline.cpp - Outlining for Tapir -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements helper functions for outlining portions of code +// containing Tapir instructions. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Tapir/Outline.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/AttributeMask.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/ModRef.h" +#include "llvm/Support/Timer.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/TapirUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "outlining" + +static const char TimerGroupName[] = DEBUG_TYPE; +static const char TimerGroupDescription[] = "Tapir outlining"; + +// Materialize any necessary information in DstM when outlining Tapir into DstM. +Value *OutlineMaterializer::materialize(Value *V) { + if (V == SrcSyncRegion) { + // Create a new sync region to replace the sync region SrcSyncRegion from + // the source. + + // Get the destination function + User *U = *(V->materialized_user_begin()); + Function *DstFunc = cast(U)->getFunction(); + // Add a new syncregion to the entry block of the destination function + Instruction *NewSyncReg = cast(SrcSyncRegion)->clone(); + BasicBlock *EntryBlock = &DstFunc->getEntryBlock(); + NewSyncReg->insertInto(EntryBlock, EntryBlock->end()); + // Record the entry block as needing remapping + BlocksToRemap.insert(EntryBlock); + return NewSyncReg; + } + + return nullptr; +} + +/// Clone Blocks into NewFunc, transforming the old arguments into references to +/// VMap values. +/// +/// This logic is based on CloneFunctionInto, defined in +/// Transforms/Utils/CloneFunction, but with additional functionality specific +/// to Tapir outlining. 
+void llvm::CloneIntoFunction(Function *NewFunc, const Function *OldFunc, + std::vector Blocks, + ValueToValueMapTy &VMap, bool ModuleLevelChanges, + SmallVectorImpl &Returns, + const StringRef NameSuffix, + SmallPtrSetImpl *ReattachBlocks, + SmallPtrSetImpl *TaskResumeBlocks, + SmallPtrSetImpl *SharedEHEntries, + DISubprogram *SP, ClonedCodeInfo *CodeInfo, + ValueMapTypeRemapper *TypeMapper, + OutlineMaterializer *Materializer) { + // Get the predecessors of the exit blocks + SmallPtrSet EHEntryPreds, ClonedEHEntryPreds; + if (SharedEHEntries) + for (BasicBlock *EHEntry : *SharedEHEntries) + for (BasicBlock *Pred : predecessors(EHEntry)) + EHEntryPreds.insert(Pred); + + // When we remap instructions, we want to avoid duplicating inlined + // DISubprograms, so record all subprograms we find as we duplicate + // instructions and then freeze them in the MD map. + // We also record information about dbg.value and dbg.declare to avoid + // duplicating the types. + DebugInfoFinder DIFinder; + + // Loop over all of the basic blocks in the function, cloning them as + // appropriate. + { + NamedRegionTimer NRT("CloneBlocks", "Clone basic blocks", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + for (const BasicBlock *BB : Blocks) { + // Record all exit block predecessors that are cloned. + if (EHEntryPreds.count(BB)) + ClonedEHEntryPreds.insert(BB); + + // Create a new basic block and copy instructions into it! + BasicBlock *CBB = CloneBasicBlock(BB, VMap, NameSuffix, NewFunc, CodeInfo, + SP ? &DIFinder : nullptr); + + // Add basic block mapping. + VMap[BB] = CBB; + + // It is only legal to clone a function if a block address within that + // function is never referenced outside of the function. Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. (This is different from the generic ValueMapper + // implementation, which generates an invalid blockaddress when cloning a + // function.) 
+ if (BB->hasAddressTaken()) { + Constant *OldBBAddr = BlockAddress::get(const_cast(OldFunc), + const_cast(BB)); + VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB); + } + + // Note return instructions for the caller. + if (ReturnInst *RI = dyn_cast(CBB->getTerminator())) + Returns.push_back(RI); + } + } // end timed region + + // For each exit block, clean up its phi nodes to exclude predecessors that + // were not cloned. Also replace detached_rethrow invokes with resumes. + if (SharedEHEntries) { + NamedRegionTimer NRT("FixupSharedEH", "Fixup shared EH blocks", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + for (BasicBlock *EHEntry : *SharedEHEntries) { + if (!VMap[EHEntry]) + continue; + + // Get the predecessors of this exit block that were not cloned. + SmallVector PredNotCloned; + for (BasicBlock *Pred : predecessors(EHEntry)) + if (!ClonedEHEntryPreds.count(Pred)) + PredNotCloned.push_back(Pred); + + // Iterate over the phi nodes in the cloned exit block and remove incoming + // values from predecessors that were not cloned. + BasicBlock *ClonedEHEntry = cast(VMap[EHEntry]); + BasicBlock::iterator BI = ClonedEHEntry->begin(); + while (PHINode *PN = dyn_cast(BI)) { + for (BasicBlock *DeadPred : PredNotCloned) + if (PN->getBasicBlockIndex(DeadPred) > -1) + PN->removeIncomingValue(DeadPred); + ++BI; + } + } + } + if (ReattachBlocks) { + NamedRegionTimer NRT("FixupReattach", "Fixup reattach blocks", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + for (BasicBlock *ReattachBlk : *ReattachBlocks) { + BasicBlock *ClonedRB = cast(VMap[ReattachBlk]); + // Don't get the remapped name of this successor yet. Subsequent + // remapping will take correct the name. 
+ BasicBlock *Succ = ClonedRB->getSingleSuccessor(); + ReplaceInstWithInst(ClonedRB->getTerminator(), + BranchInst::Create(Succ)); + } + } + if (TaskResumeBlocks) { + NamedRegionTimer NRT("FixupTaskResume", "Fixup task-resume blocks", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + for (BasicBlock *TaskResumeBlk : *TaskResumeBlocks) { + // Skip blocks that are not terminated by a detached.rethrow or + // taskframe.resume. + if (!isDetachedRethrow(TaskResumeBlk->getTerminator()) && + !isTaskFrameResume(TaskResumeBlk->getTerminator())) + continue; + + BasicBlock *ClonedDRB = cast(VMap[TaskResumeBlk]); + // If this exit block terminates in a detached.rethrow or + // taskframe.resume, replace the terminator with a resume. + InvokeInst *II = cast(ClonedDRB->getTerminator()); + Value *RethrowArg = II->getArgOperand(1); + ReplaceInstWithInst(ClonedDRB->getTerminator(), + ResumeInst::Create(RethrowArg)); + } + } + + { + NamedRegionTimer NRT("MapMetadata", "Map function metadata", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + for (DISubprogram *ISP : DIFinder.subprograms()) + if (ISP != SP) + VMap.MD()[ISP].reset(ISP); + + for (DICompileUnit *CU : DIFinder.compile_units()) + VMap.MD()[CU].reset(CU); + + for (DIType *Type : DIFinder.types()) + VMap.MD()[Type].reset(Type); + + // Duplicate the metadata that is attached to the cloned function. + // Subprograms/CUs/types that were already mapped to themselves won't be + // duplicated. + SmallVector, 1> MDs; + OldFunc->getAllMetadata(MDs); + for (auto MD : MDs) { + NewFunc->addMetadata( + MD.first, + *MapMetadata(MD.second, VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer)); + } + } // end timed region + + // Loop over all of the instructions in the function, fixing up operand + // references as we go. This uses VMap to do all the hard work. 
+ { + NamedRegionTimer NRT("RemapBlock", "Remap instructions in block", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + for (const BasicBlock *BB : Blocks) { + BasicBlock *CBB = cast(VMap[BB]); + LLVM_DEBUG(dbgs() << "In block " << CBB->getName() << "\n"); + // Loop over all instructions, fixing each one as we find it... + for (Instruction &II : *CBB) { + LLVM_DEBUG(dbgs() << " Remapping " << II << "\n"); + RemapInstruction(&II, VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer); + } + } + + // Remapping instructions could cause the Materializer to insert new + // instructions in the entry block. Now remap the instructions in the entry + // block. + if (Materializer) + while (!Materializer->BlocksToRemap.empty()) { + BasicBlock *BB = Materializer->BlocksToRemap.pop_back_val(); + for (Instruction &II : *BB) { + LLVM_DEBUG(dbgs() << " Remapping " << II << "\n"); + RemapInstruction(&II, VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer); + } + } + } // end timed region + + // Register all DICompileUnits of the old parent module in the new parent + // module + auto *OldModule = OldFunc->getParent(); + auto *NewModule = NewFunc->getParent(); + if (OldModule && NewModule && OldModule != NewModule && + DIFinder.compile_unit_count()) { + auto *NMD = NewModule->getOrInsertNamedMetadata("llvm.dbg.cu"); + // Avoid multiple insertions of the same DICompileUnit to NMD. 
+ SmallPtrSet Visited; + for (auto *Operand : NMD->operands()) + Visited.insert(Operand); + for (auto *Unit : DIFinder.compile_units()) + // VMap.MD()[Unit] == Unit + if (Visited.insert(Unit).second) + NMD->addOperand(Unit); + } +} + +/// Create a helper function whose signature is based on Inputs and +/// Outputs as follows: f(in0, ..., inN, out0, ..., outN) +/// +/// This logic is based on CloneFunctionInto, defined in +/// Transforms/Utils/CloneFunction, but with additional functionality specific +/// to Tapir outlining. +Function *llvm::CreateHelper( + const ValueSet &Inputs, const ValueSet &Outputs, + std::vector Blocks, BasicBlock *Header, + const BasicBlock *OldEntry, const BasicBlock *OldExit, + ValueToValueMapTy &VMap, Module *DestM, bool ModuleLevelChanges, + SmallVectorImpl &Returns, const StringRef NameSuffix, + SmallPtrSetImpl *ReattachBlocks, + SmallPtrSetImpl *DetachRethrowBlocks, + SmallPtrSetImpl *SharedEHEntries, + const BasicBlock *OldUnwind, + SmallPtrSetImpl *UnreachableExits, + Type *ReturnType, ClonedCodeInfo *CodeInfo, + ValueMapTypeRemapper *TypeMapper, OutlineMaterializer *Materializer) { + LLVM_DEBUG(dbgs() << "inputs: " << Inputs.size() << "\n"); + LLVM_DEBUG(dbgs() << "outputs: " << Outputs.size() << "\n"); + + Function *OldFunc = Header->getParent(); + Type *RetTy = ReturnType; + bool VoidRet = false; + if (!RetTy) + RetTy = Type::getVoidTy(Header->getContext()); + if (Type::getVoidTy(Header->getContext()) == RetTy) + VoidRet = true; + + std::vector paramTy; + + // Add the types of the input values to the function's argument list + for (Value *value : Inputs) { + LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n"); + paramTy.push_back(value->getType()); + } + + // Add the types of the output values to the function's argument list. 
+ for (Value *output : Outputs) { + LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n"); + paramTy.push_back(PointerType::getUnqual(output->getType())); + } + + LLVM_DEBUG({ + dbgs() << "Function type: " << *RetTy << " f("; + for (Type *i : paramTy) + dbgs() << *i << ", "; + dbgs() << ")\n"; + }); + + FunctionType *FTy = FunctionType::get(RetTy, paramTy, false); + + // Create the new function + Function *NewFunc = Function::Create( + FTy, OldFunc->getLinkage(), + OldFunc->getName() + ".outline_" + Header->getName() + NameSuffix, DestM); + + // Set names for input and output arguments. At the same time, analyze + // notable arguments, such as vector arguments. + bool VectorArg = false; + uint64_t MaxVectorArgWidth = 0; + Function::arg_iterator DestI = NewFunc->arg_begin(); + for (Value *I : Inputs) { + if (VMap.count(I) == 0) { // Is this argument preserved? + DestI->setName(I->getName()+NameSuffix); // Copy the name over... + VMap[I] = &*DestI++; // Add mapping to VMap + } + // Check for any vector arguments, and record the maximum width of any + // vector argument we find. + if (VectorType *VT = dyn_cast(I->getType())) { + VectorArg = true; + ElementCount EC = VT->getElementCount(); + if (EC.isScalable()) + // If we have a scalable vector, give up. + MaxVectorArgWidth = std::numeric_limits::max(); + else { + unsigned VectorArgWidth = + EC.getKnownMinValue() * VT->getScalarSizeInBits(); + if (MaxVectorArgWidth < VectorArgWidth) + MaxVectorArgWidth = VectorArgWidth; + } + } + } + for (Value *I : Outputs) + if (VMap.count(I) == 0) { // Is this argument preserved? + DestI->setName(I->getName()+NameSuffix); // Copy the name over... + VMap[I] = &*DestI++; // Add mapping to VMap + } + + // Copy all attributes other than those stored in the AttributeSet. We need + // to remap the parameter indices of the AttributeSet. 
+ AttributeList NewAttrs = NewFunc->getAttributes(); + NewFunc->copyAttributesFrom(OldFunc); + NewFunc->setAttributes(NewAttrs); + + // Fix up the personality function that got copied over. + if (OldFunc->hasPersonalityFn()) + NewFunc->setPersonalityFn( + MapValue(OldFunc->getPersonalityFn(), VMap, + ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges, + TypeMapper, Materializer)); + + SmallVector NewArgAttrs(NewFunc->arg_size()); + AttributeList OldAttrs = OldFunc->getAttributes(); + + // Clone any argument attributes + { + NamedRegionTimer NRT("CloneArgAttrs", "Clone argument attributes", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + for (Argument &OldArg : OldFunc->args()) { + // Check if we're passing this argument to the helper. We check Inputs here + // instead of the VMap to avoid potentially populating the VMap with a null + // entry for the old argument. + if (Inputs.count(&OldArg) || Outputs.count(&OldArg)) { + Argument *NewArg = dyn_cast(VMap[&OldArg]); + NewArgAttrs[NewArg->getArgNo()] = + OldAttrs.getParamAttrs(OldArg.getArgNo()) + .removeAttribute(NewFunc->getContext(), Attribute::Returned); + } + } + } // end timed region + + NewFunc->setAttributes( + AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttrs(), + OldAttrs.getRetAttrs(), NewArgAttrs)); + + // Remove prologue data + if (NewFunc->hasPrologueData()) + NewFunc->setPrologueData(nullptr); + + // Remove old return attributes. + NewFunc->removeRetAttrs( + AttributeFuncs::typeIncompatible(NewFunc->getReturnType())); + + // Update vector-related attributes in the caller and new function + if (VectorArg && OldFunc->hasFnAttribute("min-legal-vector-width")) { + uint64_t CallerVectorWidth; + OldFunc->getFnAttribute("min-legal-vector-width") + .getValueAsString() + .getAsInteger(0, CallerVectorWidth); + if (std::numeric_limits::max() == MaxVectorArgWidth) { + // MaxVectorArgWidth is not a finite value. 
Give up and remove the + // min-legal-vector-width attribute, so OldFunc wil be treated + // conservatively henceforth. + OldFunc->removeFnAttr("min-legal-vector-width"); + // Update the min-legal-vector-width in the new function as well + NewFunc->removeFnAttr("min-legal-vector-width"); + } else if (MaxVectorArgWidth > CallerVectorWidth) { + // If MaxVectorArgWidth is a finite value and larger than the + // min-legal-vector-width of OldFunc, then set the + // min-legal-vector-width of OldFunc to match MaxVectorArgWidth. + OldFunc->addFnAttr("min-legal-vector-width", + llvm::utostr(MaxVectorArgWidth)); + // Update the min-legal-vector-width in the new function + NewFunc->addFnAttr("min-legal-vector-width", + llvm::utostr(MaxVectorArgWidth)); + } + } + + // Clone the metadata from the old function into the new. + bool MustCloneSP = OldFunc->getParent() && OldFunc->getParent() == DestM; + DISubprogram *SP = OldFunc->getSubprogram(); + if (SP) { + assert(!MustCloneSP || ModuleLevelChanges); + // Add mappings for some DebugInfo nodes that we don't want duplicated + // even if they're distinct. + auto &MD = VMap.MD(); + MD[SP->getUnit()].reset(SP->getUnit()); + MD[SP->getType()].reset(SP->getType()); + MD[SP->getFile()].reset(SP->getFile()); + // If we're not cloning into the same module, no need to clone the + // subprogram + if (!MustCloneSP) + MD[SP].reset(SP); + } + + // If the outlined function has pointer arguments its memory effects are + // unknown. Otherwise it inherits the memory effects of its parent. + // The caller can improve on this if desired. + for (Argument &Arg : NewFunc->args()) { + if (Arg.getType()->isPointerTy()) { + NewFunc->removeFnAttr(Attribute::Memory); + break; + } + } + + // Inherit the calling convention from the parent. + NewFunc->setCallingConv(OldFunc->getCallingConv()); + + // The new function needs a root node because other nodes can branch to the + // head of the region, but the entry node of a function cannot have preds. 
+ BasicBlock *NewEntry = BasicBlock::Create( + Header->getContext(), OldEntry->getName()+NameSuffix, NewFunc); + // The new function also needs an exit node. + BasicBlock *NewExit = BasicBlock::Create( + Header->getContext(), OldExit->getName()+NameSuffix); + + // Add mappings to the NewEntry and NewExit. + VMap[OldEntry] = NewEntry; + VMap[OldExit] = NewExit; + + BasicBlock *NewUnwind = nullptr; + // Create a new unwind destination for the cloned blocks if it's needed. + if (OldUnwind) { + NewUnwind = BasicBlock::Create( + NewFunc->getContext(), OldUnwind->getName()+NameSuffix); + VMap[OldUnwind] = NewUnwind; + } + + // Create an new unreachable exit block, if needed. + BasicBlock *NewUnreachable = nullptr; + if (UnreachableExits && !UnreachableExits->empty()) { + NewUnreachable = BasicBlock::Create( + NewFunc->getContext(), "unreachable"+NameSuffix); + new UnreachableInst(NewFunc->getContext(), NewUnreachable); + for (BasicBlock *Unreachable : *UnreachableExits) + VMap[Unreachable] = NewUnreachable; + } + + // Clone Blocks into the new function. + CloneIntoFunction(NewFunc, OldFunc, Blocks, VMap, ModuleLevelChanges, + Returns, NameSuffix, ReattachBlocks, DetachRethrowBlocks, + SharedEHEntries, SP, CodeInfo, TypeMapper, Materializer); + + // Add a branch in the new function to the cloned Header. + BasicBlock *ClonedHeader = cast(VMap[Header]); + BranchInst *EntryBr = BranchInst::Create(ClonedHeader, NewEntry); + // Set the debug location of the entry branch to match the first debug + // location in the cloned header. + for (const Instruction &I : *ClonedHeader) + if (const DebugLoc &Loc = I.getDebugLoc()) { + EntryBr->setDebugLoc(Loc); + break; + } + + // Insert the new exit block, terminated by a return. + NewExit->insertInto(NewFunc); + // Add a return in the new function, with a default null value if necessary. 
+ ReturnInst *NewRet; + if (VoidRet) + NewRet = ReturnInst::Create(Header->getContext(), NewExit); + else + NewRet = ReturnInst::Create(Header->getContext(), + Constant::getNullValue(RetTy), NewExit); + // Set the debug location of the ret to match the debug location of some + // corresponding reattach. + for (const BasicBlock *Pred : predecessors(NewExit)) + if (const DebugLoc &Loc = Pred->getTerminator()->getDebugLoc()) { + NewRet->setDebugLoc(Loc); + break; + } + + // If needed, create a landingpad and resume for the unwind destination in the + // new function. + if (OldUnwind) { + NewUnwind->insertInto(NewFunc); + LandingPadInst *LPad = + LandingPadInst::Create(OldUnwind->getLandingPadInst()->getType(), 0, + "lpadval", NewUnwind); + LPad->setCleanup(true); + ResumeInst *NewResume = ResumeInst::Create(LPad, NewUnwind); + // Set the debug location of the resume to match the debug location of some + // corresponding detached_rethrow. + for (const BasicBlock *Pred : predecessors(NewUnwind)) + if (const DebugLoc &Loc = Pred->getTerminator()->getDebugLoc()) { + NewResume->setDebugLoc(Loc); + break; + } + } + + // If needed, add the new unreachable destination. + if (NewUnreachable) + NewUnreachable->insertInto(NewFunc); + + return NewFunc; +} + +// Add alignment assumptions to parameters of outlined function, based on known +// alignment data in the caller. 
+void llvm::AddAlignmentAssumptions( + const Function *Caller, const ValueSet &Args, ValueToValueMapTy &VMap, + const Instruction *CallSite, AssumptionCache *AC, DominatorTree *DT) { + NamedRegionTimer NRT("AddAlignmentAssumptions", "Add alignment assumptions", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + auto &DL = Caller->getParent()->getDataLayout(); + for (Value *ArgVal : Args) { + // Ignore arguments to non-pointer types + if (!ArgVal->getType()->isPointerTy()) continue; + Argument *Arg = cast(VMap[ArgVal]); + // Ignore arguments to non-pointer types + if (!Arg->getType()->isPointerTy()) continue; + // If the argument already has an alignment attribute, skip it. + if (Arg->getParamAlign()) continue; + // Get any known alignment information for this argument's value. + Align Alignment = getKnownAlignment(ArgVal, DL, CallSite, AC, DT); + // If we have alignment data, add it as an attribute to the outlined + // function's parameter. + if (Alignment >= 1) + Arg->addAttr(Attribute::getWithAlignment(Arg->getContext(), Alignment)); + } +} diff --git a/llvm/lib/Transforms/Tapir/QthreadsABI.cpp b/llvm/lib/Transforms/Tapir/QthreadsABI.cpp new file mode 100644 index 00000000000000..3fc6c761b69a37 --- /dev/null +++ b/llvm/lib/Transforms/Tapir/QthreadsABI.cpp @@ -0,0 +1,350 @@ +//===- QthreadsABI.cpp - Lower Tapir into Qthreads runtime system calls -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the QthreadsABI interface, which is used to convert +// Tapir instructions to calls into the Qthreads runtime system. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Tapir/QthreadsABI.h" +#include "llvm/Analysis/TapirTaskInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Tapir/Outline.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/TapirUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "qthreadsabi" + +static cl::opt UseCopyargs( + "qthreads-use-fork-copyargs", cl::init(false), cl::Hidden, + cl::desc("Use copyargs variant of fork")); + +// Accessors for opaque Qthreads RTS functions +FunctionCallee QthreadsABI::get_qthread_num_workers() { + if (QthreadNumWorkers) + return QthreadNumWorkers; + + LLVMContext &C = M.getContext(); + AttributeList AL; + // TODO: Set appropriate function attributes. + FunctionType *FTy = FunctionType::get(Type::getInt16Ty(C), {}, false); + QthreadNumWorkers = M.getOrInsertFunction("qthread_num_workers", FTy, AL); + return QthreadNumWorkers; +} + +FunctionCallee QthreadsABI::get_qthread_fork_copyargs() { + if (QthreadForkCopyargs) + return QthreadForkCopyargs; + + LLVMContext &C = M.getContext(); + const DataLayout &DL = M.getDataLayout(); + AttributeList AL; + // TODO: Set appropriate function attributes. + FunctionType *FTy = FunctionType::get( + Type::getInt32Ty(C), + { QthreadFTy, // qthread_f f + Type::getInt8PtrTy(C), // const void *arg + DL.getIntPtrType(C), // size_t arg_size + Type::getInt64PtrTy(C) // aligned_t *ret + }, false); + + QthreadForkCopyargs = M.getOrInsertFunction("qthread_fork_copyargs", FTy, AL); + return QthreadForkCopyargs; +} + +FunctionCallee QthreadsABI::get_qthread_initialize() { + if (QthreadInitialize) + return QthreadInitialize; + + LLVMContext &C = M.getContext(); + AttributeList AL; + // TODO: Set appropriate function attributes. 
+ FunctionType *FTy = FunctionType::get( + Type::getInt32Ty(C), {}, false); + + QthreadInitialize = M.getOrInsertFunction("qthread_initialize", FTy, AL); + return QthreadInitialize; +} + +FunctionCallee QthreadsABI::get_qt_sinc_create() { + if (QtSincCreate) + return QtSincCreate; + + LLVMContext &C = M.getContext(); + const DataLayout &DL = M.getDataLayout(); + AttributeList AL; + // TODO: Set appropriate function attributes. + FunctionType *FTy = FunctionType::get( + Type::getInt8PtrTy(C), + { DL.getIntPtrType(C), // size_t size + Type::getInt8PtrTy(C), // void *initval + Type::getInt8PtrTy(C), // void *op + DL.getIntPtrType(C) // size_t expect + }, + false); + + QtSincCreate = M.getOrInsertFunction("qt_sinc_create", FTy, AL); + return QtSincCreate; +} + +FunctionCallee QthreadsABI::get_qt_sinc_expect() { + if (QtSincExpect) + return QtSincExpect; + + LLVMContext &C = M.getContext(); + const DataLayout &DL = M.getDataLayout(); + AttributeList AL; + // TODO: Set appropriate function attributes. + FunctionType *FTy = FunctionType::get( + Type::getVoidTy(C), + { Type::getInt8PtrTy(C), // sync_t *s + DL.getIntPtrType(C) // size_t incr + }, + false); + + QtSincExpect = M.getOrInsertFunction("qt_sinc_expect", FTy, AL); + return QtSincExpect; +} + +FunctionCallee QthreadsABI::get_qt_sinc_submit() { + if (QtSincSubmit) + return QtSincSubmit; + + LLVMContext &C = M.getContext(); + AttributeList AL; + // TODO: Set appropriate function attributes. + FunctionType *FTy = FunctionType::get( + Type::getVoidTy(C), + { Type::getInt8PtrTy(C), // sync_t *s + Type::getInt8PtrTy(C) // void *val + }, + false); + + QtSincSubmit = M.getOrInsertFunction("qt_sinc_submit", FTy, AL); + return QtSincSubmit; +} + +FunctionCallee QthreadsABI::get_qt_sinc_wait() { + if (QtSincWait) + return QtSincWait; + + LLVMContext &C = M.getContext(); + AttributeList AL; + // TODO: Set appropriate function attributes. 
+ FunctionType *FTy = FunctionType::get( + Type::getVoidTy(C), + { Type::getInt8PtrTy(C), // sync_t *s + Type::getInt8PtrTy(C) // void *target + }, + false); + + QtSincWait = M.getOrInsertFunction("qt_sinc_wait", FTy, AL); + return QtSincWait; +} + +FunctionCallee QthreadsABI::get_qt_sinc_destroy() { + if (QtSincDestroy) + return QtSincDestroy; + + LLVMContext &C = M.getContext(); + AttributeList AL; + // TODO: Set appropriate function attributes. + FunctionType *FTy = FunctionType::get( + Type::getVoidTy(C), + { Type::getInt8PtrTy(C), // sync_t *s + }, + false); + + QtSincDestroy = M.getOrInsertFunction("qt_sinc_destroy", FTy, AL); + return QtSincDestroy; +} + +#define QTHREAD_FUNC(name) get_##name() + +QthreadsABI::QthreadsABI(Module &M) : TapirTarget(M) { + LLVMContext &C = M.getContext(); + // Initialize any types we need for lowering. + QthreadFTy = PointerType::getUnqual( + FunctionType::get(Type::getInt64Ty(C), { Type::getInt8PtrTy(C) }, false)); +} + +/// Lower a call to get the grainsize of this Tapir loop. +/// +/// The grainsize is computed by the following equation: +/// +/// Grainsize = min(2048, ceil(Limit / (8 * workers))) +/// +/// This computation is inserted into the preheader of the loop. 
+Value *QthreadsABI::lowerGrainsizeCall(CallInst *GrainsizeCall) { + Value *Limit = GrainsizeCall->getArgOperand(0); + IRBuilder<> Builder(GrainsizeCall); + + // Get 8 * workers + Value *Workers = Builder.CreateCall(QTHREAD_FUNC(qthread_num_workers)); + Value *WorkersX8 = Builder.CreateIntCast( + Builder.CreateMul(Workers, ConstantInt::get(Workers->getType(), 8)), + Limit->getType(), false); + // Compute ceil(limit / 8 * workers) = + // (limit + 8 * workers - 1) / (8 * workers) + Value *SmallLoopVal = + Builder.CreateUDiv(Builder.CreateSub(Builder.CreateAdd(Limit, WorkersX8), + ConstantInt::get(Limit->getType(), 1)), + WorkersX8); + // Compute min + Value *LargeLoopVal = ConstantInt::get(Limit->getType(), 2048); + Value *Cmp = Builder.CreateICmpULT(LargeLoopVal, SmallLoopVal); + Value *Grainsize = Builder.CreateSelect(Cmp, LargeLoopVal, SmallLoopVal); + + // Replace uses of grainsize intrinsic call with this grainsize value. + GrainsizeCall->replaceAllUsesWith(Grainsize); + return Grainsize; +} + +Value *QthreadsABI::getOrCreateSinc(Value *SyncRegion, Function *F) { + LLVMContext &C = M.getContext(); + Value* sinc; + if((sinc = SyncRegionToSinc[SyncRegion])) + return sinc; + else { + Value* zero = ConstantInt::get(Type::getInt64Ty(C), 0); + Value* null = Constant::getNullValue(Type::getInt8PtrTy(C)); + std::vector createArgs = {zero, null, null, zero}; + sinc = CallInst::Create(QTHREAD_FUNC(qt_sinc_create), createArgs, "", + F->getEntryBlock().getTerminator()); + SyncRegionToSinc[SyncRegion] = sinc; + + // Make sure we destroy the sinc at all exit points to prevent memory leaks + for(BasicBlock &BB : *F) { + if(isa(BB.getTerminator())){ + CallInst::Create(QTHREAD_FUNC(qt_sinc_destroy), {sinc}, "", + BB.getTerminator()); + } + } + + return sinc; + } +} + +void QthreadsABI::lowerSync(SyncInst &SI) { + IRBuilder<> builder(&SI); + auto F = SI.getParent()->getParent(); + auto& C = M.getContext(); + auto null = Constant::getNullValue(Type::getInt8PtrTy(C)); + Value* SR = 
SI.getSyncRegion(); + auto sinc = getOrCreateSinc(SR, F); + std::vector args = {sinc, null}; + auto sincwait = QTHREAD_FUNC(qt_sinc_wait); + builder.CreateCall(sincwait, args); + BranchInst *PostSync = BranchInst::Create(SI.getSuccessor(0)); + ReplaceInstWithInst(&SI, PostSync); +} + +void QthreadsABI::processSubTaskCall(TaskOutlineInfo &TOI, DominatorTree &DT) { + Function *Outlined = TOI.Outline; + Instruction *ReplStart = TOI.ReplStart; + CallBase *ReplCall = cast(TOI.ReplCall); + BasicBlock *CallBlock = ReplStart->getParent(); + + LLVMContext &C = M.getContext(); + const DataLayout &DL = M.getDataLayout(); + + // At this point, we have a call in the parent to a function containing the + // task body. That function takes as its argument a pointer to a structure + // containing the inputs to the task body. This structure is initialized in + // the parent immediately before the call. + + // To match the Qthreads ABI, we replace the existing call with a call to + // qthreads_fork_copyargs. + IRBuilder<> CallerIRBuilder(ReplCall); + Value *OutlinedFnPtr = CallerIRBuilder.CreatePointerBitCastOrAddrSpaceCast( + Outlined, QthreadFTy); + AllocaInst *CallerArgStruct = cast(ReplCall->getArgOperand(0)); + Type *ArgsTy = CallerArgStruct->getAllocatedType(); + Value *ArgStructPtr = CallerIRBuilder.CreateBitCast(CallerArgStruct, + Type::getInt8PtrTy(C)); + Constant *Null = Constant::getNullValue(Type::getInt64PtrTy(C)); + ConstantInt *ArgSize = ConstantInt::get(DL.getIntPtrType(C), + DL.getTypeAllocSize(ArgsTy)); + CallInst *Call = CallerIRBuilder.CreateCall( + QTHREAD_FUNC(qthread_fork_copyargs), { OutlinedFnPtr, ArgStructPtr, + ArgSize, Null }); + Call->setDebugLoc(ReplCall->getDebugLoc()); + TOI.replaceReplCall(Call); + ReplCall->eraseFromParent(); + + // Add lifetime intrinsics for the argument struct. TODO: Move this logic + // into underlying LoweringUtils routines? 
+ CallerIRBuilder.SetInsertPoint(ReplStart); + CallerIRBuilder.CreateLifetimeStart(CallerArgStruct, ArgSize); + CallerIRBuilder.SetInsertPoint(CallBlock, ++Call->getIterator()); + CallerIRBuilder.CreateLifetimeEnd(CallerArgStruct, ArgSize); + + if (TOI.ReplUnwind) + // We assume that qthread_fork_copyargs dealt with the exception. But + // replacing the invocation of the helper function with the call to + // qthread_fork_copyargs will remove the terminator from CallBlock. Restore + // that terminator here. + BranchInst::Create(TOI.ReplRet, CallBlock); + + // VERIFY: If we're using fork_copyargs, we don't need a separate helper + // function to manage the allocation of the argument structure. +} + +bool QthreadsABI::preProcessFunction(Function &F, TaskInfo &TI, + bool ProcessingTapirLoops) { + if (ProcessingTapirLoops) + // Don't do any preprocessing when outlining Tapir loops. + return false; + + LLVMContext &C = M.getContext(); + for (Task *T : post_order(TI.getRootTask())) { + if (T->isRootTask()) + continue; + DetachInst *Detach = T->getDetach(); + BasicBlock *detB = Detach->getParent(); + BasicBlock *Spawned = T->getEntry(); + Value *SR = Detach->getSyncRegion(); + Value *sinc = getOrCreateSinc(SR, &F); + + // Add an expect increment before spawning + IRBuilder<> preSpawnB(detB); + Value* one = ConstantInt::get(Type::getInt64Ty(C), 1); + std::vector expectArgs = {sinc, one}; + CallInst::Create(QTHREAD_FUNC(qt_sinc_expect), expectArgs, "", Detach); + + // Add a submit to end of task body + // + // TB: I would interpret the above comment to mean we want qt_sinc_submit() + // before the task terminates. But the code I see for inserting + // qt_sinc_submit just inserts the call at the end of the entry block of the + // task, which is not necessarily the end of the task. I kept the code I + // found, but I'm not sure if it is correct. 
+ IRBuilder<> footerB(Spawned->getTerminator()); + Value* null = Constant::getNullValue(Type::getInt8PtrTy(C)); + std::vector submitArgs = {sinc, null}; + footerB.CreateCall(QTHREAD_FUNC(qt_sinc_submit), submitArgs); + } + return false; +} + +void QthreadsABI::postProcessFunction(Function &F, bool ProcessingTapirLoops) { + if (ProcessingTapirLoops) + // Don't do any preprocessing when outlining Tapir loops. + return; + + CallInst::Create(QTHREAD_FUNC(qthread_initialize), "", + F.getEntryBlock().getFirstNonPHIOrDbg()); +} + +void QthreadsABI::postProcessHelper(Function &F) {} + diff --git a/llvm/lib/Transforms/Tapir/SerialABI.cpp b/llvm/lib/Transforms/Tapir/SerialABI.cpp new file mode 100644 index 00000000000000..dc23f3b1e9c177 --- /dev/null +++ b/llvm/lib/Transforms/Tapir/SerialABI.cpp @@ -0,0 +1,52 @@ +//===- SerialABI.cpp - Replace Tapir with serial projection ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the SerialABI interface, which is used to convert Tapir +// instructions into their serial projection. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Tapir/SerialABI.h" +#include "llvm/Analysis/TapirTaskInfo.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/TapirUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "serialabi" + +Value *SerialABI::lowerGrainsizeCall(CallInst *GrainsizeCall) { + Value *Grainsize = ConstantInt::get(GrainsizeCall->getType(), 1); + + // Replace uses of grainsize intrinsic call with this grainsize value. 
+ GrainsizeCall->replaceAllUsesWith(Grainsize); + return Grainsize; +} + +void SerialABI::lowerSync(SyncInst &SI) { + ReplaceInstWithInst(&SI, BranchInst::Create(SI.getSuccessor(0))); +} + +bool SerialABI::preProcessFunction(Function &F, TaskInfo &TI, + bool ProcessingTapirLoops) { + if (ProcessingTapirLoops) + // Don't do any preprocessing when outlining Tapir loops. + return false; + + bool Changed = false; + for (Task *T : post_order(TI.getRootTask())) { + if (T->isRootTask()) + continue; + DetachInst *DI = T->getDetach(); + SerializeDetach(DI, T); + Changed = true; + } + return Changed; +} + + diff --git a/llvm/lib/Transforms/Tapir/SerializeSmallTasks.cpp b/llvm/lib/Transforms/Tapir/SerializeSmallTasks.cpp new file mode 100644 index 00000000000000..52c866c3055454 --- /dev/null +++ b/llvm/lib/Transforms/Tapir/SerializeSmallTasks.cpp @@ -0,0 +1,216 @@ +//===- SerializeSmallTasks.cpp - Serialize small Tapir tasks --------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass serializes Tapir tasks with too little work to justify spawning. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Tapir/SerializeSmallTasks.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TapirTaskInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/WorkSpanAnalysis.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Tapir/LoopStripMine.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/TapirUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "serialize-small-tasks" + +static cl::opt SerializeUnprofitableLoops( + "serialize-unprofitable-loops", cl::Hidden, cl::init(true), + cl::desc("Serialize any Tapir tasks found to be unprofitable.")); + +static bool trySerializeSmallLoop( + Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE, + const TargetTransformInfo &TTI, AssumptionCache &AC, TaskInfo *TI, + OptimizationRemarkEmitter &ORE, TargetLibraryInfo *TLI) { + bool Changed = false; + for (Loop *SubL : *L) + Changed |= trySerializeSmallLoop(SubL, DT, LI, SE, TTI, AC, TI, ORE, TLI); + + Task *T = getTaskIfTapirLoopStructure(L, TI); + if (!T) + return Changed; + + // Skip any loop for which stripmining is explicitly disabled. 
+ if (TM_Disable == hasLoopStripmineTransformation(L)) + return Changed; + + TapirLoopHints Hints(L); + + TargetTransformInfo::StripMiningPreferences SMP = + gatherStripMiningPreferences(L, SE, TTI, std::nullopt); + + SmallPtrSet EphValues; + CodeMetrics::collectEphemeralValues(L, &AC, EphValues); + + WSCost LoopCost; + estimateLoopCost(LoopCost, L, LI, &SE, TTI, TLI, EphValues); + + // If the work in the loop is larger than the maximum value we can deal with, + // then it's not small. + if (LoopCost.UnknownCost) + return Changed; + + computeStripMineCount(L, TTI, LoopCost.Work, SMP); + // Make sure the count is a power of 2. + if (!isPowerOf2_32(SMP.Count)) + SMP.Count = NextPowerOf2(SMP.Count); + + // Find a constant trip count if available + unsigned ConstTripCount = getConstTripCount(L, SE); + + if (!ConstTripCount || SMP.Count < ConstTripCount) + return Changed; + + // Serialize the loop's detach, since it appears to be too small to be worth + // parallelizing. + ORE.emit([&]() { + return OptimizationRemark("serialize-small-tasks", + "SerializingSmallLoop", + L->getStartLoc(), L->getHeader()) + << "Serializing parallel loop that appears to be unprofitable " + << "to parallelize."; + }); + SerializeDetach(cast(L->getHeader()->getTerminator()), T, + /* ReplaceWithTaskFrame = */ taskContainsSync(T), &DT); + Hints.clearHintsMetadata(); + L->setDerivedFromTapirLoop(); + return true; +} + +namespace { +struct SerializeSmallTasks : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + SerializeSmallTasks() : FunctionPass(ID) { + initializeSerializeSmallTasksPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + } +}; +} + +char SerializeSmallTasks::ID = 0; 
+INITIALIZE_PASS_BEGIN(SerializeSmallTasks, "serialize-small-tasks", + "Serialize small Tapir tasks", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TaskInfoWrapperPass) +INITIALIZE_PASS_END(SerializeSmallTasks, "serialize-small-tasks", + "Serialize small Tapir tasks", false, false) + +namespace llvm { +FunctionPass *createSerializeSmallTasksPass() { + return new SerializeSmallTasks(); +} +} // end namespace llvm + +/// runOnFunction - Run through all tasks in the function and simplify them in +/// post order. +/// +bool SerializeSmallTasks::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + + TaskInfo &TI = getAnalysis().getTaskInfo(); + if (TI.isSerial()) + return false; + + auto &TLI = getAnalysis().getTLI(F); + auto &DT = getAnalysis().getDomTree(); + LoopInfo *LI = &getAnalysis().getLoopInfo(); + ScalarEvolution &SE = getAnalysis().getSE(); + const TargetTransformInfo &TTI = + getAnalysis().getTTI(F); + auto &AC = getAnalysis().getAssumptionCache(F); + // For the old PM, we can't use OptimizationRemarkEmitter as an analysis + // pass. Function analyses need to be preserved across loop transformations + // but ORE cannot be preserved (see comment before the pass definition). 
+ OptimizationRemarkEmitter ORE(&F); + + LLVM_DEBUG(dbgs() << "SerializeSmallTasks running on function " << F.getName() + << "\n"); + + bool Changed = false; + if (SerializeUnprofitableLoops) + for (Loop *L : *LI) + Changed |= trySerializeSmallLoop(L, DT, LI, SE, TTI, AC, &TI, ORE, &TLI); + + if (Changed) + // Recalculate TaskInfo + TI.recalculate(*DT.getRoot()->getParent(), DT); + + return Changed; +} + +PreservedAnalyses SerializeSmallTasksPass::run(Function &F, + FunctionAnalysisManager &AM) { + if (F.empty()) + return PreservedAnalyses::all(); + + TaskInfo &TI = AM.getResult(F); + if (TI.isSerial()) + return PreservedAnalyses::all(); + + auto &TLI = AM.getResult(F); + auto &SE = AM.getResult(F); + auto &LI = AM.getResult(F); + auto &TTI = AM.getResult(F); + auto &DT = AM.getResult(F); + auto &AC = AM.getResult(F); + auto &ORE = AM.getResult(F); + + + LLVM_DEBUG(dbgs() << "SerializeSmallTasks running on function " << F.getName() + << "\n"); + + bool Changed = false; + if (SerializeUnprofitableLoops) + for (Loop *L : LI) + Changed |= trySerializeSmallLoop(L, DT, &LI, SE, TTI, AC, &TI, ORE, &TLI); + + if (!Changed) + return PreservedAnalyses::all(); + + // Recalculate TaskInfo + TI.recalculate(*DT.getRoot()->getParent(), DT); + + PreservedAnalyses PA; + PA.preserve(); + PA.preserve(); + PA.preserve(); + PA.preserve(); + // TODO: Add more preserved analyses here. + return PA; +} diff --git a/llvm/lib/Transforms/Tapir/Tapir.cpp b/llvm/lib/Transforms/Tapir/Tapir.cpp new file mode 100644 index 00000000000000..bc4e056fc9aba7 --- /dev/null +++ b/llvm/lib/Transforms/Tapir/Tapir.cpp @@ -0,0 +1,35 @@ +//===- Tapir.cpp ----------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements common infrastructure for libLLVMTapirOpts.a, which
+// implements several transformations over the Tapir/LLVM intermediate
+// representation, including the C bindings for that library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm-c/Transforms/Tapir.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/Transforms/Tapir.h"
+
+using namespace llvm;
+
+/// initializeTapirOpts - Initialize all passes linked into the
+/// TapirOpts library.
+///
+/// Registers each legacy-PM Tapir pass with \p Registry so that the passes
+/// are available by name (e.g. to opt) and their analysis dependencies
+/// resolve.
+void llvm::initializeTapirOpts(PassRegistry &Registry) {
+  initializeLoopSpawningTIPass(Registry);
+  initializeLowerTapirToTargetPass(Registry);
+  initializeTaskCanonicalizePass(Registry);
+  initializeTaskSimplifyPass(Registry);
+  initializeDRFScopedNoAliasWrapperPassPass(Registry);
+  initializeLoopStripMinePass(Registry);
+  initializeSerializeSmallTasksPass(Registry);
+}
diff --git a/llvm/lib/Transforms/Tapir/TapirLoopInfo.cpp b/llvm/lib/Transforms/Tapir/TapirLoopInfo.cpp
new file mode 100644
index 00000000000000..1846e2a782ad7a
--- /dev/null
+++ b/llvm/lib/Transforms/Tapir/TapirLoopInfo.cpp
@@ -0,0 +1,646 @@
+//===- TapirLoopInfo.cpp - Utility functions for Tapir loops --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utility functions for handling Tapir loops.
+//
+// Many of these routines are adapted from
+// Transforms/Vectorize/LoopVectorize.cpp.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Tapir/TapirLoopInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TapirTaskInfo.h" +#include "llvm/Transforms/Tapir/LoweringUtils.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" +#include "llvm/Transforms/Utils/TapirUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "tapir" + +/// Create an analysis remark that explains why the transformation failed +/// +/// \p RemarkName is the identifier for the remark. If \p I is passed it is an +/// instruction that prevents the transformation. Otherwise \p TheLoop is used +/// for the location of the remark. \return the remark object that can be +/// streamed to. +/// +/// Based on createMissedAnalysis in the LoopVectorize pass. +OptimizationRemarkAnalysis +TapirLoopInfo::createMissedAnalysis(const char *PassName, StringRef RemarkName, + const Loop *TheLoop, Instruction *I) { + const Value *CodeRegion = TheLoop->getHeader(); + DebugLoc DL = TheLoop->getStartLoc(); + + if (I) { + CodeRegion = I->getParent(); + // If there is no debug location attached to the instruction, revert back to + // using the loop's. + if (I->getDebugLoc()) + DL = I->getDebugLoc(); + } + + OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion); + R << "Tapir loop not transformed: "; + return R; +} + +/// Update information on this Tapir loop based on its metadata. +void TapirLoopInfo::readTapirLoopMetadata(OptimizationRemarkEmitter &ORE) { + TapirLoopHints Hints(getLoop()); + + // Get a grainsize for this Tapir loop from the metadata, if the metadata + // gives a grainsize. 
+ Grainsize = Hints.getGrainsize(); +} + +static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) { + if (Ty->isPointerTy()) + return DL.getIntPtrType(Ty); + + // It is possible that char's or short's overflow when we ask for the loop's + // trip count, work around this by changing the type size. + if (Ty->getScalarSizeInBits() < 32) + return Type::getInt32Ty(Ty->getContext()); + + return Ty; +} + +static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) { + Ty0 = convertPointerToIntegerType(DL, Ty0); + Ty1 = convertPointerToIntegerType(DL, Ty1); + if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits()) + return Ty0; + return Ty1; +} + +/// Adds \p Phi, with induction descriptor ID, to the inductions list. This can +/// set \p Phi as the main induction of the loop if \p Phi is a better choice +/// for the main induction than the existing one. +void TapirLoopInfo::addInductionPhi(PHINode *Phi, + const InductionDescriptor &ID) { + Inductions[Phi] = ID; + + Type *PhiTy = Phi->getType(); + const DataLayout &DL = Phi->getModule()->getDataLayout(); + + // Int inductions are special because we only allow one IV. + if (ID.getKind() == InductionDescriptor::IK_IntInduction && + ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() && + isa(ID.getStartValue()) && + cast(ID.getStartValue())->isNullValue()) { + + // Get the widest type. + if (!WidestIndTy) + WidestIndTy = convertPointerToIntegerType(DL, PhiTy); + else + WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy); + + // Use the phi node with the widest type as induction. Use the last + // one if there are multiple (no good reason for doing this other + // than it is expedient). We've checked that it begins at zero and + // steps by one, so this is a canonical induction variable. 
+ if (!PrimaryInduction || PhiTy == WidestIndTy) + PrimaryInduction = Phi; + } + + // // Both the PHI node itself, and the "post-increment" value feeding + // // back into the PHI node may have external users. + // // We can allow those uses, except if the SCEVs we have for them rely + // // on predicates that only hold within the loop, since allowing the exit + // // currently means re-using this SCEV outside the loop. + // if (PSE.getUnionPredicate().isAlwaysTrue()) { + // AllowedExit.insert(Phi); + // AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch())); + // } + + LLVM_DEBUG(dbgs() << "TapirLoop: Found an induction variable: " << *Phi + << "\n"); +} + +/// Gather all induction variables in this loop that need special handling +/// during outlining. +bool TapirLoopInfo::collectIVs(PredicatedScalarEvolution &PSE, + const char *PassName, + OptimizationRemarkEmitter *ORE) { + Loop *L = getLoop(); + for (Instruction &I : *L->getHeader()) { + if (auto *Phi = dyn_cast(&I)) { + Type *PhiTy = Phi->getType(); + // Check that this PHI type is allowed. + if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && + !PhiTy->isPointerTy()) { + if (ORE) + ORE->emit(createMissedAnalysis(PassName, "CFGNotUnderstood", L, Phi) + << "loop control flow is not understood by loop spawning"); + LLVM_DEBUG(dbgs() << "TapirLoop: Found an non-int non-pointer PHI.\n"); + return false; + } + + // We only allow if-converted PHIs with exactly two incoming values. 
+ if (Phi->getNumIncomingValues() != 2) { + if (ORE) + ORE->emit(createMissedAnalysis(PassName, "CFGNotUnderstood", L, Phi) + << "loop control flow is not understood by loop spawning"); + LLVM_DEBUG(dbgs() << "TapirLoop: Found an invalid PHI.\n"); + return false; + } + + InductionDescriptor ID; + if (InductionDescriptor::isInductionPHI(Phi, L, PSE, ID)) { + LLVM_DEBUG(dbgs() << "\tFound induction PHI " << *Phi << "\n"); + addInductionPhi(Phi, ID); + // if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr) + // Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst()); + continue; + } + + // As a last resort, coerce the PHI to a AddRec expression and re-try + // classifying it a an induction PHI. + if (InductionDescriptor::isInductionPHI(Phi, L, PSE, ID, true)) { + LLVM_DEBUG(dbgs() << "\tCoerced induction PHI " << *Phi << "\n"); + addInductionPhi(Phi, ID); + continue; + } + + LLVM_DEBUG(dbgs() << "\tPassed PHI " << *Phi << "\n"); + } // end of PHI handling + } + + if (!PrimaryInduction) { + LLVM_DEBUG(dbgs() + << "TapirLoop: Did not find a primary integer induction var.\n"); + if (ORE) + ORE->emit(createMissedAnalysis(PassName, "NoInductionVariable", L) + << "canonical loop induction variable could not be identified"); + if (Inductions.empty()) + return false; + } + + // Now we know the widest induction type, check if our found induction is the + // same size. + // + // TODO: Check if this code is dead due to IndVarSimplify. + if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType()) + PrimaryInduction = nullptr; + + return true; +} + +/// Replace all induction variables in this loop that are not primary with +/// stronger forms. 
+void TapirLoopInfo::replaceNonPrimaryIVs(PredicatedScalarEvolution &PSE) { + BasicBlock *Header = getLoop()->getHeader(); + IRBuilder<> B(&*Header->getFirstInsertionPt()); + const DataLayout &DL = Header->getModule()->getDataLayout(); + SmallVector, 4> InductionsToRemove; + + // Replace all non-primary inductions with strengthened forms. + for (auto &InductionEntry : Inductions) { + PHINode *OrigPhi = InductionEntry.first; + InductionDescriptor II = InductionEntry.second; + if (OrigPhi == PrimaryInduction) continue; + LLVM_DEBUG(dbgs() << "Replacing Phi " << *OrigPhi << "\n"); + // If Induction is not canonical, replace it with some computation based on + // PrimaryInduction. + Type *StepType = II.getStep()->getType(); + Instruction::CastOps CastOp = + CastInst::getCastOpcode(PrimaryInduction, true, StepType, true); + Value *CRD = B.CreateCast(CastOp, PrimaryInduction, StepType, "cast.crd"); + Value *PhiRepl = emitTransformedIndex(B, CRD, PSE.getSE(), DL, II); + PhiRepl->setName(OrigPhi->getName() + ".tl.repl"); + OrigPhi->replaceAllUsesWith(PhiRepl); + InductionsToRemove.push_back(InductionEntry); + } + + // Remove all inductions that were replaced from Inductions. + for (auto &InductionEntry : InductionsToRemove) { + PHINode *OrigPhi = InductionEntry.first; + OrigPhi->eraseFromParent(); + Inductions.erase(OrigPhi); + } +} + +bool TapirLoopInfo::getLoopCondition(const char *PassName, + OptimizationRemarkEmitter *ORE) { + Loop *L = getLoop(); + + // Check that the latch is terminated by a branch instruction. The + // LoopRotate pass can be helpful to ensure this property. 
+ BranchInst *BI = + dyn_cast(L->getLoopLatch()->getTerminator()); + if (!BI || BI->isUnconditional()) { + LLVM_DEBUG(dbgs() + << "Loop-latch terminator is not a conditional branch.\n"); + if (ORE) + ORE->emit(TapirLoopInfo::createMissedAnalysis(PassName, "NoLatchBranch", + L) + << "loop latch is not terminated by a conditional branch"); + return false; + } + // Check that the condition is an integer-equality comparison. The + // IndVarSimplify pass should transform Tapir loops to use integer-equality + // comparisons when the loop can be analyzed. + { + const ICmpInst *Cond = dyn_cast(BI->getCondition()); + if (!Cond) { + LLVM_DEBUG(dbgs() << + "Loop-latch condition is not an integer comparison.\n"); + if (ORE) + ORE->emit(TapirLoopInfo::createMissedAnalysis(PassName, "NotIntCmp", L) + << "loop-latch condition is not an integer comparison"); + return false; + } + if (!Cond->isEquality()) { + LLVM_DEBUG(dbgs() << + "Loop-latch condition is not an equality comparison.\n"); + // TODO: Find a reasonable analysis message to give to users. + // if (ORE) + // ORE->emit(TapirLoopInfo::createMissedAnalysis(PassName, + // "NonCanonicalCmp", L) + // << "non-canonical loop-latch condition"); + return false; + } + } + Condition = dyn_cast(BI->getCondition()); + LLVM_DEBUG(dbgs() << "\tLoop condition " << *Condition << "\n"); + + if (Condition->getOperand(0) == PrimaryInduction || + Condition->getOperand(1) == PrimaryInduction) { + // The condition examines the primary induction before the increment. Check + // to see if the condition directs control to exit the loop once + // PrimaryInduction equals the end value. + if ((ICmpInst::ICMP_EQ == Condition->getPredicate() && + BI->getSuccessor(1) == L->getHeader()) || + (ICmpInst::ICMP_NE == Condition->getPredicate() && + BI->getSuccessor(0) == L->getHeader())) + // The end iteration is included in the loop bounds. 
+ InclusiveRange = true; + } + + return true; +} + +static Value *getEscapeValue(Instruction *UI, const InductionDescriptor &II, + Value *TripCount, PredicatedScalarEvolution &PSE, + bool PostInc) { + const DataLayout &DL = UI->getModule()->getDataLayout(); + IRBuilder<> B(&*UI->getParent()->getFirstInsertionPt()); + Value *EffTripCount = TripCount; + if (!PostInc) + EffTripCount = B.CreateSub( + TripCount, ConstantInt::get(TripCount->getType(), 1)); + + Value *Count = !II.getStep()->getType()->isIntegerTy() + ? B.CreateCast(Instruction::SIToFP, EffTripCount, + II.getStep()->getType()) + : B.CreateSExtOrTrunc(EffTripCount, II.getStep()->getType()); + if (PostInc) + Count->setName("cast.count"); + else + Count->setName("cast.cmo"); + + Value *Escape = emitTransformedIndex(B, Count, PSE.getSE(), DL, II); + Escape->setName(UI->getName() + ".escape"); + return Escape; +} + +/// Fix up external users of the induction variable. We assume we are in LCSSA +/// form, with all external PHIs that use the IV having one input value, coming +/// from the remainder loop. We need those PHIs to also have a correct value +/// for the IV when arriving directly from the middle block. +void TapirLoopInfo::fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II, + PredicatedScalarEvolution &PSE) { + // There are two kinds of external IV usages - those that use the value + // computed in the last iteration (the PHI) and those that use the penultimate + // value (the value that feeds into the phi from the loop latch). + // We allow both, but they, obviously, have different values. + assert(getExitBlock() && "Expected a single exit block"); + assert(getTripCount() && "Expected valid trip count"); + Loop *L = getLoop(); + Task *T = getTask(); + Value *TripCount = getTripCount(); + + DenseMap MissingVals; + + // An external user of the last iteration's value should see the value that + // the remainder loop uses to initialize its own IV. 
+ Value *PostInc = OrigPhi->getIncomingValueForBlock(L->getLoopLatch()); + for (User *U : PostInc->users()) { + Instruction *UI = cast(U); + if (!L->contains(UI) && !T->encloses(UI->getParent())) { + assert(isa(UI) && "Expected LCSSA form"); + MissingVals[UI] = getEscapeValue(UI, II, TripCount, PSE, true); + } + } + + // An external user of the penultimate value needs to see TripCount - Step. + // The simplest way to get this is to recompute it from the constituent SCEVs, + // that is Start + (Step * (TripCount - 1)). + for (User *U : OrigPhi->users()) { + Instruction *UI = cast(U); + if (!L->contains(UI) && !T->encloses(UI->getParent())) { + assert(isa(UI) && "Expected LCSSA form"); + MissingVals[UI] = getEscapeValue(UI, II, TripCount, PSE, false); + } + } + + for (auto &I : MissingVals) { + LLVM_DEBUG(dbgs() << "Replacing external IV use:" << *I.first << " with " + << *I.second << "\n"); + PHINode *PHI = cast(I.first); + PHI->replaceAllUsesWith(I.second); + PHI->eraseFromParent(); + } +} + +const SCEV *TapirLoopInfo::getBackedgeTakenCount( + PredicatedScalarEvolution &PSE) const { + Loop *L = getLoop(); + ScalarEvolution *SE = PSE.getSE(); + const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount(); + if (BackedgeTakenCount == SE->getCouldNotCompute()) + BackedgeTakenCount = SE->getExitCount(L, L->getLoopLatch()); + + if (BackedgeTakenCount == SE->getCouldNotCompute()) + return BackedgeTakenCount; + + Type *IdxTy = getWidestInductionType(); + + // The exit count might have the type of i64 while the phi is i32. This can + // happen if we have an induction variable that is sign extended before the + // compare. The only way that we get a backedge taken count is that the + // induction variable was signed and as such will not overflow. In such a case + // truncation is legal. 
+ if (BackedgeTakenCount->getType()->getPrimitiveSizeInBits() > + IdxTy->getPrimitiveSizeInBits()) + BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount, IdxTy); + BackedgeTakenCount = SE->getNoopOrZeroExtend(BackedgeTakenCount, IdxTy); + + return BackedgeTakenCount; +} + +const SCEV *TapirLoopInfo::getExitCount(const SCEV *BackedgeTakenCount, + PredicatedScalarEvolution &PSE) const { + ScalarEvolution *SE = PSE.getSE(); + const SCEV *ExitCount; + if (InclusiveRange) + ExitCount = BackedgeTakenCount; + else + // Get the total trip count from the count by adding 1. + ExitCount = SE->getAddExpr( + BackedgeTakenCount, SE->getOne(BackedgeTakenCount->getType())); + return ExitCount; +} + +/// Returns (and creates if needed) the original loop trip count. +Value *TapirLoopInfo::getOrCreateTripCount(PredicatedScalarEvolution &PSE, + const char *PassName, + OptimizationRemarkEmitter *ORE) { + if (TripCount) + return TripCount; + Loop *L = getLoop(); + + // Get the existing SSA value being used for the end condition of the loop. + if (!Condition) + if (!getLoopCondition(PassName, ORE)) + return nullptr; + + Value *ConditionEnd = Condition->getOperand(0); + { + if (!L->isLoopInvariant(ConditionEnd)) { + if (!L->isLoopInvariant(Condition->getOperand(1))) + return nullptr; + ConditionEnd = Condition->getOperand(1); + } + } + assert(L->isLoopInvariant(ConditionEnd) && + "Condition end is not loop invariant."); + + IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); + ScalarEvolution *SE = PSE.getSE(); + + // Find the loop boundaries. 
+ const SCEV *BackedgeTakenCount = SE->getExitCount(L, L->getLoopLatch()); + + if (BackedgeTakenCount == SE->getCouldNotCompute()) { + LLVM_DEBUG(dbgs() << "Could not compute backedge-taken count.\n"); + return nullptr; + } + + const SCEV *ExitCount = getExitCount(BackedgeTakenCount, PSE); + + if (ExitCount == SE->getSCEV(ConditionEnd)) { + TripCount = ConditionEnd; + return TripCount; + } + + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + Type *IdxTy = getWidestInductionType(); + + // Expand the trip count and place the new instructions in the preheader. + // Notice that the pre-header does not change, only the loop body. + SCEVExpander Exp(*SE, DL, "induction"); + + // Count holds the overall loop count (N). + TripCount = Exp.expandCodeFor(ExitCount, ExitCount->getType(), + L->getLoopPreheader()->getTerminator()); + + if (TripCount->getType()->isPointerTy()) + TripCount = + CastInst::CreatePointerCast(TripCount, IdxTy, "exitcount.ptrcnt.to.int", + L->getLoopPreheader()->getTerminator()); + + // Try to use the existing ConditionEnd for the trip count. + if (TripCount != ConditionEnd) { + // Compare the SCEV's of the TripCount and ConditionEnd to see if they're + // equal. Normalize these SCEV types to be IdxTy. + const SCEV *TripCountSCEV = + SE->getNoopOrAnyExtend(SE->getSCEV(TripCount), IdxTy); + const SCEV *ConditionEndSCEV = + SE->getNoopOrAnyExtend(SE->getSCEV(ConditionEnd), IdxTy); + if (SE->getMinusSCEV(TripCountSCEV, ConditionEndSCEV)->isZero()) + TripCount = ConditionEnd; + } + + return TripCount; +} + +/// Top-level call to prepare a Tapir loop for outlining. +bool TapirLoopInfo::prepareForOutlining( + DominatorTree &DT, LoopInfo &LI, TaskInfo &TI, + PredicatedScalarEvolution &PSE, AssumptionCache &AC, const char *PassName, + OptimizationRemarkEmitter &ORE, const TargetTransformInfo &TTI) { + LLVM_DEBUG(dbgs() << "Preparing loop for outlining " << *getLoop() << "\n"); + + // Collect the IVs in this loop. 
+ collectIVs(PSE, PassName, &ORE); + + // If no primary induction was found, just bail. + if (!PrimaryInduction) + return false; + + LLVM_DEBUG(dbgs() << "\tPrimary induction " << *PrimaryInduction << "\n"); + + // Replace any non-primary IV's. + replaceNonPrimaryIVs(PSE); + + // Compute the trip count for this loop. + // + // We need the trip count for two reasons. + // + // 1) In the call to the helper that will replace this loop, we need to pass + // the total number of loop iterations. + // + // 2) In the helper itself, the strip-mined loop must iterate to the + // end-iteration argument, not the total number of iterations. + Value *TripCount = getOrCreateTripCount(PSE, PassName, &ORE); + if (!TripCount) { + ORE.emit(createMissedAnalysis(PassName, "NoTripCount", getLoop()) + << "could not compute finite loop trip count."); + return false; + } + + LLVM_DEBUG(dbgs() << "\tTrip count " << *TripCount << "\n"); + + // If necessary, rewrite the loop condition to use TripCount. This code + // should run very rarely, since IndVarSimplify should have already simplified + // the loop's induction variables. + if ((Condition->getOperand(0) != TripCount) && + (Condition->getOperand(1) != TripCount)) { + Loop *L = getLoop(); + // For now, we don't handle the case where there are multiple uses of the + // condition. + assert(Condition->hasOneUse() && + "Attempting to rewrite Condition with multiple uses."); + // Get the IV to use for the new condition: either PrimaryInduction or its + // incremented value, depending on whether the range is inclusive. + Value *IVForCond = + InclusiveRange + ? PrimaryInduction + : PrimaryInduction->getIncomingValueForBlock(L->getLoopLatch()); + // Get the parity of the LoopLatch terminator, i.e., whether the true or + // false branch is the backedge. 
+ BranchInst *BI = dyn_cast(L->getLoopLatch()->getTerminator()); + bool BEBranchParity = (BI->getSuccessor(0) == L->getHeader()); + // Create the new Condition + ICmpInst *NewCond = + new ICmpInst(BEBranchParity ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ, + IVForCond, TripCount); + NewCond->setDebugLoc(Condition->getDebugLoc()); + // Replace the old Condition with the new Condition. + ReplaceInstWithInst(Condition, NewCond); + Condition = NewCond; + } + + // FIXME: This test is probably too simple. + assert(((Condition->getOperand(0) == TripCount) || + (Condition->getOperand(1) == TripCount)) && + "Condition does not use trip count."); + + // Fixup all external uses of the IVs. + for (auto &InductionEntry : Inductions) + fixupIVUsers(InductionEntry.first, InductionEntry.second, PSE); + + return true; +} + +/// Transforms an induction descriptor into a direct computation of its value at +/// Index. +/// +/// Copied from lib/Transforms/Vectorize/LoopVectorize.cpp +Value *llvm::emitTransformedIndex( + IRBuilder<> &B, Value *Index, ScalarEvolution *SE, const DataLayout &DL, + const InductionDescriptor &ID) { + + SCEVExpander Exp(*SE, DL, "induction"); + auto Step = ID.getStep(); + auto StartValue = ID.getStartValue(); + assert(Index->getType() == Step->getType() && + "Index type does not match StepValue type"); + + // Note: the IR at this point is broken. We cannot use SE to create any new + // SCEV and then expand it, hoping that SCEV's simplification will give us + // a more optimal code. Unfortunately, attempt of doing so on invalid IR may + // lead to various SCEV crashes. So all we can do is to use builder and rely + // on InstCombine for future simplifications. Here we handle some trivial + // cases only. 
+ auto CreateAdd = [&B](Value *X, Value *Y) { + assert(X->getType() == Y->getType() && "Types don't match!"); + if (auto *CX = dyn_cast(X)) + if (CX->isZero()) + return Y; + if (auto *CY = dyn_cast(Y)) + if (CY->isZero()) + return X; + return B.CreateAdd(X, Y); + }; + + auto CreateMul = [&B](Value *X, Value *Y) { + assert(X->getType() == Y->getType() && "Types don't match!"); + if (auto *CX = dyn_cast(X)) + if (CX->isOne()) + return Y; + if (auto *CY = dyn_cast(Y)) + if (CY->isOne()) + return X; + return B.CreateMul(X, Y); + }; + + switch (ID.getKind()) { + case InductionDescriptor::IK_IntInduction: { + assert(Index->getType() == StartValue->getType() && + "Index type does not match StartValue type"); + if (ID.getConstIntStepValue() && ID.getConstIntStepValue()->isMinusOne()) + return B.CreateSub(StartValue, Index); + auto *Offset = CreateMul( + Index, Exp.expandCodeFor(Step, Index->getType(), &*B.GetInsertPoint())); + return CreateAdd(StartValue, Offset); + } + case InductionDescriptor::IK_PtrInduction: { + assert(isa(Step) && + "Expected constant step for pointer induction"); + return B.CreateGEP( + nullptr, StartValue, + CreateMul(Index, Exp.expandCodeFor(Step, Index->getType(), + &*B.GetInsertPoint()))); + } + case InductionDescriptor::IK_FpInduction: { + assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value"); + auto InductionBinOp = ID.getInductionBinOp(); + assert(InductionBinOp && + (InductionBinOp->getOpcode() == Instruction::FAdd || + InductionBinOp->getOpcode() == Instruction::FSub) && + "Original bin op should be defined for FP induction"); + + Value *StepValue = cast(Step)->getValue(); + + // Floating point operations had to be 'fast' to enable the induction. + FastMathFlags Flags; + Flags.setFast(); + + Value *MulExp = B.CreateFMul(StepValue, Index); + if (isa(MulExp)) + // We have to check, the MulExp may be a constant. 
+ cast(MulExp)->setFastMathFlags(Flags); + + Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode(), StartValue, MulExp, + "induction"); + if (isa(BOp)) + cast(BOp)->setFastMathFlags(Flags); + + return BOp; + } + case InductionDescriptor::IK_NoInduction: + return nullptr; + } + llvm_unreachable("invalid enum"); +} diff --git a/llvm/lib/Transforms/Tapir/TapirToTarget.cpp b/llvm/lib/Transforms/Tapir/TapirToTarget.cpp new file mode 100644 index 00000000000000..d632c7113af3d0 --- /dev/null +++ b/llvm/lib/Transforms/Tapir/TapirToTarget.cpp @@ -0,0 +1,610 @@ +//===- TapirToTarget.cpp - Convert Tapir into parallel-runtime calls ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass converts functions that use Tapir instructions to call out to a +// target parallel runtime system. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Tapir/TapirToTarget.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/TapirTaskInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Verifier.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Timer.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Tapir.h" +#include "llvm/Transforms/Tapir/LoweringUtils.h" +#include "llvm/Transforms/Utils/TapirUtils.h" + +#define DEBUG_TYPE "tapir2target" + +using namespace llvm; + +cl::opt DebugABICalls( + "debug-abi-calls", cl::init(false), cl::Hidden, + cl::desc("Insert ABI calls simply, to debug generated IR")); + +cl::opt UseExternalABIFunctions( + "use-external-abi-functions", cl::init(false), cl::Hidden, + cl::desc("Use ABI functions defined externally, rather than " + "compiler-generated versions")); + +static const char TimerGroupName[] = DEBUG_TYPE; +static const char TimerGroupDescription[] = "Tapir to Target"; + +class TapirToTargetImpl { +public: + TapirToTargetImpl(Module &M, function_ref GetAA, + function_ref GetDT, + function_ref GetTI, + function_ref GetAC, + function_ref GetTLI) + : M(M), GetAA(GetAA), GetDT(GetDT), GetTI(GetTI), GetAC(GetAC), + GetTLI(GetTLI) + {} + ~TapirToTargetImpl() { + if (Target) + delete Target; + } + + bool run(); + +private: + bool unifyReturns(Function &F); + bool processFunction(Function &F, SmallVectorImpl &NewHelpers); + TFOutlineMapTy outlineAllTasks(Function &F, + SmallVectorImpl &AllTaskFrames, + OutlineAnalysis &OA, TaskInfo &TI); + bool processSimpleABI(Function &F, BasicBlock *TFEntry); + bool processRootTask(Function &F, TFOutlineMapTy &TFToOutline, + OutlineAnalysis &OA, TaskInfo &TI); + bool 
processSpawnerTaskFrame(Spindle *TF, TFOutlineMapTy &TFToOutline, + OutlineAnalysis &OA, TaskInfo &TI); + bool processOutlinedTask(Task *T, TFOutlineMapTy &TFToOutline, + OutlineAnalysis &OA, TaskInfo &TI); + +private: + TapirTarget *Target = nullptr; + + Module &M; + + function_ref GetAA; + function_ref GetDT; + function_ref GetTI; + function_ref GetAC; + function_ref GetTLI; +}; + +bool TapirToTargetImpl::unifyReturns(Function &F) { + NamedRegionTimer NRT("unifyReturns", "Unify returns", TimerGroupName, + TimerGroupDescription, TimePassesIsEnabled); + SmallVector ReturningBlocks; + for (BasicBlock &BB : F) + if (isa(BB.getTerminator())) + ReturningBlocks.push_back(&BB); + + // If this function already has no returns or a single return, then terminate + // early. + if (ReturningBlocks.size() <= 1) + return false; + + BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), + "UnifiedReturnBlock", &F); + PHINode *PN = nullptr; + if (F.getReturnType()->isVoidTy()) { + ReturnInst::Create(F.getContext(), nullptr, NewRetBlock); + } else { + // If the function doesn't return void... add a PHI node to the block... + PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(), + "UnifiedRetVal", NewRetBlock); + ReturnInst::Create(F.getContext(), PN, NewRetBlock); + } + + // Loop over all of the blocks, replacing the return instruction with an + // unconditional branch. + // + for (BasicBlock *BB : ReturningBlocks) { + // Add an incoming element to the PHI node for every return instruction that + // is merging into this new block... + if (PN) + PN->addIncoming(BB->getTerminator()->getOperand(0), BB); + + BB->back().eraseFromParent(); // Remove the return insn + BranchInst::Create(NewRetBlock, BB); + } + return true; +} + +/// Outline all tasks in this function in post order. 
+TFOutlineMapTy +TapirToTargetImpl::outlineAllTasks(Function &F, + SmallVectorImpl &AllTaskFrames, + OutlineAnalysis &OA, TaskInfo &TI) { + NamedRegionTimer NRT("outlineAllTasks", "Outline all tasks", TimerGroupName, + TimerGroupDescription, TimePassesIsEnabled); + TFOutlineMapTy TFToOutline; + + // Determine the inputs for all tasks. + TFValueSetMap TFInputs, TFOutputs; + findAllTaskFrameInputs(TFInputs, TFOutputs, AllTaskFrames, F, OA.DT, TI); + + DenseMap> HelperInputs; + + for (Spindle *TF : AllTaskFrames) { + // At this point, all subtaskframess of TF must have been processed. + // Replace the tasks with calls to their outlined helper functions. + for (Spindle *SubTF : TF->subtaskframes()) + TFToOutline[SubTF].replaceReplCall( + replaceTaskFrameWithCallToOutline(SubTF, TFToOutline[SubTF], + HelperInputs[SubTF])); + + // TODO: Add support for outlining taskframes with no associated task. Such + // a facility would allow the frontend to create nested sync regions that + // are properly outlined. + + Task *T = TF->getTaskFromTaskFrame(); + if (!T) { + ValueToValueMapTy VMap; + ValueToValueMapTy InputMap; + TFToOutline[TF] = outlineTaskFrame(TF, TFInputs[TF], HelperInputs[TF], + &Target->getDestinationModule(), VMap, + Target->getArgStructMode(), + Target->getReturnType(), InputMap, OA); + // If the taskframe TF does not catch an exception from the taskframe, + // then the outlined function cannot throw. + if (F.doesNotThrow() && !getTaskFrameResume(TF->getTaskFrameCreate())) + TFToOutline[TF].Outline->setDoesNotThrow(); + // Don't inherit the noreturn attribute from the caller. + if (F.doesNotReturn()) + TFToOutline[TF].Outline->removeFnAttr(Attribute::NoReturn); + Target->addHelperAttributes(*TFToOutline[TF].Outline); + + // Allow the Target to update any internal structures after outlining. + Target->remapAfterOutlining(TF->getEntry(), VMap); + + // Update subtaskframe outline info to reflect the fact that their parent + // taskframe was outlined. 
+ for (Spindle *SubTF : TF->subtaskframes()) + TFToOutline[SubTF].remapOutlineInfo(VMap, InputMap); + + continue; + } + + // Outline the task, if necessary, and add the outlined function to the + // mapping. + + ValueToValueMapTy VMap; + ValueToValueMapTy InputMap; + TFToOutline[TF] = outlineTask(T, TFInputs[TF], HelperInputs[TF], + &Target->getDestinationModule(), VMap, + Target->getArgStructMode(), + Target->getReturnType(), InputMap, OA); + // If the detach for task T does not catch an exception from the task, then + // the outlined function cannot throw. + if (F.doesNotThrow() && !T->getDetach()->hasUnwindDest()) + TFToOutline[TF].Outline->setDoesNotThrow(); + Target->addHelperAttributes(*TFToOutline[TF].Outline); + + // Update subtask outline info to reflect the fact that their spawner was + // outlined. + for (Spindle *SubTF : TF->subtaskframes()) + TFToOutline[SubTF].remapOutlineInfo(VMap, InputMap); + } + + // Insert calls to outlined helpers for taskframe roots. + for (Spindle *TF : TI.getRootTask()->taskframe_roots()) + TFToOutline[TF].replaceReplCall( + replaceTaskFrameWithCallToOutline(TF, TFToOutline[TF], + HelperInputs[TF])); + + return TFToOutline; +} + +/// Process the Tapir instructions in function \p F directly. +bool TapirToTargetImpl::processSimpleABI(Function &F, BasicBlock *TFEntry) { + NamedRegionTimer NRT("processSimpleABI", "Process simple ABI", TimerGroupName, + TimerGroupDescription, TimePassesIsEnabled); + + // Get the simple Tapir instructions to process, including syncs and + // loop-grainsize calls. + SmallVector Syncs; + SmallVector GrainsizeCalls; + SmallVector TaskFrameAddrCalls; + SmallVector TapirRTCalls; + SmallVector ReducerOperations; + for (BasicBlock &BB : F) { + for (Instruction &I : BB) { + // Record calls to get Tapir-loop grainsizes. + if (IntrinsicInst *II = dyn_cast(&I)) + if (Intrinsic::tapir_loop_grainsize == II->getIntrinsicID()) + GrainsizeCalls.push_back(II); + + // Record calls to task_frameaddr intrinsics. 
+ if (IntrinsicInst *II = dyn_cast(&I)) + if (Intrinsic::task_frameaddress == II->getIntrinsicID()) + TaskFrameAddrCalls.push_back(II); + + // Record calls to tapir_runtime_start intrinsics. We rely on analyzing + // uses of these intrinsic calls to find calls to tapir_runtime_end. + if (IntrinsicInst *II = dyn_cast(&I)) + if (Intrinsic::tapir_runtime_start == II->getIntrinsicID()) + TapirRTCalls.push_back(II); + + // Record sync instructions in this function. + if (SyncInst *SI = dyn_cast(&I)) + Syncs.push_back(SI); + + if (!dyn_cast(&I)) + continue; + + if (isTapirIntrinsic(Intrinsic::hyper_lookup, &I, nullptr) || + isTapirIntrinsic(Intrinsic::reducer_register, &I, nullptr) || + isTapirIntrinsic(Intrinsic::reducer_unregister, &I, nullptr)) + ReducerOperations.push_back(cast(&I)); + } + } + + // Lower simple Tapir instructions in this function. Collect the set of + // helper functions generated by this process. + bool Changed = false; + + // Lower calls to get Tapir-loop grainsizes. + while (!GrainsizeCalls.empty()) { + CallInst *GrainsizeCall = GrainsizeCalls.pop_back_val(); + LLVM_DEBUG(dbgs() << "Lowering grainsize call " << *GrainsizeCall << "\n"); + Target->lowerGrainsizeCall(GrainsizeCall); + Changed = true; + } + + // Lower calls to task_frameaddr intrinsics. + while (!TaskFrameAddrCalls.empty()) { + CallInst *TaskFrameAddrCall = TaskFrameAddrCalls.pop_back_val(); + LLVM_DEBUG(dbgs() << "Lowering task_frameaddr call " << *TaskFrameAddrCall + << "\n"); + Target->lowerTaskFrameAddrCall(TaskFrameAddrCall); + Changed = true; + } + Target->lowerTapirRTCalls(TapirRTCalls, F, TFEntry); + + // Process the set of syncs. 
+ while (!Syncs.empty()) { + SyncInst *SI = Syncs.pop_back_val(); + Target->lowerSync(*SI); + Changed = true; + } + + while (!ReducerOperations.empty()) { + CallBase *CI = ReducerOperations.pop_back_val(); + Target->lowerReducerOperation(CI); + Changed = true; + } + + return Changed; +} + +bool TapirToTargetImpl::processRootTask( + Function &F, TFOutlineMapTy &TFToOutline, OutlineAnalysis &OA, + TaskInfo &TI) { + NamedRegionTimer NRT("processRootTask", "Process root task", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + bool Changed = false; + // Check if the root task performs a spawn + bool PerformsSpawn = false; + for (Spindle *TF : TI.getRootTask()->taskframe_roots()) { + if (TF->getTaskFromTaskFrame()) { + PerformsSpawn = true; + break; + } + } + if (PerformsSpawn) { + Changed = true; + // Process root-task function F as a spawner. + Target->preProcessRootSpawner(F, &F.getEntryBlock()); + + // Process each call to a subtask. + for (Spindle *TF : TI.getRootTask()->taskframe_roots()) + if (TF->getTaskFromTaskFrame()) + Target->processSubTaskCall(TFToOutline[TF], OA.DT); + + Target->postProcessRootSpawner(F, &F.getEntryBlock()); + } + // Process the Tapir instructions in F directly. + Changed |= processSimpleABI(F, &F.getEntryBlock()); + return Changed; +} + +bool TapirToTargetImpl::processSpawnerTaskFrame( + Spindle *TF, TFOutlineMapTy &TFToOutline, OutlineAnalysis &OA, + TaskInfo &TI) { + NamedRegionTimer NRT("processSpawnerTaskFrame", "Process spawner taskframe", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + Function &F = *TFToOutline[TF].Outline; + + // Process function F as a spawner. + Target->preProcessRootSpawner(F, TF->getEntry()); + + // Process each call to a subtask. + for (Spindle *SubTF : TF->subtaskframes()) + if (SubTF->getTaskFromTaskFrame()) + Target->processSubTaskCall(TFToOutline[SubTF], OA.DT); + + Target->postProcessRootSpawner(F, TF->getEntry()); + + // Process the Tapir instructions in F directly. 
+ processSimpleABI(F, TF->getEntry()); + return true; +} + +bool TapirToTargetImpl::processOutlinedTask( + Task *T, TFOutlineMapTy &TFToOutline, OutlineAnalysis &OA, TaskInfo &TI) { + NamedRegionTimer NRT("processOutlinedTask", "Process outlined task", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + Spindle *TF = getTaskFrameForTask(T); + Function &F = *TFToOutline[TF].Outline; + + Instruction *DetachPt = TFToOutline[TF].DetachPt; + Instruction *TaskFrameCreate = TFToOutline[TF].TaskFrameCreate; + + Target->preProcessOutlinedTask(F, DetachPt, TaskFrameCreate, !T->isSerial(), + TF->getEntry()); + // Process each call to a subtask. + for (Spindle *SubTF : TF->subtaskframes()) + if (SubTF->getTaskFromTaskFrame()) + Target->processSubTaskCall(TFToOutline[SubTF], OA.DT); + + Target->postProcessOutlinedTask(F, DetachPt, TaskFrameCreate, !T->isSerial(), + TF->getEntry()); + + // Process the Tapir instructions in F directly. + processSimpleABI(F, TF->getEntry()); + return true; +} + +// Helper method to check if the given taskframe spindle performs any spawns. +static bool isSpawningTaskFrame(const Spindle *TF) { + for (const Spindle *SubTF : TF->subtaskframes()) + if (SubTF->getTaskFromTaskFrame()) + return true; + return false; +} + +// Helper method to check if the given taskframe corresponds to a spawned task. +static bool isSpawnedTaskFrame(const Spindle *TF) { + return TF->getTaskFromTaskFrame(); +} + +bool TapirToTargetImpl::processFunction( + Function &F, SmallVectorImpl &NewHelpers) { + LLVM_DEBUG(dbgs() << "Tapir: Processing function " << F.getName() << "\n"); + + // Get the necessary analysis results. 
+ OutlineAnalysis OA(GetAA(F), GetAC(F), GetDT(F)); + TaskInfo &TI = GetTI(F); + splitTaskFrameCreateBlocks(F, &OA.DT, &TI); + TI.findTaskFrameTree(); + + bool ChangedCFG = false; + { + NamedRegionTimer NRT("TargetPreProcess", "Target preprocessing", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + ChangedCFG = Target->preProcessFunction(F, TI); + } // end timed region + + // If we don't need to do outlining, then just handle the simple ABI. + if (!Target->shouldDoOutlining(F)) { + // Process the Tapir instructions in F directly. + if (!Target->processOrdinaryFunction(F, &F.getEntryBlock())) + processSimpleABI(F, &F.getEntryBlock()); + return ChangedCFG; + } + + // Traverse the tasks in this function in post order. + SmallVector AllTaskFrames; + + // Collect all taskframes in the function in postorder. + for (Spindle *TFRoot : TI.getRootTask()->taskframe_roots()) + for (Spindle *TFSpindle : post_order>(TFRoot)) + AllTaskFrames.push_back(TFSpindle); + + // Fixup external uses of values defined in taskframes. + for (Spindle *TF : AllTaskFrames) + fixupTaskFrameExternalUses(TF, TI, OA.DT); + + // Outline all tasks in a target-oblivious manner. + TFOutlineMapTy TFToOutline = outlineAllTasks(F, AllTaskFrames, OA, TI); + + // Perform target-specific processing of this function and all newly created + // helpers. 
+ for (Spindle *TF : AllTaskFrames) { + if (isSpawningTaskFrame(TF) && !isSpawnedTaskFrame(TF)) + processSpawnerTaskFrame(TF, TFToOutline, OA, TI); + else if (isSpawnedTaskFrame(TF)) + processOutlinedTask(TF->getTaskFromTaskFrame(), TFToOutline, OA, TI); + else + if (!Target->processOrdinaryFunction(*TFToOutline[TF].Outline, + TF->getEntry())) + processSimpleABI(*TFToOutline[TF].Outline, TF->getEntry()); + NewHelpers.push_back(TFToOutline[TF].Outline); + } + // Process the root task + processRootTask(F, TFToOutline, OA, TI); + + { + NamedRegionTimer NRT("TargetPostProcess", "Target postprocessing", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + Target->postProcessFunction(F); + for (Function *H : NewHelpers) + Target->postProcessHelper(*H); + } // end timed region + + LLVM_DEBUG({ + NamedRegionTimer NRT("FunctionVerify", + "Post-lowering function verification", TimerGroupName, + TimerGroupDescription, TimePassesIsEnabled); + if (verifyFunction(F, &errs())) { + LLVM_DEBUG(dbgs() << "Function after lowering:" << F); + llvm_unreachable("Tapir lowering produced bad IR!"); + } + for (Function *H : NewHelpers) + if (verifyFunction(*H, &errs())) { + LLVM_DEBUG(dbgs() << "Function after lowering:" << *H); + llvm_unreachable("Tapir lowering produced bad IR!"); + } + }); + + return ChangedCFG || !NewHelpers.empty(); +} + +bool TapirToTargetImpl::run() { + // Add functions that detach to the work list. + SmallVector WorkList; + { + NamedRegionTimer NRT("shouldProcessFunction", "Find functions to process", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + for (Function &F : M) { + if (F.empty()) + continue; + // TODO: Use per-function Tapir targets? 
+ if (!Target) { + TargetLibraryInfo &TLI = GetTLI(F); + Target = getTapirTargetFromID(M, TLI.getTapirTarget()); + if (TapirTargetOptions *Options = TLI.getTapirTargetOptions()) + Target->setOptions(*Options); + } + assert(Target && "Missing Tapir target"); + if (Target->shouldProcessFunction(F)) + WorkList.push_back(&F); + } + } + + // Quit early if there are no functions in this module to lower. + if (WorkList.empty()) + return false; + + // There are functions in this module to lower. Prepare the module for Tapir + // lowering. + Target->prepareModule(); + + bool Changed = false; + while (!WorkList.empty()) { + // Process the next function. + Function *F = WorkList.pop_back_val(); + SmallVector NewHelpers; + + Changed |= processFunction(*F, NewHelpers); + + // Check the generated helper functions to see if any need to be processed, + // that is, to see if any of them themselves detach a subtask. + { + NamedRegionTimer NRT("shouldProcessHelper", + "Find helper functions to process", TimerGroupName, + TimerGroupDescription, TimePassesIsEnabled); + for (Function *Helper : NewHelpers) + if (Target->shouldProcessFunction(*Helper)) + WorkList.push_back(Helper); + } + } + return Changed; +} + +PreservedAnalyses TapirToTargetPass::run(Module &M, ModuleAnalysisManager &AM) { + auto &FAM = AM.getResult(M).getManager(); + auto GetAA = [&FAM](Function &F) -> AAResults & { + return FAM.getResult(F); + }; + auto GetDT = [&FAM](Function &F) -> DominatorTree & { + return FAM.getResult(F); + }; + auto GetTI = [&FAM](Function &F) -> TaskInfo & { + return FAM.getResult(F); + }; + auto GetAC = [&FAM](Function &F) -> AssumptionCache & { + return FAM.getResult(F); + }; + auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult(F); + }; + + bool Changed = TapirToTargetImpl(M, GetAA, GetDT, GetTI, GetAC, GetTLI).run(); + + if (Changed) + return PreservedAnalyses::none(); + return PreservedAnalyses::all(); +} + +namespace { +struct LowerTapirToTarget : public 
ModulePass { + static char ID; // Pass identification, replacement for typeid + explicit LowerTapirToTarget() : ModulePass(ID) { + initializeLowerTapirToTargetPass(*PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { return "Lower Tapir to target"; } + + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + } +}; +} // End of anonymous namespace + +char LowerTapirToTarget::ID = 0; +INITIALIZE_PASS_BEGIN(LowerTapirToTarget, "tapir2target", + "Lower Tapir to Target ABI", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TaskInfoWrapperPass) +INITIALIZE_PASS_END(LowerTapirToTarget, "tapir2target", + "Lower Tapir to Target ABI", false, false) + +bool LowerTapirToTarget::runOnModule(Module &M) { + if (skipModule(M)) + return false; + auto GetAA = [this](Function &F) -> AAResults & { + return this->getAnalysis(F).getAAResults(); + }; + auto GetDT = [this](Function &F) -> DominatorTree & { + return this->getAnalysis(F).getDomTree(); + }; + auto GetTI = [this](Function &F) -> TaskInfo & { + return this->getAnalysis(F).getTaskInfo(); + }; + AssumptionCacheTracker *ACT = &getAnalysis(); + auto GetAC = [&ACT](Function &F) -> AssumptionCache & { + return ACT->getAssumptionCache(F); + }; + auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { + return this->getAnalysis().getTLI(F); + }; + + return TapirToTargetImpl(M, GetAA, GetDT, GetTI, GetAC, GetTLI).run(); +} + +// createLowerTapirToTargetPass - Provide an entry point to create this pass. 
+// +namespace llvm { +ModulePass *createLowerTapirToTargetPass() { return new LowerTapirToTarget(); } +} // namespace llvm diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index f06ea89cc61d4e..39c38991957896 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -196,6 +196,17 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, Instruction *PTI = PredBB->getTerminator(); if (PTI->isExceptionalTerminator() || PTI->mayHaveSideEffects()) return false; + // Don't break syncs. + if (isa(PredBB->getTerminator())) return false; + // Don't break entry blocks of detached CFG's. + for (pred_iterator PI = pred_begin(PredBB), PE = pred_end(PredBB); + PI != PE; ++PI) { + BasicBlock *PredPredBB = *PI; + if (const DetachInst *DI = + dyn_cast(PredPredBB->getTerminator())) + if (DI->getDetached() == PredBB) + return false; + } // Can't merge if there are multiple distinct successors. if (!PredecessorWithTwoSuccessors && PredBB->getUniqueSuccessor() != BB) @@ -648,7 +659,43 @@ BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, DominatorTree *DT, // block. assert(BB->getTerminator()->getNumSuccessors() == 1 && "Should have a single succ!"); - return SplitBlock(BB, BB->getTerminator(), DT, LI, MSSAU, BBName); + if (SyncInst *OldSI = dyn_cast(BB->getTerminator())) { + // Insert a new basic block after BB. + std::string Name = BBName.str(); + BasicBlock *NewBB = BasicBlock::Create( + BB->getContext(), Name.empty() ? BB->getName() + ".split" : Name, + BB->getParent(), BB->getNextNode()); + DebugLoc Loc = Succ->front().getDebugLoc(); + // Terminate that block with an unconditional branch to Succ. + BranchInst::Create(Succ, NewBB)->setDebugLoc(Loc); + // Update the successor of the sync instruction to be NewBB. + OldSI->setSuccessor(0, NewBB); + // Update any PHI ndes in Succ. 
+ NewBB->replaceSuccessorsPhiUsesWith(BB, NewBB); + + // The new block lives in whichever loop the old one did. This preserves + // LCSSA as well, because we force the split point to be after any PHI + // nodes. + if (LI) + if (Loop *L = LI->getLoopFor(BB)) + L->addBasicBlockToLoop(NewBB, *LI); + + if (DT) + // Old dominates New. New node dominates all other nodes dominated by Old. + if (DomTreeNode *OldNode = DT->getNode(BB)) { + std::vector Children(OldNode->begin(), OldNode->end()); + + DomTreeNode *NewNode = DT->addNewBlock(NewBB, BB); + for (DomTreeNode *I : Children) + DT->changeImmediateDominator(I, NewNode); + } + + // Note: We don't need to update MSSA in this case, because the sync + // instruction remains in the original basic block. + return NewBB; + } else { + return SplitBlock(BB, BB->getTerminator(), DT, LI, MSSAU, BBName); + } } void llvm::setUnwindEdgeTo(Instruction *TI, BasicBlock *Succ) { diff --git a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp index ddb35756030f03..720b427ad1cc16 100644 --- a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -31,6 +31,7 @@ #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/TapirUtils.h" #include "llvm/Transforms/Utils/ValueMapper.h" using namespace llvm; @@ -113,10 +114,29 @@ llvm::SplitKnownCriticalEdge(Instruction *TI, unsigned SuccNum, const Twine &BBName) { assert(!isa(TI) && "Cannot split critical edge from IndirectBrInst"); + assert(!isa(TI) && + "Cannot split critical edge from ReattachInst"); + + bool SplittingDetachContinue = + isa(TI) || (isDetachedRethrow(TI) && (1 == SuccNum)) || + (isa(TI) && ((1 == SuccNum) || (2 == SuccNum))); + if (SplittingDetachContinue) + assert((Options.SplitDetachContinue && Options.DT) && + "Cannot split critical continuation edge from a detach"); 
BasicBlock *TIBB = TI->getParent(); BasicBlock *DestBB = TI->getSuccessor(SuccNum); + // If we're splitting a detach-continue edge, get the associated reattaches. + SmallVector Reattaches; + if (SplittingDetachContinue) { + BasicBlockEdge DetachEdge(TIBB, TI->getSuccessor(0)); + for (BasicBlock *Pred : predecessors(DestBB)) + if (isa(Pred->getTerminator())) + if (Options.DT->dominates(DetachEdge, Pred)) + Reattaches.push_back(Pred); + } + // Splitting the critical edge to a pad block is non-trivial. Don't do // it in this generic function. if (DestBB->isEHPad()) return nullptr; @@ -184,6 +204,12 @@ llvm::SplitKnownCriticalEdge(Instruction *TI, unsigned SuccNum, // Branch to the new block, breaking the edge. TI->setSuccessor(SuccNum, NewBB); + // If we're splitting a detach-continue edge, redirect all appropriate + // reattach edges to branch to the new block + if (SplittingDetachContinue) + for (BasicBlock *RBB : Reattaches) + RBB->getTerminator()->setSuccessor(0, NewBB); + // If there are any PHI nodes in DestBB, we need to update them so that they // merge incoming values from NewBB instead of from TIBB. { @@ -203,6 +229,28 @@ llvm::SplitKnownCriticalEdge(Instruction *TI, unsigned SuccNum, BBIdx = PN->getBasicBlockIndex(TIBB); PN->setIncomingBlock(BBIdx, NewBB); } + + // Update the PHI node entries for the reattach predecessors as well. + if (SplittingDetachContinue) { + for (BasicBlock *RBB : Reattaches) { + unsigned BBIdx = 0; + for (BasicBlock::iterator I = DestBB->begin(); isa(I); ++I) { + // We no longer enter through RBB, now we come in through NewBB. + // Revector exactly one entry in the PHI node that used to come from + // TIBB to come from NewBB. + PHINode *PN = cast(I); + + // Reuse the previous value of BBIdx if it lines up. In cases where we + // have multiple phi nodes with *lots* of predecessors, this is a speed + // win because we don't have to scan the PHI looking for TIBB. 
This + // happens because the BB list of PHI nodes are usually in the same + // order. + if (PN->getIncomingBlock(BBIdx) != RBB) + BBIdx = PN->getBasicBlockIndex(RBB); + PN->removeIncomingValue(BBIdx); + } + } + } } // If there are any other edges from TIBB to DestBB, update those to go diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp index 5de8ff84de7711..85d328d5bf6b65 100644 --- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp @@ -1388,6 +1388,20 @@ bool llvm::isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, isLibFuncEmittable(M, TLI, TheLibFunc); } +bool llvm::inferTapirTargetLibFuncAttributes(Function &F, + const TargetLibraryInfo &TLI) { + if (!TLI.isTapirTargetLibFunc(F)) + return false; + + bool Changed = false; + // FIXME: For now, we just set generic properties on Tapir-target library + // functions. + Changed |= setDoesNotFreeMemory(F); + Changed |= setWillReturn(F); + + return Changed; +} + bool llvm::hasFloatFn(const Module *M, const TargetLibraryInfo *TLI, Type *Ty, LibFunc DoubleFn, LibFunc FloatFn, LibFunc LongDoubleFn) { switch (Ty->getTypeID()) { diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt index a870071f3f641d..ab4f19f43856d8 100644 --- a/llvm/lib/Transforms/Utils/CMakeLists.txt +++ b/llvm/lib/Transforms/Utils/CMakeLists.txt @@ -78,6 +78,9 @@ add_llvm_component_library(LLVMTransformUtils SplitModule.cpp StripNonLineTableDebugInfo.cpp SymbolRewriter.cpp + TapirUtils.cpp + TaskCanonicalize.cpp + TaskSimplify.cpp UnifyFunctionExitNodes.cpp UnifyLoopExits.cpp Utils.cpp diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index d55208602b715f..c6985eb05054fc 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -76,6 +76,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, 
ValueToValueMapTy &VMap, CodeInfo->ContainsCalls |= hasCalls; CodeInfo->ContainsMemProfMetadata |= hasMemProfMetadata; CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; + CodeInfo->ContainsDetach |= isa(BB->getTerminator()); } return NewBB; } @@ -616,6 +617,7 @@ void PruningFunctionCloner::CloneBlock( CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas && BB != &BB->getParent()->front(); + CodeInfo->ContainsDetach |= isa(BB->getTerminator()); } } @@ -627,6 +629,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, bool ModuleLevelChanges, SmallVectorImpl &Returns, + SmallVectorImpl &Resumes, const char *NameSuffix, ClonedCodeInfo *CodeInfo) { assert(NameSuffix && "NameSuffix cannot be null!"); @@ -915,9 +918,12 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // because we can iteratively remove and merge returns above. for (Function::iterator I = cast(VMap[StartingBB])->getIterator(), E = NewFunc->end(); - I != E; ++I) + I != E; ++I) { if (ReturnInst *RI = dyn_cast(I->getTerminator())) Returns.push_back(RI); + if (ResumeInst *RI = dyn_cast(I->getTerminator())) + Resumes.push_back(RI); + } } /// This works exactly like CloneFunctionInto, @@ -930,9 +936,11 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, void llvm::CloneAndPruneFunctionInto( Function *NewFunc, const Function *OldFunc, ValueToValueMapTy &VMap, bool ModuleLevelChanges, SmallVectorImpl &Returns, + SmallVectorImpl &Resumes, const char *NameSuffix, ClonedCodeInfo *CodeInfo) { CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap, - ModuleLevelChanges, Returns, NameSuffix, CodeInfo); + ModuleLevelChanges, Returns, Resumes, NameSuffix, + CodeInfo); } /// Remaps instructions in \p Blocks using the mapping in \p VMap. 
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index c390af351a6946..e8ff3e6c91ac60 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -905,15 +905,20 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::AllocSize: case Attribute::Builtin: case Attribute::Convergent: + case Attribute::HyperView: + case Attribute::Injective: case Attribute::JumpTable: case Attribute::Naked: case Attribute::NoBuiltin: case Attribute::NoMerge: case Attribute::NoReturn: case Attribute::NoSync: + case Attribute::ReducerRegister: + case Attribute::ReducerUnregister: case Attribute::ReturnsTwice: case Attribute::Speculatable: case Attribute::StackAlignment: + case Attribute::Stealable: case Attribute::WillReturn: case Attribute::AllocKind: case Attribute::PresplitCoroutine: @@ -946,6 +951,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::SafeStack: case Attribute::ShadowCallStack: case Attribute::SanitizeAddress: + case Attribute::SanitizeCilk: case Attribute::SanitizeMemory: case Attribute::SanitizeThread: case Attribute::SanitizeHWAddress: @@ -954,6 +960,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::StackProtect: case Attribute::StackProtectReq: case Attribute::StackProtectStrong: + case Attribute::StrandPure: case Attribute::StrictFP: case Attribute::UWTable: case Attribute::VScaleRange: @@ -982,6 +989,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs, case Attribute::ReadOnly: case Attribute::Returned: case Attribute::SExt: + case Attribute::StrandNoAlias: case Attribute::StructRet: case Attribute::SwiftError: case Attribute::SwiftSelf: diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index f7b93fc8fd0609..f77184a084566a 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ 
b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -60,9 +60,12 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/TargetParser/Triple.h" #include "llvm/Transforms/Utils/AssumeBundleBuilder.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/TapirUtils.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include #include @@ -588,6 +591,269 @@ static BasicBlock *HandleCallsInBlockInlinedThroughInvoke( return nullptr; } +// Helper method to check if the given UnwindEdge unwinds a taskframe, i.e., if +// it is terminated with a taskframe.resume intrinsic. +static bool isTaskFrameUnwind(const BasicBlock *UnwindEdge) { + return isTaskFrameResume(UnwindEdge->getTerminator()); +} + +static void splitTaskFrameEnds(Instruction *TFCreate) { + // Split taskframe.end that use TFCreate. + SmallVector TFEndToSplit; + for (User *U : TFCreate->users()) + if (IntrinsicInst *UI = dyn_cast(U)) + if (Intrinsic::taskframe_end == UI->getIntrinsicID()) + TFEndToSplit.push_back(UI); + + for (Instruction *TFEnd : TFEndToSplit) { + if (TFEnd != TFEnd->getParent()->getTerminator()->getPrevNode()) { + BasicBlock::iterator Iter = ++TFEnd->getIterator(); + SplitBlock(TFEnd->getParent(), &*Iter); + // Try to attach debug info to the new terminator after the taskframe.end + // call. + Instruction *SplitTerminator = TFEnd->getParent()->getTerminator(); + if (!SplitTerminator->getDebugLoc()) + SplitTerminator->setDebugLoc(TFEnd->getDebugLoc()); + Iter->getParent()->setName(TFEnd->getParent()->getName() + ".tfend"); + } + } +} + +// Recursively handle inlined tasks. 
+static void HandleInlinedTasksHelper( + SmallPtrSetImpl &BlocksToProcess, + BasicBlock *FirstNewBlock, BasicBlock *UnwindEdge, + BasicBlock *Unreachable, Value *CurrentTaskFrame, + SmallVectorImpl *ParentWorklist, + LandingPadInliningInfo &Invoke, + SmallPtrSetImpl &InlinedLPads) { + SmallVector DetachesToReplace; + SmallVector Worklist; + // TODO: See if we need a global Visited set over all recursive calls, i.e., + // to handle shared exception-handling blocks. + SmallPtrSet Visited; + Worklist.push_back(FirstNewBlock); + do { + BasicBlock *BB = Worklist.pop_back_val(); + // Skip blocks we've seen before + if (!Visited.insert(BB).second) + continue; + // Skip blocks not in the set to process. + if (!BlocksToProcess.count(BB)) + continue; + + if (Instruction *TFCreate = + FindTaskFrameCreateInBlock(BB, CurrentTaskFrame)) { + // Split the block at the taskframe.create, if necessary. + BasicBlock *NewBB; + if (TFCreate != &BB->front()) { + NewBB = SplitBlock(BB, TFCreate); + BlocksToProcess.insert(NewBB); + } else + NewBB = BB; + + // Split any blocks containing taskframe.end intrinsics that use + // TFCreate. + splitTaskFrameEnds(TFCreate); + + // Create an unwind edge for the taskframe. + BasicBlock *TaskFrameUnwindEdge = + CreateSubTaskUnwindEdge(Intrinsic::taskframe_resume, TFCreate, + UnwindEdge, Unreachable, TFCreate); + + // Recursively check all blocks + HandleInlinedTasksHelper(BlocksToProcess, NewBB, TaskFrameUnwindEdge, + Unreachable, TFCreate, &Worklist, Invoke, + InlinedLPads); + + // Remove the unwind edge for the taskframe if it is not needed. + if (pred_empty(TaskFrameUnwindEdge)) + TaskFrameUnwindEdge->eraseFromParent(); + continue; + } + + // Promote any calls in the block to invokes. + if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( + BB, UnwindEdge)) { + // If this is the topmost invocation of HandleInlinedTasksHelper, update + // any PHI nodes in the exceptional block to indicate that there is now a + // new entry in them. 
+ if (nullptr == ParentWorklist) + Invoke.addIncomingPHIValuesFor(NewBB); + BlocksToProcess.insert( + cast(NewBB->getTerminator())->getNormalDest()); + } + + // Forward any resumes that are remaining here. + if (ResumeInst *RI = dyn_cast(BB->getTerminator())) + Invoke.forwardResume(RI, InlinedLPads); + + // Ignore reattach terminators. + if (isa(BB->getTerminator()) || + isDetachedRethrow(BB->getTerminator())) + continue; + + // If we find a taskframe.end, add its successor to the parent search. + if (endsTaskFrame(BB, CurrentTaskFrame)) { + // We may not have a parent worklist, if inlining itself created + // the taskframe. + if (ParentWorklist) + ParentWorklist->push_back(BB->getSingleSuccessor()); + continue; + } + + // If we find a taskframe.resume terminator, add its successor to the parent + // search. + if (isTaskFrameResume(BB->getTerminator()) && ParentWorklist) { + assert(isTaskFrameUnwind(UnwindEdge) && + "Unexpected taskframe.resume, doesn't correspond to unwind edge"); + InvokeInst *II = cast(BB->getTerminator()); + + // We may not have a parent worklist, however, if inlining itself created + // the taskframe. + if (ParentWorklist) + ParentWorklist->push_back(II->getUnwindDest()); + continue; + } + + // Process a detach instruction specially. In particular, process the + // spawned task recursively. + if (DetachInst *DI = dyn_cast(BB->getTerminator())) { + if (!DI->hasUnwindDest()) { + // Create an unwind edge for the subtask, which is terminated with a + // detached-rethrow. + BasicBlock *SubTaskUnwindEdge = CreateSubTaskUnwindEdge( + Intrinsic::detached_rethrow, DI->getSyncRegion(), UnwindEdge, + Unreachable, DI); + + // Recursively check all blocks in the detached task. + HandleInlinedTasksHelper(BlocksToProcess, DI->getDetached(), + SubTaskUnwindEdge, Unreachable, + CurrentTaskFrame, &Worklist, Invoke, + InlinedLPads); + + // If the new unwind edge is not used, remove it. 
+ if (pred_empty(SubTaskUnwindEdge)) + SubTaskUnwindEdge->eraseFromParent(); + else { + DetachesToReplace.push_back(DI); + // Update PHI nodes in the exceptional block to indicate that + // SubTaskUnwindEdge is a new entry in them. This should only have an + // effect for the topmost call to HandleInlinedTasksHelper. + Invoke.addIncomingPHIValuesFor(SubTaskUnwindEdge); + } + + } else if (Visited.insert(DI->getUnwindDest()).second) { + // If the detach-unwind isn't dead, add it to the worklist. + Worklist.push_back(DI->getUnwindDest()); + } + // Add the continuation to the worklist. + if (CurrentTaskFrame && isTaskFrameUnwind(UnwindEdge) && + (CurrentTaskFrame == getTaskFrameUsed(DI->getDetached()))) { + // This detach-continuation terminates the current taskframe, so push it + // onto the parent worklist. + assert(ParentWorklist && "Unexpected taskframe unwind edge"); + ParentWorklist->push_back(DI->getContinue()); + } else { + // We can process this detach-continuation directly, because it does not + // terminate the current taskframe. + Worklist.push_back(DI->getContinue()); + } + continue; + } + + // In the normal case, add all successors of BB to the worklist. + for (BasicBlock *Successor : successors(BB)) + Worklist.push_back(Successor); + + } while (!Worklist.empty()); + + // Replace detaches that now require unwind destinations. + while (!DetachesToReplace.empty()) { + DetachInst *DI = DetachesToReplace.pop_back_val(); + // If this is the topmost invocation of HandleInlinedTasksHelper, update any + // PHI nodes in the exceptional block to indicate that there is now a new + // entry in them. 
+ if (nullptr == ParentWorklist) + Invoke.addIncomingPHIValuesFor(DI->getParent()); + ReplaceInstWithInst(DI, DetachInst::Create( + DI->getDetached(), DI->getContinue(), UnwindEdge, + DI->getSyncRegion())); + } +} + +static void HandleInlinedTasks( + SmallPtrSetImpl &BlocksToProcess, BasicBlock *FirstNewBlock, + Value *TFCreate, BasicBlock *UnwindEdge, LandingPadInliningInfo &Invoke, + SmallPtrSetImpl &InlinedLPads) { + Function *Caller = UnwindEdge->getParent(); + + // Create the normal return for the detached rethrow. + BasicBlock *UnreachableBlk = BasicBlock::Create( + Caller->getContext(), UnwindEdge->getName()+".unreachable", Caller); + + // Recursively handle inlined tasks. + HandleInlinedTasksHelper(BlocksToProcess, FirstNewBlock, UnwindEdge, + UnreachableBlk, TFCreate, nullptr, Invoke, + InlinedLPads); + + // Either finish the unreachable block or remove it, depending on whether it + // is used. + if (!pred_empty(UnreachableBlk)) { + IRBuilder<> Builder(UnreachableBlk); + Builder.CreateUnreachable(); + } else { + UnreachableBlk->eraseFromParent(); + } +} + +static void GetInlinedLPads(SmallPtrSetImpl &BlocksToProcess, + SmallPtrSetImpl &InlinedLPads) { + SmallVector Worklist; + SmallPtrSet Visited; + + // Push all blocks to process that are terminated by a resume onto the + // worklist. + for (BasicBlock *BB : BlocksToProcess) + if (isa(BB->getTerminator())) + Worklist.push_back(BB); + + // Traverse the blocks to process from the resumes going backwards (through + // predecessors). + while(!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + // Skip blocks we've seen before + if (!Visited.insert(BB).second) + continue; + // Skip blocks not in the set to process. + if (!BlocksToProcess.count(BB)) + continue; + + // If BB is a landingpad... + if (BB->isLandingPad()) { + // Record BB's landingpad instruction. 
+ InlinedLPads.insert(BB->getLandingPadInst()); + + // Add predecessors of BB to the worklist, skipping predecessors via a + // detached.rethrow or taskframe.resume. + for (BasicBlock *Predecessor : predecessors(BB)) + if (!isDetachedRethrow(Predecessor->getTerminator()) && + !isTaskFrameResume(Predecessor->getTerminator())) + Worklist.push_back(Predecessor); + + continue; + } + + // In the normal case, add predecessors of BB to the worklist, excluding + // predecessors via reattach, detached.rethrow, or taskframe.resume + for (BasicBlock *Predecessor : predecessors(BB)) + if (!isa(Predecessor->getTerminator()) && + !isDetachedRethrow(Predecessor->getTerminator()) && + !isTaskFrameResume(Predecessor->getTerminator())) + Worklist.push_back(Predecessor); + } +} + /// If we inlined an invoke site, we need to convert calls /// in the body of the inlined function into invokes. /// @@ -595,6 +861,7 @@ static BasicBlock *HandleCallsInBlockInlinedThroughInvoke( /// block of the inlined code (the last block is the end of the function), /// and InlineCodeInfo is information about the code that got inlined. static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock, + Value *TFCreate, ClonedCodeInfo &InlinedCodeInfo) { BasicBlock *InvokeDest = II->getUnwindDest(); @@ -605,6 +872,41 @@ static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock, // rewrite. LandingPadInliningInfo Invoke(II); + // Special processing is needed to inline a function that contains a task. + if (InlinedCodeInfo.ContainsDetach) { + // Get the set of blocks for the inlined function. + SmallPtrSet BlocksToProcess; + for (Function::iterator BB = FirstNewBlock->getIterator(), + E = Caller->end(); BB != E; ++BB) + BlocksToProcess.insert(&*BB); + + // Get all of the inlined landing pad instructions. 
+ SmallPtrSet InlinedLPads; + GetInlinedLPads(BlocksToProcess, InlinedLPads); + + // Append the clauses from the outer landing pad instruction into the + // inlined landing pad instructions. + LandingPadInst *OuterLPad = Invoke.getLandingPadInst(); + for (LandingPadInst *InlinedLPad : InlinedLPads) { + unsigned OuterNum = OuterLPad->getNumClauses(); + InlinedLPad->reserveClauses(OuterNum); + for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx) + InlinedLPad->addClause(OuterLPad->getClause(OuterIdx)); + if (OuterLPad->isCleanup()) + InlinedLPad->setCleanup(true); + } + + // Process inlined subtasks. + HandleInlinedTasks(BlocksToProcess, FirstNewBlock, TFCreate, + Invoke.getOuterResumeDest(), Invoke, InlinedLPads); + // Now that everything is happy, we have one final detail. The PHI nodes in + // the exception destination block still have entries due to the original + // invoke instruction. Eliminate these entries (which might even delete the + // PHI node) now. + InvokeDest->removePredecessor(II->getParent()); + return; + } + // Get all of the inlined landing pad instructions. 
SmallPtrSet InlinedLPads; for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end(); @@ -1515,9 +1817,10 @@ static Value *HandleByValArgument(Type *ByValType, Value *Arg, if (ByValAlignment) Alignment = std::max(Alignment, *ByValAlignment); + BasicBlock *NewCtx = GetDetachedCtx(TheCall->getParent()); Value *NewAlloca = new AllocaInst(ByValType, DL.getAllocaAddrSpace(), nullptr, Alignment, - Arg->getName(), &*Caller->begin()->begin()); + Arg->getName(), &*NewCtx->begin()); IFI.StaticAllocas.push_back(cast(NewAlloca)); // Uses of the argument in the function should use our new alloca @@ -1924,6 +2227,125 @@ inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind, } } +static bool isTaskFrameCreate(const Instruction &I) { + if (const IntrinsicInst *II = dyn_cast(&I)) + return Intrinsic::taskframe_create == II->getIntrinsicID(); + return false; +} + +static BasicBlock *SplitResume(ResumeInst *RI, Intrinsic::ID TermFunc, + Value *Token, BasicBlock *Unreachable) { + Value *RIValue = RI->getValue(); + BasicBlock *OldBB = RI->getParent(); + Module *M = OldBB->getModule(); + + // Split the resume block at the resume. + BasicBlock *NewBB = SplitBlock(OldBB, RI); + + // Invoke the specified terminator function at the end of the old block. + InvokeInst *TermFuncInvoke = InvokeInst::Create( + Intrinsic::getDeclaration(M, TermFunc, { RIValue->getType() }), + Unreachable, NewBB, { Token, RIValue }); + ReplaceInstWithInst(OldBB->getTerminator(), TermFuncInvoke); + + // Insert a landingpad at the start of the new block. + IRBuilder<> Builder(RI); + LandingPadInst *LPad = Builder.CreateLandingPad(RIValue->getType(), 0, + RIValue->getName()); + LPad->setCleanup(true); + + // Replace the argument of the resume with the value of the new landingpad. 
+ RI->setOperand(0, LPad); + + return NewBB; +} + +static void HandleInlinedResumeInTask(BasicBlock *EntryBlock, BasicBlock *Ctx, + ResumeInst *Resume, + BasicBlock *Unreachable) { + // If the DetachedBlock has no predecessor, then it is the entry of the + // function. There's nothing to do in this case, so simply return. + if (pred_empty(EntryBlock) && EntryBlock == Ctx) + return; + + BasicBlock *Parent = + (EntryBlock != Ctx ? Ctx : EntryBlock->getSinglePredecessor()); + Module *M = Parent->getModule(); + if (isTaskFrameCreate(EntryBlock->front())) { + Value *TaskFrame = &EntryBlock->front(); + if (InvokeInst *TFResume = getTaskFrameResume(TaskFrame)) { + BasicBlock *ResumeDest = TFResume->getUnwindDest(); + // Replace the resume with a taskframe.resume, whose unwind destination + // matches the unwind destination of the taskframe. + InvokeInst *NewTFResume = InvokeInst::Create( + Intrinsic::getDeclaration(M, Intrinsic::taskframe_resume, + {Resume->getValue()->getType()}), + Unreachable, ResumeDest, {TaskFrame, Resume->getValue()}); + ReplaceInstWithInst(Resume, NewTFResume); + + // Update PHI nodes in ResumeDest. + for (PHINode &PN : ResumeDest->phis()) + // Add an entry to the PHI node for the new predecessor block, + // NewTFResume->getParent(), using the same value as that from + // TFResume->getParent(). + PN.addIncoming(PN.getIncomingValueForBlock(TFResume->getParent()), + NewTFResume->getParent()); + + // No need to continue up the stack of contexts. + return; + } + + // Otherwise, split the resume to insert a novel invocation of + // taskframe.resume for this taskframe. + SplitResume(Resume, Intrinsic::taskframe_resume, TaskFrame, Unreachable); + + // Recursively handle parent contexts. 
+ if (EntryBlock != Ctx) + HandleInlinedResumeInTask(Ctx, Ctx, Resume, Unreachable); + else { + BasicBlock *NewCtx = GetDetachedCtx(Parent); + HandleInlinedResumeInTask(NewCtx, NewCtx, Resume, Unreachable); + } + + } else { + assert(EntryBlock == Ctx && "Unexpected context for detached entry block."); + DetachInst *DI = cast(Parent->getTerminator()); + Value *SyncRegion = DI->getSyncRegion(); + + if (DI->hasUnwindDest()) { + // Replace the resume with a detached.rethrow, whose unwind destination + // matches the unwind destination of the detach. + BasicBlock *DetUnwind = DI->getUnwindDest(); + InvokeInst *NewDetRethrow = InvokeInst::Create( + Intrinsic::getDeclaration(M, Intrinsic::detached_rethrow, + {Resume->getValue()->getType()}), + Unreachable, DetUnwind, {SyncRegion, Resume->getValue()}); + ReplaceInstWithInst(Resume, NewDetRethrow); + + // Update PHI nodes in unwind dest. + for (PHINode &PN : DetUnwind->phis()) + // Add an entry to the PHI node for the new predecessor block, + // NewDetRethrow->getParent(), using the same value as that from Parent. + PN.addIncoming(PN.getIncomingValueForBlock(Parent), + NewDetRethrow->getParent()); + + // No need to continue up the stack of contexts. + return; + } + + // Insert an invocation of detached.rethrow before the resume. + BasicBlock *NewBB = SplitResume(Resume, Intrinsic::detached_rethrow, + SyncRegion, Unreachable); + + // Add NewBB as the unwind destination of DI. + ReplaceInstWithInst(DI, DetachInst::Create(EntryBlock, DI->getContinue(), + NewBB, SyncRegion)); + // Recursively handle parent contexts. + BasicBlock *NewCtx = GetDetachedCtx(Parent); + HandleInlinedResumeInTask(NewCtx, NewCtx, Resume, Unreachable); + } +} + /// This function inlines the called function into the basic block of the /// caller. This returns false if it is not possible to inline this call. /// The program is still in a well defined state if this occurs though. 
@@ -2010,14 +2432,44 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, ? Caller->getPersonalityFn()->stripPointerCasts() : nullptr; if (CalledPersonality) { + Triple T(Caller->getParent()->getTargetTriple()); if (!CallerPersonality) Caller->setPersonalityFn(CalledPersonality); - // If the personality functions match, then we can perform the - // inlining. Otherwise, we can't inline. - // TODO: This isn't 100% true. Some personality functions are proper - // supersets of others and can be used in place of the other. - else if (CalledPersonality != CallerPersonality) - return InlineResult::failure("incompatible personality"); + else if (CalledPersonality != CallerPersonality) { + // See if we want to replace CallerPersonality with the CalledPersonality, + // because CalledPersonality is a proper superset. + if (classifyEHPersonality(CallerPersonality) == + getDefaultEHPersonality(T)) + // The caller is using the default personality function. We assume + // CalledPersonality is a superset. + Caller->setPersonalityFn(CalledPersonality); + + else if (classifyEHPersonality(CalledPersonality) == + EHPersonality::Cilk_CXX && + classifyEHPersonality(CallerPersonality) == + EHPersonality::GNU_CXX) + // The Cilk personality is a superset of the caller's. + Caller->setPersonalityFn(CalledPersonality); + + // If the personality functions match, then we can perform the + // inlining. Otherwise, we can't inline. + // TODO: This isn't 100% true. Some personality functions are proper + // supersets of others and can be used in place of the other. + else { + EHPersonality CalledEHPersonality = + classifyEHPersonality(CalledPersonality); + // We can inline if: + // - CalledPersonality is the default personality, or + // - CallerPersonality is the Cilk personality and CalledPersonality is + // GNU_CXX. + // Otherwise, declare that we can't inline. 
+ if (CalledEHPersonality != getDefaultEHPersonality(T) && + (classifyEHPersonality(CallerPersonality) != + EHPersonality::Cilk_CXX || + CalledEHPersonality != EHPersonality::GNU_CXX)) + return InlineResult::failure("incompatible personality"); + } + } } // We need to figure out which funclet the callsite was in so that we may @@ -2057,6 +2509,11 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, } } + // Canonicalize the caller by splitting blocks containing taskframe.create + // intrinsics. + if (splitTaskFrameCreateBlocks(*Caller)) + OrigBB = CB.getParent(); + // Determine if we are dealing with a call in an EHPad which does not unwind // to caller. bool EHPadForCallUnwindsLocally = false; @@ -2070,6 +2527,18 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, !isa(CallSiteUnwindDestToken); } + // Get the entry block of the detached context into which we're inlining. If + // we move allocas from the inlined code, we must move them to this block. + BasicBlock *DetachedCtxEntryBlock; + { + DetachedCtxEntryBlock = GetDetachedCtx(OrigBB); + assert(((&(Caller->getEntryBlock()) == DetachedCtxEntryBlock) || + pred_empty(DetachedCtxEntryBlock) || + DetachedCtxEntryBlock->getSinglePredecessor()) && + "Entry block of detached context has multiple predecessors."); + } + bool MayBeUnsyncedAtCall = mayBeUnsynced(OrigBB); + // Get an iterator to the last basic block in the function, which will have // the new function inlined after it. Function::iterator LastBlock = --Caller->end(); @@ -2077,6 +2546,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // Make sure to capture all of the return instructions from the cloned // function. 
SmallVector Returns; + SmallVector Resumes; ClonedCodeInfo InlinedFunctionInfo; Function::iterator FirstNewBlock; @@ -2141,8 +2611,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, - /*ModuleLevelChanges=*/false, Returns, ".i", - &InlinedFunctionInfo); + /*ModuleLevelChanges=*/false, Returns, Resumes, + ".i", &InlinedFunctionInfo); // Remember the first block that is newly cloned over. FirstNewBlock = LastBlock; ++FirstNewBlock; @@ -2265,7 +2735,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // calculate which instruction they should be inserted before. We insert the // instructions at the end of the current alloca list. { - BasicBlock::iterator InsertPoint = Caller->begin()->begin(); + BasicBlock::iterator InsertPoint = DetachedCtxEntryBlock->begin(); + if (isTaskFrameCreate(*InsertPoint)) + InsertPoint++; for (BasicBlock::iterator I = FirstNewBlock->begin(), E = FirstNewBlock->end(); I != E; ) { AllocaInst *AI = dyn_cast(I++); @@ -2296,8 +2768,29 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // Transfer all of the allocas over in a block. Using splice means // that the instructions aren't removed from the symbol table, then // reinserted. - Caller->getEntryBlock().splice(InsertPoint, &*FirstNewBlock, - AI->getIterator(), I); + DetachedCtxEntryBlock->splice(InsertPoint, &*FirstNewBlock, + AI->getIterator(), I); + } + + // Move any syncregion_start's into the entry basic block. Avoid moving + // syncregions if we'll need to insert a taskframe for this inlined call. 
+ if (InlinedFunctionInfo.ContainsDetach && + !InlinedFunctionInfo.ContainsDynamicAllocas && !MayBeUnsyncedAtCall) { + for (BasicBlock::iterator I = FirstNewBlock->begin(), + E = FirstNewBlock->end(); I != E; ) { + IntrinsicInst *II = dyn_cast(I++); + if (!II) continue; + if (Intrinsic::syncregion_start != II->getIntrinsicID()) + continue; + + while (isa(I) && + Intrinsic::syncregion_start == + cast(I)->getIntrinsicID()) + ++I; + + DetachedCtxEntryBlock->splice(InsertPoint, &*FirstNewBlock, + II->getIterator(), I); + } } } @@ -2453,7 +2946,46 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // If the inlined code contained dynamic alloca instructions, wrap the inlined // code with llvm.stacksave/llvm.stackrestore intrinsics. - if (InlinedFunctionInfo.ContainsDynamicAllocas) { + CallInst *TFCreate = nullptr; + BasicBlock *TFEntryBlock = DetachedCtxEntryBlock; + if (InlinedFunctionInfo.ContainsDetach && + (InlinedFunctionInfo.ContainsDynamicAllocas || MayBeUnsyncedAtCall)) { + Module *M = Caller->getParent(); + // Get the two intrinsics we care about. + Function *TFCreateFn = + Intrinsic::getDeclaration(M, Intrinsic::taskframe_create); + + // Insert the llvm.taskframe.create. + TFCreate = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin()) + .CreateCall(TFCreateFn, {}, "tf.i"); + TFCreate->setDebugLoc(CB.getDebugLoc()); + TFEntryBlock = &*FirstNewBlock; + + // If we're inlining an invoke, insert a taskframe.resume at the unwind + // destination of the invoke. + if (auto *II = dyn_cast(&CB)) { + BasicBlock *UnwindEdge = II->getUnwindDest(); + // Create the normal return for the detached rethrow. + BasicBlock *UnreachableBlk = BasicBlock::Create( + Caller->getContext(), UnwindEdge->getName()+".unreachable", Caller); + { // Add an unreachable instruction to the end of UnreachableBlk. + IRBuilder<> Builder(UnreachableBlk); + Builder.CreateUnreachable(); + } + + // Create an unwind edge for the taskframe. 
+ BasicBlock *TaskFrameUnwindEdge = CreateSubTaskUnwindEdge( + Intrinsic::taskframe_resume, TFCreate, UnwindEdge, + UnreachableBlk, II); + + for (PHINode &PN : UnwindEdge->phis()) + PN.replaceIncomingBlockWith(II->getParent(), TaskFrameUnwindEdge); + + // Replace the unwind destination of the invoke with the unwind edge for + // the taskframe. + II->setUnwindDest(TaskFrameUnwindEdge); + } + } else if (InlinedFunctionInfo.ContainsDynamicAllocas) { Module *M = Caller->getParent(); // Get the two intrinsics we care about. Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); @@ -2484,10 +3016,51 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, BasicBlock *UnwindDest = II->getUnwindDest(); Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI(); if (isa(FirstNonPHI)) { - HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo); + HandleInlinedLandingPad(II, &*FirstNewBlock, TFCreate, + InlinedFunctionInfo); } else { HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo); } + } else if (!Resumes.empty() && (&Caller->getEntryBlock() != TFEntryBlock)) { + // If we inlined into a detached task, and the inlined function contains + // resumes, then we need to insert additional calls to EH intrinsics, + // specifically, detached.rethrow and taskframe.resume. + + // Create the normal (unreachable) return for the invocations of EH + // intrinsics. + BasicBlock *UnreachableBlk = BasicBlock::Create( + Caller->getContext(), CalledFunc->getName()+".unreachable", + Caller); + { // Add an unreachable instruction to the end of UnreachableBlk. + IRBuilder<> Builder(UnreachableBlk); + Builder.CreateUnreachable(); + } + + ResumeInst *Resume = Resumes[0]; + + // If multiple resumes were inlined, unify them, so that the detach + // instruction has a single unwind destination. + if (Resumes.size() > 1) { + // Create the unified resume block. 
+ BasicBlock *UnifiedResume = BasicBlock::Create( + Caller->getContext(), "eh.unified.resume.i", Caller); + // Add a PHI node at the beginning of the block. + IRBuilder<> Builder(UnifiedResume); + PHINode *PN = + Builder.CreatePHI(Resume->getValue()->getType(), Resumes.size()); + for (ResumeInst *RI : Resumes) { + // Insert incoming values to the PHI node. + PN->addIncoming(RI->getValue(), RI->getParent()); + // Replace the resume with a branch to the unified block. + ReplaceInstWithInst(RI, BranchInst::Create(UnifiedResume)); + } + // Insert a resume instruction at the end of the block. + Resume = Builder.CreateResume(PN); + } + + // Handle resumes within the task. + HandleInlinedResumeInTask(TFEntryBlock, DetachedCtxEntryBlock, Resume, + UnreachableBlk); } // Update the lexical scopes of the new funclets and callsites. @@ -2708,6 +3281,14 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, AfterCallBB, IFI.CallerBFI->getBlockFreq(OrigBB).getFrequency()); } + // If we inserted a taskframe.create, insert a taskframe.end at the start of + // AfterCallBB. + if (TFCreate) { + Function *TFEndFn = Intrinsic::getDeclaration(Caller->getParent(), + Intrinsic::taskframe_end); + IRBuilder<>(&AfterCallBB->front()).CreateCall(TFEndFn, TFCreate); + } + // Change the branch that used to go to AfterCallBB to branch to the first // basic block of the inlined function. // diff --git a/llvm/lib/Transforms/Utils/LCSSA.cpp b/llvm/lib/Transforms/Utils/LCSSA.cpp index c36b0533580b97..6d786351136797 100644 --- a/llvm/lib/Transforms/Utils/LCSSA.cpp +++ b/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -88,6 +88,8 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl &Worklist, // instructions within the same loops, computing the exit blocks is // expensive, and we're not mutating the loop structure. SmallDenseMap> LoopExitBlocks; + // Similarly, cache the Loop TaskExits across this loop. 
+ SmallDenseMap> LoopTaskExits; while (!Worklist.empty()) { UsesToRewrite.clear(); @@ -105,6 +107,11 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl &Worklist, if (ExitBlocks.empty()) continue; + if (!LoopTaskExits.count(L)) + L->getTaskExits(LoopTaskExits[L]); + assert(LoopTaskExits.count(L)); + const SmallPtrSetImpl &TaskExits = LoopTaskExits[L]; + for (Use &U : make_early_inc_range(I->uses())) { Instruction *User = cast(U.getUser()); BasicBlock *UserBB = User->getParent(); @@ -121,7 +128,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl &Worklist, if (auto *PN = dyn_cast(User)) UserBB = PN->getIncomingBlock(U); - if (InstBB != UserBB && !L->contains(UserBB)) + if (InstBB != UserBB && !L->contains(UserBB) && !TaskExits.count(UserBB)) UsesToRewrite.push_back(&U); } @@ -177,7 +184,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl &Worklist, // If the exit block has a predecessor not within the loop, arrange for // the incoming value use corresponding to that predecessor to be // rewritten in terms of a different LCSSA PHI. 
- if (!L->contains(Pred)) + if (!L->contains(Pred) && !TaskExits.count(Pred)) UsesToRewrite.push_back( &PN->getOperandUse(PN->getOperandNumForIncomingValue( PN->getNumIncomingValues() - 1))); diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index eeb0446c11975d..edef07c5420acb 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -75,6 +75,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/Transforms/Utils/TapirUtils.h" #include #include #include @@ -1059,6 +1060,14 @@ static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB, replaceUndefValuesInPhi(PN, IncomingValues); } +static bool BlockIsEntryOfTask(const BasicBlock *BB) { + if (const BasicBlock *PredBB = BB->getSinglePredecessor()) + if (const DetachInst *DI = dyn_cast(PredBB->getTerminator())) + if (DI->getDetached() == BB) + return true; + return false; +} + bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU) { assert(BB != &BB->getParent()->getEntryBlock() && @@ -1085,6 +1094,10 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, // something like a loop pre-header (or rarely, a part of an irreducible CFG); // folding the branch isn't profitable in that case anyway. if (!Succ->getSinglePredecessor()) { + // If Succ has multiple predecessors and BB is the entry of a detached task, + // we can't fold it BB into Succ. 
+ if (BlockIsEntryOfTask(BB)) + return false; BasicBlock::iterator BBI = BB->begin(); while (isa(*BBI)) { for (Use &U : BBI->uses()) { @@ -2639,8 +2652,24 @@ static bool markAliveBlocks(Function &F, Instruction *llvm::removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU) { Instruction *TI = BB->getTerminator(); - if (auto *II = dyn_cast(TI)) + if (auto *II = dyn_cast(TI)) { + // If we're removing the unwind destination of a detached rethrow or + // taskframe resume, simply remove the intrinsic. + if (auto *Called = II->getCalledFunction()) { + if (Intrinsic::detached_rethrow == Called->getIntrinsicID() || + Intrinsic::taskframe_resume == Called->getIntrinsicID()) { + BranchInst *BI = BranchInst::Create(II->getNormalDest(), II); + BI->takeName(II); + BI->setDebugLoc(II->getDebugLoc()); + II->getUnwindDest()->removePredecessor(BB); + II->eraseFromParent(); + if (DTU) + DTU->applyUpdates({{DominatorTree::Delete, BB, II->getUnwindDest()}}); + return BI; + } + } return changeToCall(II, DTU); + } Instruction *NewTI; BasicBlock *UnwindDest; @@ -2657,6 +2686,10 @@ Instruction *llvm::removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU) { NewTI = NewCatchSwitch; UnwindDest = CatchSwitch->getUnwindDest(); + } else if (auto *DI = dyn_cast(TI)) { + NewTI = DetachInst::Create(DI->getDetached(), DI->getContinue(), + DI->getSyncRegion(), DI); + UnwindDest = DI->getUnwindDest(); } else { llvm_unreachable("Could not find unwind successor"); } @@ -2708,6 +2741,73 @@ bool llvm::removeUnreachableBlocks(Function &F, DomTreeUpdater *DTU, DeleteDeadBlocks(BlocksToRemove.takeVector(), DTU); + removeDeadDetachUnwinds(F, DTU, MSSAU); + + return Changed; +} + +// Recursively check the task starting at TaskEntry to find detached-rethrows +// for tasks that cannot throw. 
+static bool recursivelyCheckDetachedRethrows( + BasicBlock *TaskEntry, SmallPtrSetImpl &DeadDU) { + SmallVector Worklist; + SmallPtrSet Visited; + BasicBlock *BB = TaskEntry; + Worklist.push_back(BB); + Visited.insert(BB); + do { + BB = Worklist.pop_back_val(); + + // Ignore reattach terminators + if (isa(BB->getTerminator())) + continue; + + // Detached-rethrow terminators indicate that the parent detach has a live + // unwind. + if (isDetachedRethrow(BB->getTerminator())) + return true; + + if (DetachInst *DI = dyn_cast(BB->getTerminator())) { + if (DI->hasUnwindDest()) { + // Recursively check all blocks in the detached task. + if (!recursivelyCheckDetachedRethrows(DI->getDetached(), DeadDU)) + DeadDU.insert(DI); + else if (Visited.insert(DI->getUnwindDest()).second) + // If the detach-unwind isn't dead, add it to the worklist. + Worklist.push_back(DI->getUnwindDest()); + } + + // We don't have to check the detached task for a detach with no unwind + // destination, because those tasks will not throw any exception. + + // Add the continuation to the worklist. + if (Visited.insert(DI->getContinue()).second) + Worklist.push_back(DI->getContinue()); + } else { + for (BasicBlock *Successor : successors(BB)) + if (Visited.insert(Successor).second) + Worklist.push_back(Successor); + } + } while (!Worklist.empty()); + return false; +} + +bool llvm::removeDeadDetachUnwinds(Function &F, DomTreeUpdater *DTU, + MemorySSAUpdater *MSSAU) { + SmallPtrSet DeadDU; + // Recusirvely check all tasks for dead detach-unwinds. + recursivelyCheckDetachedRethrows(&F.front(), DeadDU); + bool Changed = false; + // Scan the detach instructions and remove any dead detach-unwind edges. + for (BasicBlock &BB : F) + if (DetachInst *DI = dyn_cast(BB.getTerminator())) + if (DeadDU.count(DI)) { + removeUnwindEdge(&BB, DTU); + Changed = true; + } + // If any dead detach-unwinds were removed, remove unreachable blocks. 
+ if (Changed) + removeUnreachableBlocks(F, DTU, MSSAU); return Changed; } diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp index d81db5647c608d..357295077e8026 100644 --- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/MemorySSA.h" #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DebugInfo.h" @@ -60,6 +61,7 @@ class LoopRotate { DominatorTree *DT; ScalarEvolution *SE; MemorySSAUpdater *MSSAU; + TaskInfo *TaskI; const SimplifyQuery &SQ; bool RotationOnly; bool IsUtilMode; @@ -69,10 +71,10 @@ class LoopRotate { LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC, DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, - const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode, - bool PrepareForLTO) + TaskInfo *TaskI, const SimplifyQuery &SQ, bool RotationOnly, + bool IsUtilMode, bool PrepareForLTO) : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE), - MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly), + MSSAU(MSSAU), TaskI(TaskI), SQ(SQ), RotationOnly(RotationOnly), IsUtilMode(IsUtilMode), PrepareForLTO(PrepareForLTO) {} bool processLoop(Loop *L); @@ -675,6 +677,12 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { if (MSSAU && VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); + if (TaskI && DT) + // Recompute task info. + // FIXME: Figure out a way to update task info that is less + // computationally wasteful. 
+ TaskI->recalculate(*DT->getRoot()->getParent(), *DT); + LLVM_DEBUG(dbgs() << "LoopRotation: into "; L->dump()); ++NumRotated; @@ -804,6 +812,12 @@ bool LoopRotate::simplifyLoopLatch(Loop *L) { if (MSSAU && VerifyMemorySSA) MSSAU->getMemorySSA()->verifyMemorySSA(); + if (TaskI && DT) + // Recompute task info. + // FIXME: Figure out a way to update task info that is less + // computationally wasteful. + TaskI->recalculate(*DT->getRoot()->getParent(), *DT); + return true; } @@ -829,6 +843,12 @@ bool LoopRotate::processLoop(Loop *L) { if ((MadeChange || SimplifiedLatch) && LoopMD) L->setLoopID(LoopMD); + if ((MadeChange || SimplifiedLatch) && TaskI && DT) + // Recompute task info. + // FIXME: Figure out a way to update task info that is less computationally + // wasteful. + TaskI->recalculate(*DT->getRoot()->getParent(), *DT); + return MadeChange || SimplifiedLatch; } @@ -837,10 +857,11 @@ bool LoopRotate::processLoop(Loop *L) { bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, AssumptionCache *AC, DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU, - const SimplifyQuery &SQ, bool RotationOnly = true, + TaskInfo *TI, const SimplifyQuery &SQ, + bool RotationOnly = true, unsigned Threshold = unsigned(-1), bool IsUtilMode = true, bool PrepareForLTO) { - LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly, + LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, TI, SQ, RotationOnly, IsUtilMode, PrepareForLTO); return LR.processLoop(L); } diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp index 3e604fdf2e11ac..fc5395d44f9755 100644 --- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp +++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp @@ -55,6 +55,7 @@ #include "llvm/Analysis/MemorySSAUpdater.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/IR/CFG.h" #include 
"llvm/IR/Constants.h" #include "llvm/IR/Dominators.h" @@ -543,6 +544,14 @@ static bool simplifyOneLoop(Loop *L, SmallVectorImpl &Worklist, if (Preheader) Changed = true; } + // Ensure that the preheader is not terminated by a sync. + if (Preheader && isa(Preheader->getTerminator())) { + LLVM_DEBUG(dbgs() + << "LoopSimplify: Splitting sync-terminated preheader.\n"); + SplitEdge(Preheader, L->getHeader(), DT, LI, MSSAU); + Changed = true; + Preheader = L->getLoopPreheader(); + } // Next, check to make sure that all exit nodes of the loop only have // predecessors that are inside of the loop. This check guarantees that the @@ -769,6 +778,7 @@ namespace { AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); } /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees. @@ -798,6 +808,8 @@ bool LoopSimplify::runOnFunction(Function &F) { DominatorTree *DT = &getAnalysis().getDomTree(); auto *SEWP = getAnalysisIfAvailable(); ScalarEvolution *SE = SEWP ? &SEWP->getSE() : nullptr; + auto *TIWP = getAnalysisIfAvailable(); + TaskInfo *TI = TIWP ? &TIWP->getTaskInfo() : nullptr; AssumptionCache *AC = &getAnalysis().getAssumptionCache(F); MemorySSA *MSSA = nullptr; @@ -821,6 +833,12 @@ bool LoopSimplify::runOnFunction(Function &F) { assert(InLCSSA && "LCSSA is broken after loop-simplify."); } #endif + if (Changed && TI) + // Update TaskInfo manually using the updated DT. + // + // FIXME: Recalculating TaskInfo for the whole function is wasteful. + // Optimize this routine in the future. 
+ TI->recalculate(F, *DT); return Changed; } @@ -830,6 +848,7 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F, LoopInfo *LI = &AM.getResult(F); DominatorTree *DT = &AM.getResult(F); ScalarEvolution *SE = AM.getCachedResult(F); + TaskInfo *TI = AM.getCachedResult(F); AssumptionCache *AC = &AM.getResult(F); auto *MSSAAnalysis = AM.getCachedResult(F); std::unique_ptr MSSAU; @@ -848,6 +867,13 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F, if (!Changed) return PreservedAnalyses::all(); + if (Changed && TI) + // Update TaskInfo manually using the updated DT. + // + // FIXME: Recalculating TaskInfo for the whole function is wasteful. + // Optimize this routine in the future. + TI->recalculate(F, *DT); + PreservedAnalyses PA; PA.preserve(); PA.preserve(); @@ -855,6 +881,7 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F, PA.preserve(); if (MSSAAnalysis) PA.preserve(); + PA.preserve(); // BPI maps conditional terminators to probabilities, LoopSimplify can insert // blocks, but it does so only by splitting existing blocks and edges. 
This // results in the interesting property that all new terminators inserted are diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index 511dd61308f927..1a53bd98838fe4 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -32,6 +32,7 @@ #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" @@ -62,6 +63,7 @@ #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" +#include "llvm/Transforms/Utils/TapirUtils.h" #include "llvm/Transforms/Utils/UnrollLoop.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include @@ -276,6 +278,111 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, } } +namespace llvm { +// Wrapper class for GraphTraits to examine task exits of a loop. +template struct TaskExitGraph { + const GraphType &Graph; + + inline TaskExitGraph(const GraphType &G) : Graph(G) {} +}; + +// GraphTraits to examine task exits of a loop, to support using the post_order +// iterator to examine the task exits. 
+template <> struct GraphTraits> { + using NodeRef = BasicBlock *; + + struct TaskExitFilter { + NodeRef TaskExitPred = nullptr; + TaskExitFilter(NodeRef TaskExit) : TaskExitPred(TaskExit) {} + bool operator()(NodeRef N) const { + return !isDetachedRethrow(TaskExitPred->getTerminator()) && + !isTaskFrameResume(TaskExitPred->getTerminator()); + } + }; + + using ChildIteratorType = filter_iterator; + + static NodeRef getEntryNode(TaskExitGraph G) { return G.Graph; } + static ChildIteratorType child_begin(NodeRef N) { + return make_filter_range(successors(N), TaskExitFilter(N)).begin(); + } + static ChildIteratorType child_end(NodeRef N) { + return make_filter_range(successors(N), TaskExitFilter(N)).end(); + } +}; +} // namespace llvm + +// Clone task-exit blocks that are effectively part of the loop but don't appear +// to be based on standard loop analysis. +static void handleTaskExits( + SmallPtrSetImpl &TaskExits, + SmallPtrSetImpl &TaskExitSrcs, unsigned It, Loop *L, + BasicBlock *Header, BasicBlock *BBInsertPt, LoopInfo *LI, + NewLoopsMap &NewLoops, SmallSetVector &LoopsToSimplify, + ValueToValueMapTy &LastValueMap, SmallVectorImpl &NewBlocks, + std::vector &UnrolledLoopBlocks, DominatorTree *DT) { + // Get the TaskExits in reverse post order. Using post_order here seems + // necessary to ensure the custom filter for processing task exits is used. + SmallVector TaskExitsRPO; + for (BasicBlock *TEStart : TaskExitSrcs) + for (BasicBlock *BB : post_order>((TEStart))) + TaskExitsRPO.push_back(BB); + + if (TaskExitsRPO.empty()) + // No task exits to handle. + return; + + // Process the task exits similarly to loop blocks. + auto BlockInsertPt = std::next(BBInsertPt->getIterator()); + for (BasicBlock *BB : reverse(TaskExitsRPO)) { + ValueToValueMapTy VMap; + BasicBlock *New = CloneBasicBlock(BB, VMap, "." + Twine(It)); + Header->getParent()->insert(BlockInsertPt, New); + + assert(BB != Header && "Header should not be a task exit"); + // Tell LI about New. 
+ if (LI->getLoopFor(BB)) { + const Loop *OldLoop = addClonedBlockToLoopInfo(BB, New, LI, NewLoops); + if (OldLoop) + LoopsToSimplify.insert(NewLoops[OldLoop]); + } + + // Update our running map of newest clones + LastValueMap[BB] = New; + for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); + VI != VE; ++VI) + LastValueMap[VI->first] = VI->second; + + // Add phi entries for newly created values to all exit blocks. + for (BasicBlock *Succ : successors(BB)) { + if (L->contains(Succ) || TaskExits.count(Succ)) + continue; + for (PHINode &PHI : Succ->phis()) { + Value *Incoming = PHI.getIncomingValueForBlock(BB); + ValueToValueMapTy::iterator It = LastValueMap.find(Incoming); + if (It != LastValueMap.end()) + Incoming = It->second; + PHI.addIncoming(Incoming, New); + } + } + + NewBlocks.push_back(New); + UnrolledLoopBlocks.push_back(New); + + // Update DomTree: since we just copy the loop body, and each copy has a + // dedicated entry block (copy of the header block), this header's copy + // dominates all copied blocks. That means, dominance relations in the + // copied body are the same as in the original body. + if (DT) { + auto BBDomNode = DT->getNode(BB); + auto BBIDom = BBDomNode->getIDom(); + BasicBlock *OriginalBBIDom = BBIDom->getBlock(); + DT->addNewBlock( + New, cast(LastValueMap[cast(OriginalBBIDom)])); + } + } +} + /// Unroll the given loop by Count. The loop must be in LCSSA form. Unrolling /// can only fail when the loop's latch block is not terminated by a conditional /// branch instruction. However, if the trip count (and multiple) are not known, @@ -295,7 +402,7 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, /// required and not fully unrolled). 
LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, - AssumptionCache *AC, + AssumptionCache *AC, TaskInfo *TI, const TargetTransformInfo *TTI, OptimizationRemarkEmitter *ORE, bool PreserveLCSSA, Loop **RemainderLoop) { @@ -387,6 +494,10 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // of the unrolled body exits. const bool CompletelyUnroll = ULO.Count == MaxTripCount; + // Disallow partial unrolling of Tapir loops. + if (getTaskIfTapirLoop(L, TI) && !CompletelyUnroll) + return LoopUnrollResult::Unmodified; + const bool PreserveOnlyFirst = CompletelyUnroll && MaxOrZero; // There's no point in performing runtime unrolling if this unroll count @@ -527,7 +638,12 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); + SmallPtrSet TaskExits; + L->getTaskExits(TaskExits); + std::vector UnrolledLoopBlocks = L->getBlocks(); + UnrolledLoopBlocks.insert(UnrolledLoopBlocks.end(), TaskExits.begin(), + TaskExits.end()); // Loop Unrolling might create new loops. While we do preserve LoopInfo, we // might break loop-simplified form for these loops (as they, e.g., would @@ -557,13 +673,14 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // Identify what noalias metadata is inside the loop: if it is inside the // loop, the associated metadata must be cloned for each iteration. SmallVector LoopLocalNoAliasDeclScopes; - identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes); + identifyNoAliasScopesToClone(UnrolledLoopBlocks, LoopLocalNoAliasDeclScopes); // We place the unrolled iterations immediately after the original loop // latch. This is a reasonable default placement if we don't have block // frequencies, and if we do, well the layout will be adjusted later. 
auto BlockInsertPt = std::next(LatchBlock->getIterator()); for (unsigned It = 1; It != ULO.Count; ++It) { + SmallPtrSet TaskExitSrcs; SmallVector NewBlocks; SmallDenseMap NewLoops; NewLoops[L] = L; @@ -603,6 +720,14 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, for (BasicBlock *Succ : successors(*BB)) { if (L->contains(Succ)) continue; + if (TaskExits.count(Succ)) { + if (llvm::none_of(predecessors(Succ), + [&TaskExits](const BasicBlock *B) { + return TaskExits.count(B); + })) + TaskExitSrcs.insert(Succ); + continue; + } for (PHINode &PHI : Succ->phis()) { Value *Incoming = PHI.getIncomingValueForBlock(*BB); ValueToValueMapTy::iterator It = LastValueMap.find(Incoming); @@ -643,6 +768,12 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, } } + // Handle task-exit blocks from this loop similarly to ordinary loop-body + // blocks. + handleTaskExits(TaskExits, TaskExitSrcs, It, L, Header, Latches.back(), LI, + NewLoops, LoopsToSimplify, LastValueMap, NewBlocks, + UnrolledLoopBlocks, DT); + // Remap all instructions in the most recent iteration remapInstructionsInBlocks(NewBlocks, LastValueMap); for (BasicBlock *NewBlock : NewBlocks) @@ -918,6 +1049,12 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, simplifyLoop(SubLoop, DT, LI, SE, AC, nullptr, PreserveLCSSA); } + // Update TaskInfo manually using the updated DT. + if (TI) + // FIXME: Recalculating TaskInfo for the whole function is wasteful. + // Optimize this routine in the future. + TI->recalculate(*Header->getParent(), *DT); + return CompletelyUnroll ? 
LoopUnrollResult::FullyUnrolled : LoopUnrollResult::PartiallyUnrolled; } diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index 1e22eca30d2d3d..f4c2e87613bf0e 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -998,7 +998,8 @@ bool llvm::UnrollRuntimeLoopRemainder( {/*Count*/ Count - 1, /*Force*/ false, /*Runtime*/ false, /*AllowExpensiveTripCount*/ false, /*UnrollRemainder*/ false, ForgetAllSCEV}, - LI, SE, DT, AC, TTI, /*ORE*/ nullptr, PreserveLCSSA); + LI, SE, DT, AC, /*TI*/ nullptr, TTI, /*ORE*/ nullptr, + PreserveLCSSA); } if (ResultLoop && UnrollResult != LoopUnrollResult::FullyUnrolled) diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 7d6662c44f07a5..979ee670cdb7f6 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -30,6 +30,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" @@ -45,6 +46,7 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" +#include "llvm/Transforms/Utils/TapirUtils.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -54,11 +56,60 @@ using namespace llvm::PatternMatch; static const char *LLVMLoopDisableNonforced = "llvm.loop.disable_nonforced"; static const char *LLVMLoopDisableLICM = "llvm.licm.disable"; +static void GetTaskExits(BasicBlock *TaskEntry, Loop *L, + SmallPtrSetImpl &TaskExits) { + // Traverse the CFG to find the exit blocks from SubT. 
+ SmallVector Worklist; + SmallPtrSet Visited; + Worklist.push_back(TaskEntry); + while (!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + if (!Visited.insert(BB).second) + continue; + + // Record any block found in the task that is not contained in the loop + if (!L->contains(BB)) + TaskExits.insert(BB); + + // Stop the CFG traversal at any reattach or detached.rethrow + if (isa(BB->getTerminator()) || + isDetachedRethrow(BB->getTerminator())) + continue; + + // If we encounter a detach, only add its continuation and unwind + // destination + if (DetachInst *DI = dyn_cast(BB->getTerminator())) { + Worklist.push_back(DI->getContinue()); + if (DI->hasUnwindDest()) + Worklist.push_back(DI->getUnwindDest()); + continue; + } + + // For all other basic blocks, traverse all successors + for (BasicBlock *Succ : successors(BB)) + Worklist.push_back(Succ); + } +} + bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, MemorySSAUpdater *MSSAU, bool PreserveLCSSA) { bool Changed = false; + SmallPtrSet TaskExits; + { + SmallVector TaskEntriesToCheck; + for (auto *BB : L->blocks()) + if (DetachInst *DI = dyn_cast(BB->getTerminator())) + if (DI->hasUnwindDest()) + if (!L->contains(DI->getUnwindDest())) + TaskEntriesToCheck.push_back(DI->getDetached()); + + // For all tasks to check, get the loop exits that are in the task. + for (BasicBlock *TaskEntry : TaskEntriesToCheck) + GetTaskExits(TaskEntry, L, TaskExits); + } + // We re-use a vector for the in-loop predecesosrs. SmallVector InLoopPredecessors; @@ -71,7 +122,7 @@ bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, // keep track of the in-loop predecessors. bool IsDedicatedExit = true; for (auto *PredBB : predecessors(BB)) - if (L->contains(PredBB)) { + if (L->contains(PredBB) || TaskExits.count(PredBB)) { if (isa(PredBB->getTerminator())) // We cannot rewrite exiting edges from an indirectbr. 
return false; @@ -106,7 +157,23 @@ bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, for (auto *BB : L->blocks()) for (auto *SuccBB : successors(BB)) { // We're looking for exit blocks so skip in-loop successors. - if (L->contains(SuccBB)) + if (L->contains(SuccBB) || TaskExits.count(SuccBB) || + isTapirPlaceholderSuccessor(SuccBB)) + continue; + + // Visit each exit block exactly once. + if (!Visited.insert(SuccBB).second) + continue; + + Changed |= RewriteExit(SuccBB); + } + + // Visit exits from tasks within the loop as well. + for (auto *BB : TaskExits) + for (auto *SuccBB : successors(BB)) { + // We're looking for exit blocks so skip in-loop successors. + if (L->contains(SuccBB) || TaskExits.count(SuccBB) || + isTapirPlaceholderSuccessor(SuccBB)) continue; // Visit each exit block exactly once. @@ -174,6 +241,8 @@ void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); // FIXME: When all loop passes preserve MemorySSA, it can be required and // preserved here instead of the individual handling in each pass. 
} @@ -196,6 +265,7 @@ void llvm::initializeLoopPassPass(PassRegistry &Registry) { INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) + INITIALIZE_PASS_DEPENDENCY(TaskInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) } @@ -444,6 +514,29 @@ TransformationMode llvm::hasLICMVersioningTransformation(const Loop *L) { return TM_Unspecified; } +TransformationMode llvm::hasLoopStripmineTransformation(const Loop *L) { + if (getBooleanLoopAttribute(L, "tapir.loop.stripmine.disable")) + return TM_Disable; + + if (getBooleanLoopAttribute(L, "tapir.loop.stripmine.enable")) + return TM_ForcedByUser; + + return TM_Unspecified; +} + +TransformationMode llvm::hasLoopSpawningTransformation(const Loop *L) { + TapirLoopHints Hints(L); + + switch (Hints.getStrategy()) { + case TapirLoopHints::ST_DAC: { + return TM_ForcedByUser; + } case TapirLoopHints::ST_SEQ: + return TM_Disable; + default: + return TM_Unspecified; + } +} + /// Does a BFS from a given node to all of its children inside a given loop. /// The returned vector of nodes includes the starting point. SmallVector @@ -480,7 +573,7 @@ bool llvm::isAlmostDeadIV(PHINode *PN, BasicBlock *LatchBlock, Value *Cond) { void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, - LoopInfo *LI, MemorySSA *MSSA) { + LoopInfo *LI, TaskInfo *TI, MemorySSA *MSSA) { assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!"); auto *Preheader = L->getLoopPreheader(); assert(Preheader && "Preheader should exist!"); @@ -693,6 +786,12 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, } LI->destroy(L); } + + if (TI && DT) + // Recompute task info. + // FIXME: Figure out a way to update task info that is less computationally + // wasteful. 
+ TI->recalculate(*DT->getRoot()->getParent(), *DT); } void llvm::breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE, diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/llvm/lib/Transforms/Utils/LoopVersioning.cpp index 78ebe75c121ba0..3a957ff75dd760 100644 --- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp +++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/LoopAccessAnalysis.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/MDBuilder.h" diff --git a/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/llvm/lib/Transforms/Utils/Mem2Reg.cpp index fbc6dd7613deb8..0aeac5bc6f8e32 100644 --- a/llvm/lib/Transforms/Utils/Mem2Reg.cpp +++ b/llvm/lib/Transforms/Utils/Mem2Reg.cpp @@ -14,6 +14,7 @@ #include "llvm/Transforms/Utils/Mem2Reg.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -31,27 +32,44 @@ using namespace llvm; #define DEBUG_TYPE "mem2reg" STATISTIC(NumPromoted, "Number of alloca's promoted"); +STATISTIC(NumNotParallelPromotable, "Number of alloca's not promotable due to " + "Tapir instructions"); static bool promoteMemoryToRegister(Function &F, DominatorTree &DT, - AssumptionCache &AC) { + AssumptionCache &AC, TaskInfo &TI) { std::vector Allocas; - BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function bool Changed = false; + // Scan the function to get its entry block and all entry blocks of detached + // CFG's. We can perform this scan for entry blocks once for the function, + // because this pass preserves the CFG. 
+ SmallVector EntryBlocks; + for (Task *T : depth_first(TI.getRootTask())) { + EntryBlocks.push_back(T->getEntry()); + if (Value *TaskFrame = T->getTaskFrameUsed()) + EntryBlocks.push_back(cast(TaskFrame)->getParent()); + } + while (true) { Allocas.clear(); // Find allocas that are safe to promote, by looking at all instructions in // the entry node - for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) - if (AllocaInst *AI = dyn_cast(I)) // Is it an alloca? - if (isAllocaPromotable(AI)) - Allocas.push_back(AI); + for (BasicBlock *BB : EntryBlocks) + for (BasicBlock::iterator I = BB->begin(), E = --BB->end(); I != E; ++I) + if (AllocaInst *AI = dyn_cast(I)) { // Is it an alloca? + if (isAllocaPromotable(AI)) { + if (TI.isAllocaParallelPromotable(AI)) + Allocas.push_back(AI); + else + ++NumNotParallelPromotable; + } + } if (Allocas.empty()) break; - PromoteMemToReg(Allocas, DT, &AC); + PromoteMemToReg(Allocas, DT, &AC, &TI); NumPromoted += Allocas.size(); Changed = true; } @@ -61,7 +79,8 @@ static bool promoteMemoryToRegister(Function &F, DominatorTree &DT, PreservedAnalyses PromotePass::run(Function &F, FunctionAnalysisManager &AM) { auto &DT = AM.getResult(F); auto &AC = AM.getResult(F); - if (!promoteMemoryToRegister(F, DT, AC)) + auto &TI = AM.getResult(F); + if (!promoteMemoryToRegister(F, DT, AC, TI)) return PreservedAnalyses::all(); PreservedAnalyses PA; @@ -92,12 +111,14 @@ struct PromoteLegacyPass : public FunctionPass { DominatorTree &DT = getAnalysis().getDomTree(); AssumptionCache &AC = getAnalysis().getAssumptionCache(F); - return promoteMemoryToRegister(F, DT, AC); + TaskInfo &TI = getAnalysis().getTaskInfo(); + return promoteMemoryToRegister(F, DT, AC, TI); } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); + AU.addRequired(); AU.setPreservesCFG(); } }; @@ -111,6 +132,7 @@ INITIALIZE_PASS_BEGIN(PromoteLegacyPass, "mem2reg", "Promote Memory to " false, false) 
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TaskInfoWrapperPass) INITIALIZE_PASS_END(PromoteLegacyPass, "mem2reg", "Promote Memory to Register", false, false) diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 2e5f40d39912de..5adeb337f70479 100644 --- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -24,6 +24,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -210,12 +211,11 @@ struct AllocaInfo { // Remember the basic blocks which define new values for the alloca DefiningBlocks.push_back(SI->getParent()); OnlyStore = SI; - } else { - LoadInst *LI = cast(User); + } else if (LoadInst *LI = dyn_cast(User)) { // Otherwise it must be a load instruction, keep track of variable // reads. UsingBlocks.push_back(LI->getParent()); - } + } else continue; if (OnlyUsedInOneBlock) { if (!OnlyBlock) @@ -308,6 +308,8 @@ struct PromoteMem2Reg { /// A cache of @llvm.assume intrinsics used by SimplifyInstruction. AssumptionCache *AC; + TaskInfo *TI; + const SimplifyQuery SQ; /// Reverse mapping of Allocas. 
@@ -349,10 +351,11 @@ struct PromoteMem2Reg { public: PromoteMem2Reg(ArrayRef Allocas, DominatorTree &DT, - AssumptionCache *AC) + AssumptionCache *AC, TaskInfo *TI) : Allocas(Allocas.begin(), Allocas.end()), DT(DT), DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false), - AC(AC), SQ(DT.getRoot()->getParent()->getParent()->getDataLayout(), + AC(AC), TI(TI), + SQ(DT.getRoot()->getParent()->getParent()->getDataLayout(), nullptr, &DT, AC) {} void run(); @@ -666,6 +669,8 @@ void PromoteMem2Reg::run() { AllocaInst *AI = Allocas[AllocaNum]; assert(isAllocaPromotable(AI) && "Cannot promote non-promotable alloca!"); + assert((!TI || TI->isAllocaParallelPromotable(AI)) && + "Cannot promote non-promotable alloca in function with detach!"); assert(AI->getParent()->getParent() == &F && "All allocas should be in the same function, which is same as DF!"); @@ -732,18 +737,26 @@ void PromoteMem2Reg::run() { // to uses. SmallPtrSet LiveInBlocks; ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); + // Filter out live-in blocks that are not dominated by the alloca. + if (AI->getParent() != DT.getRoot()) { + SmallVector LiveInToRemove; + for (BasicBlock *LiveIn : LiveInBlocks) + if (!DT.dominates(AI->getParent(), LiveIn)) + LiveInToRemove.push_back(LiveIn); + for (BasicBlock *ToRemove : LiveInToRemove) + LiveInBlocks.erase(ToRemove); + } - // At this point, we're committed to promoting the alloca using IDF's, and - // the standard SSA construction algorithm. Determine which blocks need phi - // nodes and see if we can optimize out some work by avoiding insertion of - // dead phi nodes. + // Determine which blocks need PHI nodes and see if we can optimize out some + // work by avoiding insertion of dead phi nodes. 
IDF.setLiveInBlocks(LiveInBlocks); IDF.setDefiningBlocks(DefBlocks); SmallVector PHIBlocks; IDF.calculate(PHIBlocks); - llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) { - return BBNumbers.find(A)->second < BBNumbers.find(B)->second; - }); + if (PHIBlocks.size() > 1) + llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) { + return BBNumbers.find(A)->second < BBNumbers.find(B)->second; + }); unsigned CurrentVersion = 0; for (BasicBlock *BB : PHIBlocks) @@ -831,6 +844,32 @@ void PromoteMem2Reg::run() { } } + // Check if a PHI is inserted at a task-continue block. + { + bool badPhi = false; + for (DenseMap, PHINode *>::iterator + I = NewPhiNodes.begin(), + E = NewPhiNodes.end(); + I != E; ++I) { + PHINode *PN = I->second; + BasicBlock *BB = PN->getParent(); + // Only need to check once per block + if (&BB->front() != PN) + continue; + + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); + PI != E; ++PI) { + BasicBlock *P = *PI; + if (isa(P->getTerminator())) { + LLVM_DEBUG(dbgs() << "Illegal PHI inserted in block " << BB->getName() + << "\n"); + badPhi = true; + } + } + } + assert(!badPhi && "PromoteMem2Reg inserted illegal phi."); + } + // At this point, the renamer has added entries to PHI nodes for all reachable // code. Unfortunately, there may be unreachable blocks which the renamer // hasn't traversed. If this is the case, the PHI nodes may not @@ -1127,10 +1166,10 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, } void llvm::PromoteMemToReg(ArrayRef Allocas, DominatorTree &DT, - AssumptionCache *AC) { + AssumptionCache *AC, TaskInfo *TI) { // If there is nothing to do, bail out... 
if (Allocas.empty()) return; - PromoteMem2Reg(Allocas, DT, AC).run(); + PromoteMem2Reg(Allocas, DT, AC, TI).run(); } diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp index de3626a24212c2..e8220b5ac117e2 100644 --- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp +++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -610,6 +610,14 @@ class SCCPInstVisitor : public InstVisitor { void visitReturnInst(ReturnInst &I); void visitTerminator(Instruction &TI); + void visitReattachInst(ReattachInst &I) { + markOverdefined(&I); + visitTerminator(I); + } + void visitSyncInst(SyncInst &I) { + markOverdefined(&I); + visitTerminator(I); + } void visitCastInst(CastInst &I); void visitSelectInst(SelectInst &I); @@ -1105,6 +1113,12 @@ void SCCPInstVisitor::getFeasibleSuccessors(Instruction &TI, return; } + // All destinations of a Tapir instruction are assumed to be feasible. + if (isa(&TI) || isa(&TI) || isa(&TI)) { + Succs.assign(TI.getNumSuccessors(), true); + return; + } + LLVM_DEBUG(dbgs() << "Unknown terminator instruction: " << TI << '\n'); llvm_unreachable("SCCP: Don't know how to handle this terminator!"); } diff --git a/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/llvm/lib/Transforms/Utils/SSAUpdater.cpp index ebe9cb27f5ab08..6ba435106272b0 100644 --- a/llvm/lib/Transforms/Utils/SSAUpdater.cpp +++ b/llvm/lib/Transforms/Utils/SSAUpdater.cpp @@ -43,11 +43,18 @@ static AvailableValsTy &getAvailableVals(void *AV) { return *static_cast(AV); } +typedef DenseMap ValIsDetachedTy; +static ValIsDetachedTy &getValIsDetached(void *VID) { + return *static_cast(VID); +} + SSAUpdater::SSAUpdater(SmallVectorImpl *NewPHI) : InsertedPHIs(NewPHI) {} SSAUpdater::~SSAUpdater() { delete static_cast(AV); + if (VID) + delete static_cast(VID); } void SSAUpdater::Initialize(Type *Ty, StringRef Name) { @@ -55,6 +62,10 @@ void SSAUpdater::Initialize(Type *Ty, StringRef Name) { AV = new AvailableValsTy(); else getAvailableVals(AV).clear(); + if (!VID) + VID = new 
ValIsDetachedTy(); + else + getValIsDetached(VID).clear(); ProtoType = Ty; ProtoName = std::string(Name); } @@ -105,6 +116,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { // predecessor. SmallVector, 8> PredValues; Value *SingularValue = nullptr; + SmallVector DetachPreds, ReattachPreds; // We can get our predecessor info by walking the pred_iterator list, but it // is relatively slow. If we already have PHI nodes in this block, walk one @@ -113,6 +125,12 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) { BasicBlock *PredBB = SomePhi->getIncomingBlock(i); Value *PredVal = GetValueAtEndOfBlock(PredBB); + if (isa(PredBB->getTerminator())) { + ReattachPreds.push_back(PredBB); + continue; + } + if (isa(PredBB->getTerminator())) + DetachPreds.push_back(PredBB); PredValues.push_back(std::make_pair(PredBB, PredVal)); // Compute SingularValue. @@ -125,6 +143,12 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { bool isFirstPred = true; for (BasicBlock *PredBB : predecessors(BB)) { Value *PredVal = GetValueAtEndOfBlock(PredBB); + if (isa(PredBB->getTerminator())) { + ReattachPreds.push_back(PredBB); + continue; + } + if (isa(PredBB->getTerminator())) + DetachPreds.push_back(PredBB); PredValues.push_back(std::make_pair(PredBB, PredVal)); // Compute SingularValue. @@ -135,6 +159,33 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { SingularValue = nullptr; } } + // Record any values we discover whose definitions occur in detached blocks. 
+ if (!ReattachPreds.empty()) { + assert(!DetachPreds.empty() && + "Block has reattach predecessor but no detached predecessor."); + SmallVector, 8> DetachPredValues; + for (BasicBlock *DetachPred : DetachPreds) { + Value *DetachVal = GetValueAtEndOfBlock(DetachPred); + DetachPredValues.push_back(std::make_pair(DetachPred, DetachVal)); + } + for (BasicBlock *ReattachPred : ReattachPreds) { + Value *ReattachVal = GetValueAtEndOfBlock(ReattachPred); + bool FoundMatchingDetach = false; + for (std::pair DetachPredVal : DetachPredValues) { + if (DetachPredVal.second == ReattachVal) { + FoundMatchingDetach = true; + PredValues.push_back(std::make_pair(ReattachPred, ReattachVal)); + break; + } + } + if (!FoundMatchingDetach) { + SingularValue = nullptr; + getValIsDetached(VID)[BB] = true; + PredValues.push_back(std::make_pair( + ReattachPred, UndefValue::get(ReattachVal->getType()))); + } + } + } // If there are no predecessors, just return undef. if (PredValues.empty()) @@ -184,6 +235,10 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { return InsertedPHI; } +bool SSAUpdater::GetValueIsDetachedInBlock(BasicBlock *BB) { + return getValIsDetached(VID)[BB]; +} + void SSAUpdater::RewriteUse(Use &U) { Instruction *User = cast(U.getUser()); @@ -291,6 +346,18 @@ class SSAUpdaterTraits { return UndefValue::get(Updater->ProtoType); } + /// BlockReattaches - Return true if this block is terminated with a + /// reattach, false otherwise. + static bool BlockReattaches(BasicBlock *BB, SSAUpdater *Updater) { + return isa(BB->getTerminator()); + } + + /// BlockReattaches - Return true if this block is terminated with a + /// detach, false otherwise. + static bool BlockDetaches(BasicBlock *BB, SSAUpdater *Updater) { + return isa(BB->getTerminator()); + } + /// CreateEmptyPHI - Create a new PHI instruction in the specified block. /// Reserve space for the operands but do not fill them in yet. 
static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds, @@ -337,7 +404,8 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) { if (Value *V = AvailableVals[BB]) return V; - SSAUpdaterImpl Impl(this, &AvailableVals, InsertedPHIs); + SSAUpdaterImpl Impl(this, &AvailableVals, InsertedPHIs, + &getValIsDetached(VID)); return Impl.GetValue(BB); } @@ -458,7 +526,14 @@ void LoadAndStorePromoter::run(const SmallVectorImpl &Insts) { // Okay, now we rewrite all loads that use live-in values in the loop, // inserting PHI nodes as necessary. for (LoadInst *ALoad : LiveInLoads) { - Value *NewVal = SSA.GetValueInMiddleOfBlock(ALoad->getParent()); + BasicBlock *BB = ALoad->getParent(); + Value *NewVal = SSA.GetValueInMiddleOfBlock(BB); + + // Skip loads whose definitions are detached. + if (Instruction *Def = dyn_cast(NewVal)) + if (SSA.GetValueIsDetachedInBlock(Def->getParent())) + continue; + replaceLoadWithValue(ALoad, NewVal); // Avoid assertions in unreachable code. diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index bd7ab7c9878179..e068f9a4aa1949 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -73,6 +73,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/TapirUtils.h" #include "llvm/Transforms/Utils/ValueMapper.h" #include #include @@ -180,6 +181,11 @@ static cl::opt MaxSwitchCasesPerResult( "max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select")); +static cl::opt PreserveAllSpawns( + "simplifycfg-preserve-all-spawns", cl::Hidden, cl::init(false), + cl::desc("Temporary development switch to ensure SimplifyCFG does not " + "eliminate spawns that immediately sync.")); + STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps"); 
STATISTIC(NumLinearMaps, "Number of switch instructions turned into linear mapping"); @@ -267,6 +273,7 @@ class SimplifyCFGOpt { bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder); bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder); bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder); + bool simplifySync(SyncInst *SI); bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, IRBuilder<> &Builder); @@ -1500,6 +1507,16 @@ static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, if (C1->isMustTailCall() != C2->isMustTailCall()) return false; + // Disallow hoisting of setjmp. Although hoisting the setjmp technically + // produces valid IR, it seems hard to generate appropariate machine code from + // this IR, e.g., for X86. + if (IntrinsicInst *II = dyn_cast(I1)) + if (Intrinsic::eh_sjlj_setjmp == II->getIntrinsicID()) + return false; + if (IntrinsicInst *II = dyn_cast(I2)) + if (Intrinsic::eh_sjlj_setjmp == II->getIntrinsicID()) + return false; + if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2)) return false; @@ -1515,6 +1532,14 @@ static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, return true; } +// Helper function to check if an instruction is a taskframe.create call. +static bool isTaskFrameCreate(const Instruction *I) { + if (const IntrinsicInst *II = dyn_cast(I)) + if (Intrinsic::taskframe_create == II->getIntrinsicID()) + return true; + return false; +} + /// Given a conditional branch that goes to BB1 and BB2, hoist any common code /// in the two blocks up into the branch block. The caller of this function /// guarantees that BI's block dominates BB1 and BB2. If EqTermsOnly is given, @@ -1549,6 +1574,11 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, bool EqTermsOnly) { while (isa(I2)) I2 = &*BB2_Itr++; } + // Skip taskframe.create calls. 
+ while (isTaskFrameCreate(I1)) + I1 = &*BB1_Itr++; + while (isTaskFrameCreate(I2)) + I2 = &*BB2_Itr++; if (isa(I1)) return false; @@ -1649,6 +1679,11 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, bool EqTermsOnly) { while (isa(I2)) I2 = &*BB2_Itr++; } + // Skip taskframe.create calls. + while (isTaskFrameCreate(I1)) + I1 = &*BB1_Itr++; + while (isTaskFrameCreate(I2)) + I2 = &*BB2_Itr++; } return Changed; @@ -2622,8 +2657,13 @@ static bool MergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU) { // Record all the predecessors of this `landingpad`. As per verifier, // the only allowed predecessor is the unwind edge of an `invoke`. // We want to group "compatible" `invokes` into the same set to be merged. - for (BasicBlock *PredBB : predecessors(BB)) + for (BasicBlock *PredBB : predecessors(BB)) { + // Tapir allows a detach to be a predecessor of a landingpad. If we find a + // detach predecessor, quit early. + if (isa(PredBB->getTerminator())) + return Changed; Grouper.insert(cast(PredBB->getTerminator())); + } // And now, merge `invoke`s that were grouped togeter. for (ArrayRef Invokes : Grouper.Sets) { @@ -2737,7 +2777,8 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, // means it's never concurrently read or written, hence moving the store // from under the condition will not introduce a data race. auto *AI = dyn_cast(getUnderlyingObject(StorePtr)); - if (AI && !PointerMayBeCaptured(AI, false, true)) + if (AI && !PointerMayBeCaptured(AI, false, true) && + GetDetachedCtx(LI->getParent()) == GetDetachedCtx(AI->getParent())) // Found a previous load, return it. 
return LI; } @@ -4893,6 +4934,14 @@ bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) { return !TrivialUnwindBlocks.empty(); } +static bool isTaskFrameUnassociated(const Value *TFCreate) { + for (const User *U : TFCreate->users()) + if (const Instruction *I = dyn_cast(U)) + if (isTapirIntrinsic(Intrinsic::taskframe_use, I)) + return false; + return true; +} + // Simplify resume that is only used by a single (non-phi) landing pad. bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) { BasicBlock *BB = RI->getParent(); @@ -4905,6 +4954,14 @@ bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) { make_range(LPInst->getNextNode(), RI))) return false; + // Check that no predecessor is a taskframe.resume for an unassociated + // taskframe. + for (const BasicBlock *Pred : predecessors(BB)) + if (isTaskFrameResume(Pred->getTerminator())) + if (isTaskFrameUnassociated( + cast(Pred->getTerminator())->getArgOperand(0))) + return false; + // Turn all invokes that unwind here into calls and delete the basic block. for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) { removeUnwindEdge(Pred, DTU); @@ -5173,9 +5230,10 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { DTU->applyUpdates(Updates); Updates.clear(); } - auto *CI = cast(removeUnwindEdge(TI->getParent(), DTU)); - if (!CI->doesNotThrow()) - CI->setDoesNotThrow(); + if (auto *CI = + dyn_cast(removeUnwindEdge(TI->getParent(), DTU))) + if (!CI->doesNotThrow()) + CI->setDoesNotThrow(); Changed = true; } } else if (auto *CSI = dyn_cast(TI)) { @@ -5239,6 +5297,15 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) { new UnreachableInst(TI->getContext(), TI); TI->eraseFromParent(); Changed = true; + } else if (DetachInst *DI = dyn_cast(TI)) { + if (DI->getUnwindDest() == BB) { + // If the unwind destination of the detach is unreachable, simply remove + // the unwind edge. 
+ removeUnwindEdge(DI->getParent(), DTU); + Changed = true; + } + // Detaches of unreachables are handled via + // serializeDetachOfUnreachable. } } @@ -6899,6 +6966,13 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI, // path instead and make ourselves dead. SmallSetVector UniquePreds(pred_begin(BB), pred_end(BB)); for (BasicBlock *Pred : UniquePreds) { + // Handle detach predecessors. + if (DetachInst *DI = dyn_cast(Pred->getTerminator())) { + assert(DI->getDetached() != BB && DI->getContinue() != BB && + DI->getUnwindDest() == BB && "unexpected detach successor"); + DI->setUnwindDest(OtherPred); + continue; + } InvokeInst *II = cast(Pred->getTerminator()); assert(II->getNormalDest() != BB && II->getUnwindDest() == BB && "unexpected successor"); @@ -6958,6 +7032,18 @@ bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI, !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU)) return true; + // If this branch goes to a reattach block with a single predecessor, merge + // the two blocks. + if (isa(Succ->getTerminator()) && Succ->getSinglePredecessor()) { + assert(!NeedCanonicalLoop && + "Reattach-terminated successor cannot by a loop header."); + // Preserve the name of BB, for cleanliness. + std::string BBName = BB->getName().str(); + MergeBasicBlockIntoOnlyPred(Succ, DTU); + Succ->setName(BBName); + return true; + } + // If the only instruction in the block is a seteq/setne comparison against a // constant, try to simplify the block. if (ICmpInst *ICI = dyn_cast(I)) @@ -7107,6 +7193,71 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { return false; } +bool SimplifyCFGOpt::simplifySync(SyncInst *SI) { + const Value *SyncRegion = SI->getSyncRegion(); + BasicBlock *Succ = SI->getSuccessor(0); + + // Get the first non-trivial instruction in the successor of the sync. Along + // the way, record a sync_unwind intrinsic for the sync if we find one. 
+ Instruction *SyncUnwind = nullptr; + BasicBlock::iterator SuccI = + Succ->getFirstNonPHIOrDbg(true)->getIterator(); + if (isSyncUnwind(&*SuccI, SyncRegion)) { + SyncUnwind = &*SuccI; + if (isa(SyncUnwind)) + // We cannot eliminate syncs with associated sync-unwind that has an + // associated landingpad. + return false; + SuccI = Succ->getFirstNonPHIOrDbgOrSyncUnwind(true)->getIterator(); + } + + if (!SuccI->isTerminator()) + // There's nontrivial code in the successor of the sync, so don't eliminate + // the sync. + return false; + + if (SyncInst *SuccSI = dyn_cast(&*SuccI)) { + if (SuccSI->getSyncRegion() == SyncRegion) { + // The successor block is terminated by a sync in the same sync region, + // meaning the given sync is redundant. Eliminate the given sync. + if (SyncUnwind) + SyncUnwind->eraseFromParent(); + ReplaceInstWithInst(SI, BranchInst::Create(Succ)); + return requestResimplify(); + } + } + + // Otherwise check for an unconditional branch terminating the successor + // block. + if (!isa(*SuccI)) + return false; + + BranchInst *BI = dyn_cast(&*SuccI); + if (!BI->isUnconditional()) + return false; + + // Check if the successor of the unconditional branch simply contains a sync + // in the same sync region. + BasicBlock::iterator BrSuccI = + BI->getSuccessor(0)->getFirstNonPHIOrDbg(true)->getIterator(); + if (!BrSuccI->isTerminator()) + // There's nontrivial code in the successor of the sync, so don't eliminate + // it. + return false; + if (SyncInst *SuccSI = dyn_cast(&*BrSuccI)) { + if (SuccSI->getSyncRegion() == SyncRegion) { + // The successor block is terminated by a sync in the same sync region, + // meaning the given sync is redundant. Eliminate the given sync. + if (SyncUnwind) + SyncUnwind->eraseFromParent(); + ReplaceInstWithInst(SI, BranchInst::Create(Succ)); + return requestResimplify(); + } + } + + return false; +} + /// Check if passing a value to an instruction will cause undefined behavior. 
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) { Constant *C = dyn_cast(V); @@ -7256,6 +7407,239 @@ static bool removeUndefIntroducingPredecessor(BasicBlock *BB, return false; } +/// If BB immediately syncs and BB's predecessor detaches, serialize the sync +/// and detach. This will allow normal serial optimization passes to remove the +/// blocks appropriately. Return false if BB does not terminate with a +/// reattach. +static bool serializeDetachToImmediateSync(BasicBlock *BB, + DomTreeUpdater *DTU) { + Instruction *I = BB->getFirstNonPHIOrDbgOrLifetime(); + if (isa(I)) { + // This block is empty + bool Changed = false; + // Collect the detach and reattach predecessors. + SmallPtrSet DetachPreds; + SmallVector ReattachPreds; + for (BasicBlock *PredBB : predecessors(BB)) { + if (DetachInst *DI = dyn_cast(PredBB->getTerminator())) { + // This transformation gets too complicated if the detached task might + // throw, so abort. + if (DI->hasUnwindDest()) + return false; + DetachPreds.insert(DI); + } + if (ReattachInst *RI = dyn_cast(PredBB->getTerminator())) + ReattachPreds.push_back(RI); + } + std::vector Updates; + Value *SyncRegion = cast(I)->getSyncRegion(); + for (DetachInst *DI : DetachPreds) { + BasicBlock *Detached = DI->getDetached(); + + // If this detached task uses a taskframe, mark those taskframe + // instrinsics to be erased. + SmallVector ToErase; + if (Value *TaskFrame = getTaskFrameUsed(Detached)) { + // If this detach uses a taskframe, record that taskframe.use. + for (User *U : TaskFrame->users()) { + if (IntrinsicInst *II = dyn_cast(U)) { + if (Intrinsic::taskframe_use == II->getIntrinsicID()) + ToErase.push_back(II); + else + // We need more complicated logic to effectively inline this + // taskframe, so abort. + return false; + } + } + ToErase.push_back(cast(TaskFrame)); + } + + // Replace the detach with a branch to the detached block. 
+ BB->removePredecessor(DI->getParent()); + ReplaceInstWithInst(DI, BranchInst::Create(Detached)); + // Record update to DTU if DTU is available. + if (DTU) + Updates.push_back({DominatorTree::Delete, DI->getParent(), BB}); + + // Move static alloca instructions in the detached block to the + // appropriate entry block. + MoveStaticAllocasInBlock(cast(SyncRegion)->getParent(), + Detached, ReattachPreds); + + // Erase any instructions marked to be erased. + for (Instruction *I : ToErase) + I->eraseFromParent(); + + // We should not need to add new llvm.stacksave/llvm.stackrestore + // intrinsics, because we're not introducing new alloca's into a loop. + Changed = true; + } + for (Instruction *RI : ReattachPreds) { + // Replace the reattach with an unconditional branch. + ReplaceInstWithInst(RI, BranchInst::Create(BB)); + Changed = true; + } + // Update DTU if available. + if (DTU) + DTU->applyUpdates(Updates); + return Changed; + } + return false; +} + +/// If BB immediately reattaches and BB's predecessor detaches, serialize the +/// reattach and detach. This will allow normal serial optimization passes to +/// remove the blocks appropriately. Return false if BB does not terminate with +/// a reattach or predecessor does terminate with detach. +static bool serializeTrivialDetachedBlock(BasicBlock *BB, DomTreeUpdater *DTU) { + Instruction *I = BB->getFirstNonPHIOrDbgOrLifetime(); + SmallVector ToErase; + // Skip a possible taskframe.use intrinsic in the task. + if (isTapirIntrinsic(Intrinsic::taskframe_use, I)) { + Value *TaskFrame = cast(I)->getArgOperand(0); + // Check for any other uses of TaskFrame. + for (User *U : TaskFrame->users()) + if (U != I) + // We found another use of the taskframe, making it too complicated for + // us to handle. Abort. + return false; + ToErase.push_back(I); + ToErase.push_back(cast(TaskFrame)); + I = &*(++(I->getIterator())); + } + if (ReattachInst *RI = dyn_cast(I)) { + // This detached block is empty. 
+ // Scan predecessors to verify that all of them detach BB. + for (BasicBlock *PredBB : predecessors(BB)) { + if (!isa(PredBB->getTerminator())) + return false; + } + // All predecessors detach BB, so we can serialize. Copy the predecessors + // into a separate vector, so we can safely remove the predecessors. + SmallVector Preds(pred_begin(BB), pred_end(BB)); + for (BasicBlock *PredBB : Preds) { + DetachInst *DI = dyn_cast(PredBB->getTerminator()); + BasicBlock *Detached = DI->getDetached(); + BasicBlock *Continue = DI->getContinue(); + assert(RI->getSuccessor(0) == Continue && + "Reattach destination does not match continue block of associated " + "detach."); + // Remove the predecessor through the detach from the continue block. + Continue->removePredecessor(PredBB); + // Serialize the detach: replace it with an unconditional branch. + ReplaceInstWithInst(DI, BranchInst::Create(Detached)); + // Update DTU if available. + if (DTU) + DTU->applyUpdates({{DominatorTree::Delete, PredBB, Continue}}); + } + // Erase any instructions marked to be erased. + for (Instruction *I : ToErase) + I->eraseFromParent(); + // Serialize the reattach: replace it with an unconditional branch. + ReplaceInstWithInst(RI, BranchInst::Create(RI->getSuccessor(0))); + return true; + } + return false; +} + +/// If BB detaches an CFG that cannot reach the continuation, serialize the +/// detach. Assuming the CFG is valid, this scenario arises when the detached +/// CFG is terminated by unreachable instructions. +static bool serializeDetachOfUnreachable(BasicBlock *BB, DomTreeUpdater *DTU) { + // This method assumes that the detached CFG is valid. + Instruction *I = BB->getTerminator(); + if (DetachInst *DI = dyn_cast(I)) { + // Check if continuation of the detach is not reached by reattach + // instructions. If the detached CFG is valid, then the detached CFG must + // be terminated by unreachable instructions. 
+ BasicBlock *Continue = DI->getContinue(); + for (BasicBlock *PredBB : predecessors(Continue)) + if (isa(PredBB->getTerminator())) + return false; + + if (DI->hasUnwindDest()) + // These detaches are too complicated for SimplifyCFG to handle. Abort. + return false; + + // If this detached task uses a taskframe, mark those taskframe instrinsics + // to be erased. + SmallVector ToErase; + if (Value *TaskFrame = getTaskFrameUsed(DI->getDetached())) { + // If this detach uses a taskframe, remove that taskframe. + for (User *U : TaskFrame->users()) { + if (IntrinsicInst *II = dyn_cast(U)) { + if (Intrinsic::taskframe_use == II->getIntrinsicID()) + ToErase.push_back(II); + else + // We need more complicated logic to effectively inline this + // taskframe, so abort. + return false; + } + } + ToErase.push_back(cast(TaskFrame)); + } + + // Remove the predecessor through the detach from the continue block. + Continue->removePredecessor(BB); + // Update DTU if available. + if (DTU) + DTU->applyUpdates({{DominatorTree::Delete, BB, Continue}}); + // Replace the detach with a branch to the detached block. + ReplaceInstWithInst(DI, BranchInst::Create(DI->getDetached())); + // Erase any instructions marked to be erased. + for (Instruction *I : ToErase) + I->eraseFromParent(); + return true; + } + return false; +} + +// Remove any syncs whose sync region is empty, meaning that the region contains +// no detach instructions. These sync instructions don't synchronize anything, +// so they can be removed. +static bool removeEmptySyncs(BasicBlock *BB) { + if (SyncInst *SI = dyn_cast(BB->getTerminator())) { + // Get the sync region containing this sync + Value *SyncRegion = SI->getSyncRegion(); + bool SyncRegionIsEmpty = true; + SmallVector Syncs; + // Scan the Tapir instructions in this sync region. + for (User *U : SyncRegion->users()) { + // If the sync region contains a detach or a reattach, then it's not + // empty. 
+ if (isa(U) || isa(U)) + SyncRegionIsEmpty = false; + // Collect the syncs in this region. + else if (isa(U)) + Syncs.push_back(cast(U)); + } + // If the sync region is empty, then remove all sync instructions in it. + if (SyncRegionIsEmpty) { + SmallPtrSet MaybeDeadSyncUnwinds; + for (SyncInst *Sync : Syncs) { + // Check for any sync.unwinds that might now be dead. + Instruction *MaybeSyncUnwind = + Sync->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime(); + if (isSyncUnwind(MaybeSyncUnwind, SyncRegion)) + MaybeDeadSyncUnwinds.insert(cast(MaybeSyncUnwind)); + + LLVM_DEBUG(dbgs() << "Removing empty sync " << *Sync << "\n"); + ReplaceInstWithInst(Sync, BranchInst::Create(Sync->getSuccessor(0))); + } + // Remove any dead sync.unwinds. + for (CallBase *CB : MaybeDeadSyncUnwinds) { + LLVM_DEBUG(dbgs() << "Remove dead sync unwind " << *CB << "? "); + if (removeDeadSyncUnwind(CB)) + LLVM_DEBUG(dbgs() << "Yes.\n"); + else + LLVM_DEBUG(dbgs() << "No.\n"); + } + return true; + } + } + return false; +} + bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) { bool Changed = false; @@ -7283,6 +7667,15 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) { if (removeUndefIntroducingPredecessor(BB, DTU, Options.AC)) return requestResimplify(); + // Check for and remove trivial detached blocks. + Changed |= serializeTrivialDetachedBlock(BB, DTU); + if (!PreserveAllSpawns) + Changed |= serializeDetachToImmediateSync(BB, DTU); + Changed |= serializeDetachOfUnreachable(BB, DTU); + + // Check for and remove sync instructions in empty sync regions. + Changed |= removeEmptySyncs(BB); + // Merge basic blocks into their predecessor if there is only one distinct // pred, and if there is only one distinct successor of the predecessor, and // if there are no PHI nodes. 
@@ -7333,6 +7726,8 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) { case Instruction::IndirectBr: Changed |= simplifyIndirectBr(cast(Terminator)); break; + case Instruction::Sync: + Changed |= simplifySync(cast(Terminator)); } return Changed; diff --git a/llvm/lib/Transforms/Utils/TapirUtils.cpp b/llvm/lib/Transforms/Utils/TapirUtils.cpp new file mode 100644 index 00000000000000..2746d45a648ac3 --- /dev/null +++ b/llvm/lib/Transforms/Utils/TapirUtils.cpp @@ -0,0 +1,2509 @@ +//===- TapirUtils.cpp - Utility methods for Tapir --------------*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file utility methods for handling code containing Tapir instructions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/TapirUtils.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TapirTaskInfo.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/EHPersonalities.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/UnrollLoop.h" +#include "llvm/Transforms/Utils/ValueMapper.h" + +using namespace llvm; + +#define DEBUG_TYPE "tapirutils" + +// Check if the given instruction is an intrinsic with the specified ID. If a +// value \p V is specified, then additionally checks that the first argument of +// the intrinsic matches \p V. 
+bool llvm::isTapirIntrinsic(Intrinsic::ID ID, const Instruction *I, + const Value *V) { + if (const CallBase *CB = dyn_cast(I)) + if (const Function *Called = CB->getCalledFunction()) + if (ID == Called->getIntrinsicID()) + if (!V || (V == CB->getArgOperand(0))) + return true; + return false; +} + +/// Returns true if the given instruction performs a detached.rethrow, false +/// otherwise. If \p SyncRegion is specified, then additionally checks that the +/// detached.rethrow uses \p SyncRegion. +bool llvm::isDetachedRethrow(const Instruction *I, const Value *SyncRegion) { + return isa(I) && + isTapirIntrinsic(Intrinsic::detached_rethrow, I, SyncRegion); +} + +/// Returns true if the given instruction performs a taskframe.resume, false +/// otherwise. If \p TaskFrame is specified, then additionally checks that the +/// taskframe.resume uses \p TaskFrame. +bool llvm::isTaskFrameResume(const Instruction *I, const Value *TaskFrame) { + return isa(I) && + isTapirIntrinsic(Intrinsic::taskframe_resume, I, TaskFrame); +} + +/// Returns true if the given basic block \p B is a placeholder successor of a +/// taskframe.resume or detached.rethrow. +bool llvm::isTapirPlaceholderSuccessor(const BasicBlock *B) { + for (const BasicBlock *Pred : predecessors(B)) { + if (!isDetachedRethrow(Pred->getTerminator()) && + !isTaskFrameResume(Pred->getTerminator())) + return false; + + const InvokeInst *II = dyn_cast(Pred->getTerminator()); + if (B != II->getNormalDest()) + return false; + } + return true; +} + +/// Returns a taskframe.resume that uses the given taskframe, or nullptr if no +/// taskframe.resume uses this taskframe. +InvokeInst *llvm::getTaskFrameResume(Value *TaskFrame) { + // It should suffice to get the unwind destination of the first + // taskframe.resume we find. 
+ for (User *U : TaskFrame->users()) + if (Instruction *I = dyn_cast(U)) + if (isTaskFrameResume(I)) + return cast(I); + return nullptr; +} + +/// Returns the unwind destination of a taskframe.resume that uses the given +/// taskframe, or nullptr if no such unwind destination exists. +BasicBlock *llvm::getTaskFrameResumeDest(Value *TaskFrame) { + if (InvokeInst *TFResume = getTaskFrameResume(TaskFrame)) + return TFResume->getUnwindDest(); + return nullptr; +} + +/// Returns true if the given instruction is a sync.uwnind, false otherwise. If +/// \p SyncRegion is specified, then additionally checks that the sync.unwind +/// uses \p SyncRegion. +bool llvm::isSyncUnwind(const Instruction *I, const Value *SyncRegion, + bool CheckForInvoke) { + if (isTapirIntrinsic(Intrinsic::sync_unwind, I, SyncRegion)) + return !CheckForInvoke || isa(I); + return false; +} + +/// Returns true if BasicBlock \p B is a placeholder successor, that is, it's +/// the immediate successor of only detached-rethrow and taskframe-resume +/// instructions. +bool llvm::isPlaceholderSuccessor(const BasicBlock *B) { + for (const BasicBlock *Pred : predecessors(B)) { + if (!isDetachedRethrow(Pred->getTerminator()) && + !isTaskFrameResume(Pred->getTerminator())) + return false; + if (B == cast( + Pred->getTerminator())->getUnwindDest()) + return false; + } + return true; +} + +/// Returns true if the given basic block ends a taskframe, false otherwise. If +/// \p TaskFrame is specified, then additionally checks that the +/// taskframe.end uses \p TaskFrame. +bool llvm::endsTaskFrame(const BasicBlock *B, const Value *TaskFrame) { + const Instruction *I = B->getTerminator()->getPrevNode(); + return I && isTapirIntrinsic(Intrinsic::taskframe_end, I, TaskFrame); +} + +/// Returns the spindle containing the taskframe.create used by task \p T, or +/// the entry spindle of \p T if \p T has no such taskframe.create spindle. 
+Spindle *llvm::getTaskFrameForTask(Task *T) { + Spindle *TF = T->getTaskFrameCreateSpindle(); + if (!TF) + TF = T->getEntrySpindle(); + return TF; +} + +// Removes the given sync.unwind instruction, if it is dead. Returns true if +// the sync.unwind was removed, false otherwise. +bool llvm::removeDeadSyncUnwind(CallBase *SyncUnwind, + DomTreeUpdater *DTU) { + assert(isSyncUnwind(SyncUnwind) && + "removeDeadSyncUnwind not called on a sync.unwind."); + const Value *SyncRegion = SyncUnwind->getArgOperand(0); + + // Scan predecessor blocks for syncs using this sync.unwind. + for (BasicBlock *Pred : predecessors(SyncUnwind->getParent())) + if (SyncInst *SI = dyn_cast(Pred->getTerminator())) + if (SyncRegion == SI->getSyncRegion()) + return false; + + // We found no predecessor syncs that use this sync.unwind, so remove it. + if (InvokeInst *II = dyn_cast(SyncUnwind)) { + II->getUnwindDest()->removePredecessor(II->getParent()); + if (DTU) + DTU->applyUpdates( + {{DominatorTree::Delete, II->getUnwindDest(), II->getParent()}}); + ReplaceInstWithInst(II, BranchInst::Create(II->getNormalDest())); + } else { + SyncUnwind->eraseFromParent(); + } + return true; +} + +/// Returns true if the reattach instruction appears to match the given detach +/// instruction, false otherwise. +/// +/// If a dominator tree is not given, then this method does a best-effort check. +/// In particular, this function might return true when the reattach instruction +/// does not actually match the detach instruction, but instead matches a +/// sibling detach instruction with the same continuation. This best-effort +/// check is sufficient in some cases, such as during a traversal of a detached +/// task. +bool llvm::ReattachMatchesDetach(const ReattachInst *RI, const DetachInst *DI, + DominatorTree *DT) { + // Check that the reattach instruction belonds to the same sync region as the + // detach instruction. 
+ if (RI->getSyncRegion() != DI->getSyncRegion()) + return false; + + // Check that the destination of the reattach matches the continue destination + // of the detach. + if (RI->getDetachContinue() != DI->getContinue()) + return false; + + // If we have a dominator tree, check that the detach edge dominates the + // reattach. + if (DT) { + BasicBlockEdge DetachEdge(DI->getParent(), DI->getDetached()); + if (!DT->dominates(DetachEdge, RI->getParent())) + return false; + } + + return true; +} + +/// Returns true of the given task itself contains a sync instruction. +bool llvm::taskContainsSync(const Task *T) { + for (const Spindle *S : + depth_first>(T->getEntrySpindle())) { + if (S == T->getEntrySpindle()) + continue; + for (const BasicBlock *Pred : predecessors(S->getEntry())) + if (isa(Pred->getTerminator())) + return true; + } + return false; +} + +/// Return the result of AI->isStaticAlloca() if AI were moved to the entry +/// block. Allocas used in inalloca calls and allocas of dynamic array size +/// cannot be static. +/// (Borrowed from Transforms/Utils/InlineFunction.cpp) +static bool allocaWouldBeStaticInEntry(const AllocaInst *AI) { + return isa(AI->getArraySize()) && !AI->isUsedWithInAlloca(); +} + +// Check whether this Value is used by a lifetime intrinsic. +static bool isUsedByLifetimeMarker(Value *V) { + for (User *U : V->users()) { + if (IntrinsicInst *II = dyn_cast(U)) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return true; + } + } + } + return false; +} + +// Check whether the given alloca already has lifetime.start or lifetime.end +// intrinsics. +static bool hasLifetimeMarkers(AllocaInst *AI) { + Type *Ty = AI->getType(); + Type *Int8PtrTy = Type::getInt8PtrTy(Ty->getContext(), + Ty->getPointerAddressSpace()); + if (Ty == Int8PtrTy) + return isUsedByLifetimeMarker(AI); + + // Do a scan to find all the casts to i8*. 
+ for (User *U : AI->users()) { + if (U->getType() != Int8PtrTy) continue; + if (U->stripPointerCasts() != AI) continue; + if (isUsedByLifetimeMarker(U)) + return true; + } + return false; +} + +// Move static allocas in Block into Entry, which is assumed to dominate Block. +// Leave lifetime markers behind in Block and before each instruction in +// ExitPoints for those static allocas. Returns true if Block still contains +// dynamic allocas, which cannot be moved. +bool llvm::MoveStaticAllocasInBlock( + BasicBlock *Entry, BasicBlock *Block, + SmallVectorImpl &ExitPoints) { + Function *F = Entry->getParent(); + SmallVector StaticAllocas; + bool ContainsDynamicAllocas = false; + BasicBlock::iterator InsertPoint = Entry->begin(); + for (BasicBlock::iterator I = Block->begin(), E = Block->end(); I != E;) { + AllocaInst *AI = dyn_cast(I++); + if (!AI) continue; + + if (!allocaWouldBeStaticInEntry(AI)) { + ContainsDynamicAllocas = true; + continue; + } + + StaticAllocas.push_back(AI); + + // Scan for the block of allocas that we can move over, and move them all at + // once. + while (isa(I) && + allocaWouldBeStaticInEntry(cast(I))) { + StaticAllocas.push_back(cast(I)); + ++I; + } + + // Transfer all of the allocas over in a block. Using splice means that the + // instructions aren't removed from the symbol table, then reinserted. + Entry->splice(InsertPoint, &*Block, AI->getIterator(), I); + } + + // Move any syncregion_start's into the entry basic block. + for (BasicBlock::iterator I = Block->begin(), E = Block->end(); I != E;) { + IntrinsicInst *II = dyn_cast(I++); + if (!II) continue; + if (Intrinsic::syncregion_start != II->getIntrinsicID()) + continue; + + while (isa(I) && + Intrinsic::syncregion_start == + cast(I)->getIntrinsicID()) + ++I; + + Entry->splice(InsertPoint, &*Block, II->getIterator(), I); + } + + // Leave lifetime markers for the static alloca's, scoping them to the + // from cloned block to cloned exit. 
+ if (!StaticAllocas.empty()) { + IRBuilder<> Builder(&*Block->getFirstInsertionPt()); + for (unsigned ai = 0, ae = StaticAllocas.size(); ai != ae; ++ai) { + AllocaInst *AI = StaticAllocas[ai]; + // Don't mark swifterror allocas. They can't have bitcast uses. + if (AI->isSwiftError()) + continue; + + // If the alloca is already scoped to something smaller than the whole + // function then there's no need to add redundant, less accurate markers. + if (hasLifetimeMarkers(AI)) + continue; + + // Try to determine the size of the allocation. + ConstantInt *AllocaSize = nullptr; + if (ConstantInt *AIArraySize = + dyn_cast(AI->getArraySize())) { + auto &DL = F->getParent()->getDataLayout(); + Type *AllocaType = AI->getAllocatedType(); + uint64_t AllocaTypeSize = DL.getTypeAllocSize(AllocaType); + uint64_t AllocaArraySize = AIArraySize->getLimitedValue(); + + // Don't add markers for zero-sized allocas. + if (AllocaArraySize == 0) + continue; + + // Check that array size doesn't saturate uint64_t and doesn't + // overflow when it's multiplied by type size. + if (AllocaArraySize != ~0ULL && + UINT64_MAX / AllocaArraySize >= AllocaTypeSize) { + AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()), + AllocaArraySize * AllocaTypeSize); + } + } + + Builder.CreateLifetimeStart(AI, AllocaSize); + for (Instruction *ExitPoint : ExitPoints) + IRBuilder<>(ExitPoint).CreateLifetimeEnd(AI, AllocaSize); + } + } + + return ContainsDynamicAllocas; +} + +namespace { +/// A class for recording information about inlining a landing pad. +class LandingPadInliningInfo { + /// Destination of the invoke's unwind. + BasicBlock *OuterResumeDest; + + /// Destination for the callee's resume. + BasicBlock *InnerResumeDest = nullptr; + + /// LandingPadInst associated with the detach. + Value *SpawnerLPad = nullptr; + + /// PHI for EH values from landingpad insts. + PHINode *InnerEHValuesPHI = nullptr; + + SmallVector UnwindDestPHIValues; + + /// Dominator tree to update. 
+ DominatorTree *DT = nullptr; +public: + LandingPadInliningInfo(DetachInst *DI, BasicBlock *EHContinue, + Value *LPadValInEHContinue, + DominatorTree *DT = nullptr) + : OuterResumeDest(EHContinue), SpawnerLPad(LPadValInEHContinue), DT(DT) { + // Find the predecessor block of OuterResumeDest. + BasicBlock *DetachBB = DI->getParent(); + BasicBlock *DetachUnwind = DI->getUnwindDest(); + while (DetachUnwind != OuterResumeDest) { + DetachBB = DetachUnwind; + DetachUnwind = DetachUnwind->getUniqueSuccessor(); + } + + // If there are PHI nodes in the unwind destination block, we need to keep + // track of which values came into them from the detach before removing the + // edge from this block. + BasicBlock::iterator I = OuterResumeDest->begin(); + for (; isa(I); ++I) { + if (&*I == LPadValInEHContinue) + continue; + // Save the value to use for this edge. + PHINode *PHI = cast(I); + UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(DetachBB)); + } + } + + LandingPadInliningInfo(InvokeInst *TaskFrameResume, + DominatorTree *DT = nullptr) + : OuterResumeDest(TaskFrameResume->getUnwindDest()), + SpawnerLPad(TaskFrameResume->getLandingPadInst()), DT(DT) { + // If there are PHI nodes in the unwind destination block, we need to keep + // track of which values came into them from the detach before removing the + // edge from this block. + BasicBlock *InvokeBB = TaskFrameResume->getParent(); + BasicBlock::iterator I = OuterResumeDest->begin(); + for (; isa(I); ++I) { + // Save the value to use for this edge. + PHINode *PHI = cast(I); + UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB)); + } + } + + /// The outer unwind destination is the target of unwind edges introduced for + /// calls within the inlined function. 
+ BasicBlock *getOuterResumeDest() const { + return OuterResumeDest; + } + + BasicBlock *getInnerResumeDest(); + + /// Forward a task resume - a terminator, such as a detached.rethrow or + /// taskframe.resume, marking the exit from a task for exception handling - to + /// the spawner's landing pad block. When the landing pad block has only one + /// predecessor, this is a simple branch. When there is more than one + /// predecessor, we need to split the landing pad block after the landingpad + /// instruction and jump to there. + void forwardTaskResume(InvokeInst *TR); + + /// Add incoming-PHI values to the unwind destination block for the given + /// basic block, using the values for the original invoke's source block. + void addIncomingPHIValuesFor(BasicBlock *BB) const { + addIncomingPHIValuesForInto(BB, OuterResumeDest); + } + + void addIncomingPHIValuesForInto(BasicBlock *Src, BasicBlock *Dest) const { + BasicBlock::iterator I = Dest->begin(); + for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { + PHINode *Phi = cast(I); + Phi->addIncoming(UnwindDestPHIValues[i], Src); + } + } +}; +} // end anonymous namespace + +/// Get or create a target for the branch from ResumeInsts. +BasicBlock *LandingPadInliningInfo::getInnerResumeDest() { + if (InnerResumeDest) return InnerResumeDest; + + // Split the outer resume destionation. + BasicBlock::iterator SplitPoint; + if (isa(SpawnerLPad)) + SplitPoint = ++cast(SpawnerLPad)->getIterator(); + else + SplitPoint = OuterResumeDest->getFirstNonPHI()->getIterator(); + InnerResumeDest = + OuterResumeDest->splitBasicBlock(SplitPoint, + OuterResumeDest->getName() + ".body"); + if (DT) + // OuterResumeDest dominates InnerResumeDest, which dominates all other + // nodes dominated by OuterResumeDest. 
+ if (DomTreeNode *OldNode = DT->getNode(OuterResumeDest)) { + std::vector Children(OldNode->begin(), OldNode->end()); + + DomTreeNode *NewNode = DT->addNewBlock(InnerResumeDest, OuterResumeDest); + for (DomTreeNode *I : Children) + DT->changeImmediateDominator(I, NewNode); + } + + // The number of incoming edges we expect to the inner landing pad. + const unsigned PHICapacity = 2; + + // Create corresponding new PHIs for all the PHIs in the outer landing pad. + Instruction *InsertPoint = &InnerResumeDest->front(); + BasicBlock::iterator I = OuterResumeDest->begin(); + for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) { + PHINode *OuterPHI = cast(I); + PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity, + OuterPHI->getName() + ".lpad-body", + InsertPoint); + OuterPHI->replaceAllUsesWith(InnerPHI); + InnerPHI->addIncoming(OuterPHI, OuterResumeDest); + } + + // Create a PHI for the exception values. + InnerEHValuesPHI = PHINode::Create(SpawnerLPad->getType(), PHICapacity, + "eh.lpad-body", InsertPoint); + SpawnerLPad->replaceAllUsesWith(InnerEHValuesPHI); + InnerEHValuesPHI->addIncoming(SpawnerLPad, OuterResumeDest); + + // All done. + return InnerResumeDest; +} + +// Helper method to remove Pred from the PHI nodes of BB, if Pred is present in +// those PHI nodes. Unlike BasicBlock::removePredecessor, this method does not +// error if Pred is not found in a PHI node of BB. +static void maybeRemovePredecessor(BasicBlock *BB, BasicBlock *Pred) { + for (PHINode &PN : BB->phis()) { + int BBIdx = PN.getBasicBlockIndex(Pred); + if (-1 != BBIdx) + PN.removeIncomingValue(BBIdx); + } +} + +/// Forward a task resume - a terminator, such as a detached.rethrow or +/// taskframe.resume, marking the exit from a task for exception handling - to +/// the spawner's landing pad block. When the landing pad block has only one +/// predecessor, this is a simple branch. 
When there is more than one +/// predecessor, we need to split the landing pad block after the landingpad +/// instruction and jump to there. +void LandingPadInliningInfo::forwardTaskResume(InvokeInst *TR) { + BasicBlock *Dest = getInnerResumeDest(); + BasicBlock *Src = TR->getParent(); + + BranchInst::Create(Dest, Src); + if (DT) + DT->changeImmediateDominator( + Dest, DT->findNearestCommonDominator(Dest, Src)); + + // Update the PHIs in the destination. They were inserted in an order which + // makes this work. + addIncomingPHIValuesForInto(Src, Dest); + + InnerEHValuesPHI->addIncoming(TR->getOperand(1), Src); + + // Update the DT + BasicBlock *NormalDest = nullptr, *UnwindDest = nullptr; + SmallVector Updates; + if (DT) { + if (TR->getNormalDest()->getSinglePredecessor()) + NormalDest = TR->getNormalDest(); + Updates.push_back({DominatorTree::Delete, Src, TR->getNormalDest()}); + + if (TR->getUnwindDest()->getSinglePredecessor()) + UnwindDest = TR->getUnwindDest(); + Updates.push_back({DominatorTree::Delete, Src, TR->getUnwindDest()}); + } + + // Remove the TR + if (!NormalDest) + TR->getNormalDest()->removePredecessor(Src); + if (!UnwindDest) + TR->getUnwindDest()->removePredecessor(Src); + + TR->eraseFromParent(); + + if (DT) { + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); + DTU.applyUpdates(Updates); + } + + if (NormalDest) { + for (BasicBlock *Succ : successors(NormalDest)) + maybeRemovePredecessor(Succ, NormalDest); + NormalDest->eraseFromParent(); + } + if (UnwindDest) { + for (BasicBlock *Succ : successors(UnwindDest)) + maybeRemovePredecessor(Succ, UnwindDest); + UnwindDest->eraseFromParent(); + } +} + +static void handleDetachedLandingPads( + DetachInst *DI, BasicBlock *EHContinue, Value *LPadValInEHContinue, + SmallPtrSetImpl &InlinedLPads, + SmallVectorImpl &DetachedRethrows, + DominatorTree *DT = nullptr) { + LandingPadInliningInfo DetUnwind(DI, EHContinue, LPadValInEHContinue, DT); + + // Append the clauses from the outer landing 
pad instruction into the inlined + // landing pad instructions. + LandingPadInst *OuterLPad = DI->getLandingPadInst(); + for (LandingPadInst *InlinedLPad : InlinedLPads) { + unsigned OuterNum = OuterLPad->getNumClauses(); + InlinedLPad->reserveClauses(OuterNum); + for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx) + InlinedLPad->addClause(OuterLPad->getClause(OuterIdx)); + if (OuterLPad->isCleanup()) + InlinedLPad->setCleanup(true); + } + + // Forward the detached rethrows. + for (Instruction *DR : DetachedRethrows) + DetUnwind.forwardTaskResume(cast(DR)); +} + +void llvm::cloneEHBlocks(Function *F, + SmallVectorImpl &EHBlocksToClone, + SmallPtrSetImpl &EHBlockPreds, + const char *Suffix, + SmallPtrSetImpl *InlinedLPads, + SmallVectorImpl *DetachedRethrows, + DominatorTree *DT, LoopInfo *LI) { + ValueToValueMapTy VMap; + SmallVector NewBlocks; + SmallPtrSet NewBlocksSet; + SmallPtrSet NewInlinedLPads; + SmallPtrSet NewDetachedRethrows; + NewLoopsMap NewLoops; + for (BasicBlock *BB : EHBlocksToClone) { + BasicBlock *New = CloneBasicBlock(BB, VMap, Suffix, F); + VMap[BB] = New; + if (DT) + DT->addNewBlock(New, DT->getRoot()); + + // If the cloned block is inside of a loop, update LoopInfo. + if (LI && LI->getLoopFor(BB)) { + Loop *OldLoop = LI->getLoopFor(BB); + Loop *ParentLoop = OldLoop->getParentLoop(); + if (ParentLoop && !NewLoops.count(ParentLoop)) + NewLoops[ParentLoop] = ParentLoop; + addClonedBlockToLoopInfo(BB, New, LI, NewLoops); + } + + NewBlocks.push_back(New); + NewBlocksSet.insert(New); + } + + // Remap instructions in the cloned blocks based on VMap. + remapInstructionsInBlocks(NewBlocks, VMap); + + SmallPtrSet NewSuccSet; + // For all old successors, remove the predecessors in EHBlockPreds. 
+ for (BasicBlock *EHPred : EHBlockPreds) + for (BasicBlock *OldSucc : successors(EHPred)) + if (VMap.count(OldSucc)) { + OldSucc->removePredecessor(EHPred); + NewSuccSet.insert(cast(VMap[OldSucc])); + } + + // For all new successors, remove the predecessors not in EHBlockPreds. + for (BasicBlock *NewSucc : NewSuccSet) { + for (BasicBlock::iterator I = NewSucc->begin(); isa(I); ) { + PHINode *PN = cast(I++); + + // NOTE! This loop walks backwards for a reason! First off, this minimizes + // the cost of removal if we end up removing a large number of values, and + // second off, this ensures that the indices for the incoming values + // aren't invalidated when we remove one. + for (int64_t i = PN->getNumIncomingValues() - 1; i >= 0; --i) + if (!EHBlockPreds.count(PN->getIncomingBlock(i))) + PN->removeIncomingValue(i, false); + } + } + + // Update the dominator tree and edges from EHBlockPreds to cloned EHBlocks. + for (BasicBlock *EHBlock : EHBlocksToClone) { + BasicBlock *IDomBB = nullptr; + if (DT) { + IDomBB = DT->getNode(EHBlock)->getIDom()->getBlock(); + if (VMap.count(IDomBB)) { + DT->changeImmediateDominator(cast(VMap[EHBlock]), + cast(VMap[IDomBB])); + } else { + IDomBB = nullptr; + // Get the idom of EHBlock's predecessors. + for (BasicBlock *Pred : predecessors(EHBlock)) { + if (EHBlockPreds.contains(Pred)) { + if (IDomBB) + IDomBB = DT->findNearestCommonDominator(IDomBB, Pred); + else + IDomBB = Pred; + } + } + assert(IDomBB && "Found no predecessors of EHBlock in EHBlockPreds."); + // Use this computed idom (or its clone) as the idom of the cloned + // EHBlock. + if (VMap.count(IDomBB)) { + DT->changeImmediateDominator(cast(VMap[EHBlock]), + cast(VMap[IDomBB])); + } else { + DT->changeImmediateDominator(cast(VMap[EHBlock]), + IDomBB); + } + } + } + } + + // Move the edges from Preds to point to NewEHBlock instead of EHBlock. + for (BasicBlock *EHBlock : EHBlocksToClone) { + BasicBlock *NewEHBlock = cast(VMap[EHBlock]); + DomTreeNodeBase *Node = DT ? 
DT->getNode(EHBlock) : nullptr; + BasicBlock *EHBlockIDom = Node ? Node->getIDom()->getBlock() : nullptr; + for (BasicBlock *Pred : EHBlockPreds) { + // This is slightly more strict than necessary; the minimum requirement is + // that there be no more than one indirectbr branching to BB. And all + // BlockAddress uses would need to be updated. + assert(!isa(Pred->getTerminator()) && + "Cannot split an edge from an IndirectBrInst"); + Pred->getTerminator()->replaceUsesOfWith(EHBlock, NewEHBlock); + if (DT && EHBlockIDom) + DT->deleteEdge(Pred, EHBlock); + } + } + + // Update all successors of the cloned EH blocks. + for (BasicBlock *BB : EHBlocksToClone) { + for (BasicBlock *Succ : successors(BB)) { + if (NewBlocksSet.count(Succ) || VMap.count(Succ)) + continue; + + // Update the PHI's in the successor of the cloned EH block. + for (PHINode &PN : Succ->phis()) { + Value *Val = PN.getIncomingValueForBlock(BB); + Value *NewVal = VMap.count(Val) ? cast(VMap[Val]) : Val; + PN.addIncoming(NewVal, cast(VMap[BB])); + } + } + } + + if (DT && LI) { + // If any EHBlocks become unreachable, update LoopInfo to remove the + // relevant loops. + for (BasicBlock *EHBlock : EHBlocksToClone) { + if (!DT->isReachableFromEntry(EHBlock)) { + Loop *L = nullptr; + if (LI->isLoopHeader(EHBlock)) { + // Delete the whole loop. + L = LI->getLoopFor(EHBlock); + if (Loop *ParentL = L->getParentLoop()) + ParentL->removeChildLoop(llvm::find(*ParentL, L)); + else + LI->removeLoop(llvm::find(*LI, L)); + } + LI->removeBlock(EHBlock); + // If EHBlock is a loop header, finish destroying the whole loop. + if (L) + LI->destroy(L); + } + } + } + + // Move the new InlinedLPads and DetachedRethrows to the appropriate + // set/vector. 
+ if (InlinedLPads) { + for (LandingPadInst *LPad : *InlinedLPads) { + if (VMap.count(LPad)) + NewInlinedLPads.insert(cast(VMap[LPad])); + else + NewInlinedLPads.insert(LPad); + } + InlinedLPads->clear(); + for (LandingPadInst *LPad : NewInlinedLPads) + InlinedLPads->insert(LPad); + } + if (DetachedRethrows) { + for (Instruction *DR : *DetachedRethrows) { + if (VMap.count(DR)) + NewDetachedRethrows.insert(cast(VMap[DR])); + else + NewDetachedRethrows.insert(DR); + } + DetachedRethrows->clear(); + for (Instruction *DR : NewDetachedRethrows) + DetachedRethrows->push_back(DR); + } +} + +// Helper function to find landingpads in the specified taskframe. +static void getTaskFrameLandingPads( + Value *TaskFrame, Instruction *TaskFrameResume, + SmallPtrSetImpl &InlinedLPads) { + const BasicBlock *TaskFrameBB = cast(TaskFrame)->getParent(); + SmallVector Worklist; + SmallPtrSet Visited; + // Add the parent of TaskFrameResume to the worklist. + Worklist.push_back(TaskFrameResume->getParent()); + + while (!Worklist.empty()) { + BasicBlock *BB = Worklist.pop_back_val(); + if (!Visited.insert(BB).second) + continue; + + // Terminate the search once we encounter the BB where the taskframe is + // defined. + if (TaskFrameBB == BB) + continue; + + // If we find a landingpad, add it to the set. + if (BB->isLandingPad()) + InlinedLPads.insert(BB->getLandingPadInst()); + + // Add predecessors to the worklist, but skip any predecessors within nested + // tasks or nested taskframes. + for (BasicBlock *Pred : predecessors(BB)) { + if (isa(Pred->getTerminator()) || + isDetachedRethrow(Pred->getTerminator()) || + isTaskFrameResume(Pred->getTerminator())) + continue; + Worklist.push_back(Pred); + } + } +} + +// Helper method to handle a given taskframe.resume. +static void handleTaskFrameResume(Value *TaskFrame, + Instruction *TaskFrameResume, + DominatorTree *DT = nullptr) { + // Get landingpads to inline. 
+ SmallPtrSet InlinedLPads; + getTaskFrameLandingPads(TaskFrame, TaskFrameResume, InlinedLPads); + + InvokeInst *TFR = cast(TaskFrameResume); + LandingPadInliningInfo TFResumeDest(TFR, DT); + + // Append the clauses from the outer landing pad instruction into the inlined + // landing pad instructions. + LandingPadInst *OuterLPad = TFR->getLandingPadInst(); + for (LandingPadInst *InlinedLPad : InlinedLPads) { + unsigned OuterNum = OuterLPad->getNumClauses(); + InlinedLPad->reserveClauses(OuterNum); + for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx) + InlinedLPad->addClause(OuterLPad->getClause(OuterIdx)); + if (OuterLPad->isCleanup()) + InlinedLPad->setCleanup(true); + } + + // Forward the taskframe.resume. + TFResumeDest.forwardTaskResume(TFR); +} + +void llvm::InlineTaskFrameResumes(Value *TaskFrame, DominatorTree *DT) { + SmallVector TaskFrameResumes; + // Record all taskframe.resume markers that use TaskFrame. + for (User *U : TaskFrame->users()) + if (Instruction *I = dyn_cast(U)) + if (isTaskFrameResume(I)) + TaskFrameResumes.push_back(I); + + // Handle all taskframe.resume markers. 
+ for (Instruction *TFR : TaskFrameResumes) + handleTaskFrameResume(TaskFrame, TFR, DT); +} + +static void startSerializingTaskFrame(Value *TaskFrame, + SmallVectorImpl &ToErase, + DominatorTree *DT, + bool PreserveTaskFrame) { + for (User *U : TaskFrame->users()) + if (Instruction *UI = dyn_cast(U)) + if (isTapirIntrinsic(Intrinsic::taskframe_use, UI)) + ToErase.push_back(UI); + + if (!PreserveTaskFrame) + InlineTaskFrameResumes(TaskFrame, DT); +} + +void llvm::SerializeDetach(DetachInst *DI, BasicBlock *ParentEntry, + BasicBlock *EHContinue, Value *LPadValInEHContinue, + SmallVectorImpl &Reattaches, + SmallVectorImpl *EHBlocksToClone, + SmallPtrSetImpl *EHBlockPreds, + SmallPtrSetImpl *InlinedLPads, + SmallVectorImpl *DetachedRethrows, + bool ReplaceWithTaskFrame, DominatorTree *DT, + LoopInfo *LI) { + BasicBlock *Spawner = DI->getParent(); + BasicBlock *TaskEntry = DI->getDetached(); + BasicBlock *Continue = DI->getContinue(); + BasicBlock *Unwind = DI->getUnwindDest(); + Value *SyncRegion = DI->getSyncRegion(); + Module *M = Spawner->getModule(); + + // If the spawned task has a taskframe, serialize the taskframe. + SmallVector ToErase; + Value *TaskFrame = getTaskFrameUsed(TaskEntry); + if (TaskFrame) + startSerializingTaskFrame(TaskFrame, ToErase, DT, ReplaceWithTaskFrame); + + // Clone any EH blocks that need cloning. + if (EHBlocksToClone) { + assert(EHBlockPreds && + "Given EH blocks to clone, but not blocks exiting to them."); + cloneEHBlocks(Spawner->getParent(), *EHBlocksToClone, *EHBlockPreds, ".sd", + InlinedLPads, DetachedRethrows, DT, LI); + } + + // Collect the exit points into a single vector. + SmallVector ExitPoints; + for (Instruction *Exit : Reattaches) + ExitPoints.push_back(Exit); + if (DetachedRethrows) + for (Instruction *Exit : *DetachedRethrows) + ExitPoints.push_back(Exit); + + // Move static alloca instructions in the task entry to the appropriate entry + // block. 
+ bool ContainsDynamicAllocas = + MoveStaticAllocasInBlock(ParentEntry, TaskEntry, ExitPoints); + // If the cloned loop contained dynamic alloca instructions, wrap the inlined + // code with llvm.stacksave/llvm.stackrestore intrinsics. + if (ContainsDynamicAllocas) { + // Get the two intrinsics we care about. + Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); + Function *StackRestore = + Intrinsic::getDeclaration(M,Intrinsic::stackrestore); + + // Insert the llvm.stacksave. + CallInst *SavedPtr = IRBuilder<>(TaskEntry, TaskEntry->begin()) + .CreateCall(StackSave, {}, "savedstack"); + + // Insert a call to llvm.stackrestore before the reattaches in the original + // Tapir loop. + for (Instruction *Exit : ExitPoints) + IRBuilder<>(Exit).CreateCall(StackRestore, SavedPtr); + } + + // If we're replacing the detach with a taskframe and we don't have a + // taskframe already, create one. + if (ReplaceWithTaskFrame) { + if (!TaskFrame) { + // Create a new task frame. + Function *TFCreate = + Intrinsic::getDeclaration(M, Intrinsic::taskframe_create); + TaskFrame = IRBuilder<>(TaskEntry, TaskEntry->begin()) + .CreateCall(TFCreate, {}, "repltf"); + } + } + + // Handle any detached-rethrows in the task. + bool HasUnwind = DI->hasUnwindDest(); + if (HasUnwind) { + assert(InlinedLPads && "Missing set of landing pads in task."); + assert(DetachedRethrows && "Missing set of detached rethrows in task."); + if (ReplaceWithTaskFrame) { + // If we're replacing the detach with a taskframe, simply replace the + // detached.rethrow intrinsics with taskframe.resume intrinsics. 
+ for (Instruction *I : *DetachedRethrows) { + InvokeInst *II = cast(I); + Value *LPad = II->getArgOperand(1); + Function *TFResume = Intrinsic::getDeclaration( + M, Intrinsic::taskframe_resume, {LPad->getType()}); + IRBuilder<>(II).CreateInvoke(TFResume, II->getNormalDest(), + II->getUnwindDest(), {TaskFrame, LPad}); + II->eraseFromParent(); + } + } else { + // Otherwise, "inline" the detached landingpads. + handleDetachedLandingPads(DI, EHContinue, LPadValInEHContinue, + *InlinedLPads, *DetachedRethrows, DT); + } + } + + // Replace reattaches with unconditional branches to the continuation. + BasicBlock *ReattachDom = nullptr; + for (Instruction *I : Reattaches) { + assert(isa(I) && "Recorded reattach is not a reattach"); + assert(cast(I)->getSyncRegion() == SyncRegion && + "Reattach does not match sync region of detach."); + if (DT) { + if (!ReattachDom) + ReattachDom = I->getParent(); + else + ReattachDom = DT->findNearestCommonDominator(ReattachDom, + I->getParent()); + } + + // If we're replacing the detach with a taskframe, insert a taskframe.end + // immediately before the reattach. + if (ReplaceWithTaskFrame) { + Function *TFEnd = Intrinsic::getDeclaration(M, Intrinsic::taskframe_end); + IRBuilder<>(I).CreateCall(TFEnd, {TaskFrame}); + } + ReplaceInstWithInst(I, BranchInst::Create(Continue)); + } + + // Replace the detach with an unconditional branch to the task entry. + Continue->removePredecessor(Spawner); + if (HasUnwind) + Unwind->removePredecessor(Spawner); + ReplaceInstWithInst(DI, BranchInst::Create(TaskEntry)); + + // Erase instructions marked to be erased. + for (Instruction *I : ToErase) + I->eraseFromParent(); + + // Update dominator tree. 
+ if (DT) { + if (ReattachDom && DT->dominates(Spawner, Continue)) + DT->changeImmediateDominator(Continue, ReattachDom); + if (HasUnwind) + DT->deleteEdge(Spawner, Unwind); + } +} + +/// Analyze a task for serialization +void llvm::AnalyzeTaskForSerialization( + Task *T, SmallVectorImpl &Reattaches, + SmallVectorImpl &EHBlocksToClone, + SmallPtrSetImpl &EHBlockPreds, + SmallPtrSetImpl &InlinedLPads, + SmallVectorImpl &DetachedRethrows) { + assert(!T->isRootTask() && "Cannot serialize root task."); + Value *SyncRegion = T->getDetach()->getSyncRegion(); + for (Spindle *S : depth_first>(T->getEntrySpindle())) { + // Look for landing pads in the task (and no subtask) to be merged with a + // spawner landing pad. + for (BasicBlock *BB : S->blocks()) { + // Record any shared-EH blocks that need to be cloned. + if (S->isSharedEH()) { + // Skip basic blocks that are placeholder successors + if (isPlaceholderSuccessor(BB)) + continue; + + EHBlocksToClone.push_back(BB); + if (S->getEntry() == BB) + for (BasicBlock *Pred : predecessors(BB)) + if (T->simplyEncloses(Pred)) + EHBlockPreds.insert(Pred); + } + + if (InvokeInst *II = dyn_cast(BB->getTerminator())) { + if (!isDetachedRethrow(BB->getTerminator(), SyncRegion)) { + assert(!isDetachedRethrow(BB->getTerminator()) && + "Detached rethrow in task does not match sync region."); + // Record this landing pad to merge with DI's landing pad. + InlinedLPads.insert(II->getLandingPadInst()); + } + } else if (DetachInst *SubDI = dyn_cast(BB->getTerminator())) + if (SubDI->hasUnwindDest()) + // Record this landing pad to merge with DI's landing pad. + InlinedLPads.insert(SubDI->getLandingPadInst()); + } + + if (!T->isTaskExiting(S)) + continue; + + // Find the reattach and detached-rethrow exits from this task. 
+ for (BasicBlock *BB : S->blocks()) { + if (isa(BB->getTerminator())) { + assert(cast(BB->getTerminator())->getSyncRegion() == + SyncRegion && + "Reattach in task does not match sync region with detach."); + Reattaches.push_back(BB->getTerminator()); + } else if (InvokeInst *II = dyn_cast(BB->getTerminator())) { + if (isDetachedRethrow(II, SyncRegion)) + // Get detached rethrows in the task to forward. + DetachedRethrows.push_back(II); + } + } + } +} + +/// Serialize the detach DI that spawns task T. If provided, the dominator tree +/// DT will be updated to reflect the serialization. +void llvm::SerializeDetach(DetachInst *DI, Task *T, bool ReplaceWithTaskFrame, + DominatorTree *DT) { + assert(DI && "SerializeDetach given nullptr for detach."); + assert(DI == T->getDetach() && "Task and detach arguments do not match."); + SmallVector EHBlocksToClone; + SmallPtrSet EHBlockPreds; + SmallVector Reattaches; + SmallPtrSet InlinedLPads; + SmallVector DetachedRethrows; + + AnalyzeTaskForSerialization(T, Reattaches, EHBlocksToClone, EHBlockPreds, + InlinedLPads, DetachedRethrows); + BasicBlock *EHContinue = nullptr; + Value *LPadVal = nullptr; + if (DI->hasUnwindDest()) { + EHContinue = T->getEHContinuationSpindle()->getEntry(); + LPadVal = T->getLPadValueInEHContinuationSpindle(); + } + SerializeDetach(DI, T->getParentTask()->getEntry(), EHContinue, LPadVal, + Reattaches, &EHBlocksToClone, &EHBlockPreds, &InlinedLPads, + &DetachedRethrows, ReplaceWithTaskFrame, DT); +} + +static bool isCanonicalTaskFrameEnd(const Instruction *TFEnd) { + // Check that the last instruction in the basic block containing TFEnd is + // TFEnd. + const Instruction *Term = &TFEnd->getParent()->back(); + if (!Term || isa(Term) || isa(Term)) + return false; + + const Instruction *Prev = Term->getPrevNode(); + if (!Prev || Prev != TFEnd) + return false; + + return true; +} + +// Check if the basic block terminates a taskframe via a taskframe.end. 
+static bool endsUnassociatedTaskFrame(const BasicBlock *B) { + const Instruction *Prev = B->getTerminator()->getPrevNode(); + if (!Prev) + return false; + if (isTapirIntrinsic(Intrinsic::taskframe_end, Prev) && + isCanonicalTaskFrameEnd(Prev)) + return true; + return false; +} + +/// Checks if the given taskframe.create instruction is in canonical form. This +/// function mirrors the behavior of needToSplitTaskFrameCreate in +/// Transforms/Utils/TapirUtils. +static bool isCanonicalTaskFrameCreate(const Instruction *TFCreate) { + // If the taskframe.create is not the first instruction, split. + if (TFCreate != &TFCreate->getParent()->front()) + return false; + + // The taskframe.create is at the front of the block. Check that we have a + // single predecessor. + const BasicBlock *Pred = TFCreate->getParent()->getSinglePredecessor(); + if (!Pred) + return false; + + // Check that the single predecessor has a single successor. + if (!Pred->getSingleSuccessor()) + return false; + + // Check whether the single predecessor is terminated with a sync. + if (isa(Pred->getTerminator())) + return false; + + // If the taskframe.create has no users, ignore it. + if (TFCreate->user_empty()) + return false; + + // Check that the uses of the taskframe.create are canonical as well. + for (const User *U : TFCreate->users()) { + if (const Instruction *I = dyn_cast(U)) { + if (isTapirIntrinsic(Intrinsic::taskframe_use, I) || + isTapirIntrinsic(Intrinsic::taskframe_resume, I)) + return true; + if (isTapirIntrinsic(Intrinsic::taskframe_end, I)) + return isCanonicalTaskFrameEnd(I); + } + } + return true; +} + +static const Value *getCanonicalTaskFrameCreate(const BasicBlock *BB) { + if (const IntrinsicInst *II = dyn_cast(&BB->front())) + if (Intrinsic::taskframe_create == II->getIntrinsicID() && + isCanonicalTaskFrameCreate(II)) + return II; + return nullptr; +} + +/// GetDetachedCtx - Get the entry basic block to the detached context +/// that contains the specified block. 
+/// +BasicBlock *llvm::GetDetachedCtx(BasicBlock *BB) { + return const_cast( + GetDetachedCtx(const_cast(BB))); +} + +const BasicBlock *llvm::GetDetachedCtx(const BasicBlock *BB) { + // Traverse the CFG backwards until we either reach the entry block of the + // function or we find a detach instruction that detaches the current block. + SmallPtrSet Visited; + SmallVector WorkList; + SmallPtrSet TaskFramesToIgnore; + WorkList.push_back(BB); + while (!WorkList.empty()) { + const BasicBlock *CurrBB = WorkList.pop_back_val(); + if (!Visited.insert(CurrBB).second) + continue; + + // If we find a canonical taskframe.create that we're not ignoring, then + // we've found the context. + if (const Value *TaskFrame = getCanonicalTaskFrameCreate(CurrBB)) + if (!TaskFramesToIgnore.count(TaskFrame)) + return CurrBB; + + for (const BasicBlock *PredBB : predecessors(CurrBB)) { + // Skip predecessors via reattach instructions. The detacher block + // corresponding to this reattach is also a predecessor of the current + // basic block. + if (isa(PredBB->getTerminator())) + continue; + + // Skip predecessors via detach rethrows. + if (isDetachedRethrow(PredBB->getTerminator())) + continue; + + // If we find a taskframe.resume, add its taskframe to the set of + // taskframes to ignore. + if (isTaskFrameResume(PredBB->getTerminator())) { + const InvokeInst *II = cast(PredBB->getTerminator()); + TaskFramesToIgnore.insert(II->getArgOperand(0)); + } else if (endsUnassociatedTaskFrame(PredBB)) { + const CallBase *TFEnd = cast( + PredBB->getTerminator()->getPrevNode()); + TaskFramesToIgnore.insert(TFEnd->getArgOperand(0)); + } + + // If the predecessor is terminated by a detach, check to see if + // that detach spawned the current basic block. + if (isa(PredBB->getTerminator())) { + const DetachInst *DI = cast(PredBB->getTerminator()); + if (DI->getDetached() == CurrBB) + // Return the current block, which is the entry of this detached + // sub-CFG. 
+ return CurrBB; + else if (const Value *SubTaskFrame = + getTaskFrameUsed(DI->getDetached())) + // Ignore this tasks's taskframe, if it has one. + TaskFramesToIgnore.insert(SubTaskFrame); + } + + // Otherwise, add the predecessor block to the work list to search. + WorkList.push_back(PredBB); + } + } + + // Our search didn't find anything, so return the entry of the function + // containing the given block. + return &(BB->getParent()->getEntryBlock()); +} + +// Returns true if the function may not be synced at the point of the given +// basic block, false otherwise. This function does a simple depth-first +// traversal of the CFG, and as such, produces a conservative result. +bool llvm::mayBeUnsynced(const BasicBlock *BB) { + SmallPtrSet Visited; + SmallVector WorkList; + SmallPtrSet TaskFramesToIgnore; + WorkList.push_back(BB); + while (!WorkList.empty()) { + const BasicBlock *CurrBB = WorkList.pop_back_val(); + if (!Visited.insert(CurrBB).second) + continue; + + // If we find a canonical taskframe.create that we're not ignoring, then + // we've found the context. + if (const Value *TaskFrame = getCanonicalTaskFrameCreate(CurrBB)) + if (!TaskFramesToIgnore.count(TaskFrame)) + continue; + + for (const BasicBlock *PredBB : predecessors(CurrBB)) { + // If we find a predecessor via reattach instructions, then + // wconservatively return that we may not be synced. + if (isa(PredBB->getTerminator())) + return true; + + // If we find a predecessor via a detached.rethrow, then conservatively + // return that we may not be synced. + if (isDetachedRethrow(PredBB->getTerminator())) + return true; + + // If we find a taskframe.resume, add its taskframe to the set of + // taskframes to ignore. 
+ if (isTaskFrameResume(PredBB->getTerminator())) { + const InvokeInst *II = cast(PredBB->getTerminator()); + TaskFramesToIgnore.insert(II->getArgOperand(0)); + } else if (endsUnassociatedTaskFrame(PredBB)) { + const CallBase *TFEnd = cast( + PredBB->getTerminator()->getPrevNode()); + TaskFramesToIgnore.insert(TFEnd->getArgOperand(0)); + } + + // If the predecessor is terminated by a detach, check to see if + // that detach spawned the current basic block. + if (isa(PredBB->getTerminator())) { + const DetachInst *DI = cast(PredBB->getTerminator()); + if (DI->getDetached() != CurrBB) + // We encountered a continue or unwind destination of a detach. + // Conservatively return that we may not be synced. + return true; + } + + // Otherwise, add the predecessor block to the work list to search. + WorkList.push_back(PredBB); + } + } + return false; +} + +/// isDetachedContinueEdge - Return true if the edge from terminator instruction +/// TI to successor basic block Succ is a detach-continue edge. +bool llvm::isDetachContinueEdge(const Instruction *TI, const BasicBlock *Succ) { + if (isa(TI)) + return true; + if (isDetachedRethrow(TI)) + return Succ == cast(TI)->getUnwindDest(); + if (const DetachInst *DI = dyn_cast(TI)) + return Succ == DI->getContinue() || + (DI->hasUnwindDest() && Succ == DI->getUnwindDest()); + return false; +} + +/// isCriticalContinueEdge - Return true if the specified edge is a critical +/// detach-continue edge. Critical detach-continue edges are critical edges - +/// from a block with multiple successors to a block with multiple predecessors +/// - even after ignoring all reattach edges. +bool llvm::isCriticalContinueEdge(const Instruction *TI, unsigned SuccNum) { + assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!"); + if (TI->getNumSuccessors() == 1) return false; + + // Edge must come from a detach. + if (!isa(TI)) return false; + // Edge must go to the continuation. 
+ if (SuccNum != 1) return false; + + const BasicBlock *Dest = TI->getSuccessor(SuccNum); + const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest); + + // If there is more than one predecessor, this is a critical edge... + assert(I != E && "No preds, but we have an edge to the block?"); + const BasicBlock *DetachPred = TI->getParent(); + for (; I != E; ++I) { + if (DetachPred == *I) continue; + // Even if a reattach instruction isn't associated with the detach + // instruction TI, we can safely skip it, because it will be associated with + // a different detach instruction that precedes this block. + if (isa((*I)->getTerminator())) continue; + return true; + } + return false; +} + +/// canDetach - Return true if the given function can perform a detach, false +/// otherwise. +bool llvm::canDetach(const Function *F) { + for (const BasicBlock &BB : *F) + if (isa(BB.getTerminator())) + return true; + return false; +} + +void llvm::GetDetachedCFG(const DetachInst &DI, const DominatorTree &DT, + SmallPtrSetImpl &TaskBlocks, + SmallPtrSetImpl &EHBlocks, + SmallPtrSetImpl &TaskReturns) { + SmallVector Todo; + SmallVector WorkListEH; + + LLVM_DEBUG(dbgs() << "Finding CFG detached by " << DI << "\n"); + + BasicBlock *Detached = DI.getDetached(); + BasicBlock *Continue = DI.getContinue(); + Value *SyncRegion = DI.getSyncRegion(); + BasicBlockEdge DetachEdge(DI.getParent(), Detached); + + Todo.push_back(Detached); + while (!Todo.empty()) { + BasicBlock *BB = Todo.pop_back_val(); + + if (!TaskBlocks.insert(BB).second) continue; + + LLVM_DEBUG(dbgs() << " Found block " << BB->getName() << "\n"); + + Instruction *Term = BB->getTerminator(); + if (nullptr == Term) + llvm_unreachable("BB with null terminator found."); + + if (ReattachInst *RI = dyn_cast(Term)) { + // Either a reattach instruction terminates the detached CFG or it + // terminates a nested detached CFG. 
If it terminates a nested detached + // CFG, it can simply be ignored, because the corresponding nested detach + // instruction will be processed later. + if (RI->getDetachContinue() != Continue) continue; + assert(RI->getSyncRegion() == SyncRegion && + "Reattach terminating detached CFG has nonmatching sync region."); + TaskReturns.insert(BB); + continue; + } else if (DetachInst *NestedDI = dyn_cast(Term)) { + assert(NestedDI != &DI && "Found recursive Detach"); + // Add the successors of the nested detach instruction for searching. + Todo.push_back(NestedDI->getDetached()); + Todo.push_back(NestedDI->getContinue()); + if (NestedDI->hasUnwindDest()) + Todo.push_back(NestedDI->getUnwindDest()); + continue; + } else if (SyncInst *SI = dyn_cast(Term)) { + // A sync instruction should only apply to nested detaches within this + // task. Hence it can be treated like a branch. + assert(SI->getSyncRegion() != SyncRegion && + "Sync in detached task applies to parent parallel context."); + Todo.push_back(SI->getSuccessor(0)); + continue; + } else if (isa(Term) || isa(Term) || + isa(Term)) { + if (isDetachedRethrow(Term, SyncRegion)) { + // A detached rethrow terminates this task and is included in the set of + // exception-handling blocks that might not be unique to this task. + LLVM_DEBUG(dbgs() << " Exit block " << BB->getName() << "\n"); + TaskReturns.insert(BB); + EHBlocks.insert(BB); + } else { + for (BasicBlock *Succ : successors(BB)) { + if (DT.dominates(DetachEdge, Succ)) { + LLVM_DEBUG(dbgs() << + "Adding successor " << Succ->getName() << "\n"); + Todo.push_back(Succ); + } else { + // We assume that this block is an exception-handling block and save + // it for later processing. + LLVM_DEBUG(dbgs() << + " Exit block to search " << Succ->getName() << "\n"); + EHBlocks.insert(Succ); + WorkListEH.push_back(Succ); + } + } + } + continue; + } else if (isa(Term)) { + // We don't bother cloning unreachable exits from the detached CFG at this + // point. 
We're cloning the entire detached CFG anyway when we outline + // the function. + continue; + } else { + llvm_unreachable("Detached task does not absolutely terminate in reattach"); + } + } + + // Find the exception-handling exit blocks. + { + SmallPtrSet Visited; + while (!WorkListEH.empty()) { + BasicBlock *BB = WorkListEH.pop_back_val(); + if (!Visited.insert(BB).second) + continue; + + // Make sure that the control flow through these exception-handling blocks + // cannot re-enter the blocks being outlined. + assert(!TaskBlocks.count(BB) && + "EH blocks for a detached task reenter that task."); + + // Make sure that the control flow through these exception-handling blocks + // doesn't perform an ordinary return or resume. + assert(!isa(BB->getTerminator()) && + "EH block terminated by return."); + assert(!isa(BB->getTerminator()) && + "EH block terminated by resume."); + + // Make sure that the control flow through these exception-handling blocks + // doesn't reattach to the detached CFG's continuation. + LLVM_DEBUG({ + if (ReattachInst *RI = dyn_cast(BB->getTerminator())) + assert(RI->getSuccessor(0) != Continue && + "Exit block reaches a reattach to the continuation."); + }); + + // Stop searching down this path upon finding a detached rethrow. + if (isDetachedRethrow(BB->getTerminator(), SyncRegion)) { + TaskReturns.insert(BB); + continue; + } + + for (BasicBlock *Succ : successors(BB)) { + EHBlocks.insert(Succ); + WorkListEH.push_back(Succ); + } + } + + // Visited now contains exception-handling blocks that we want to clone as + // part of outlining. 
+ for (BasicBlock *EHBlock : Visited) + TaskBlocks.insert(EHBlock); + } + + LLVM_DEBUG({ + dbgs() << "Exit blocks:"; + for (BasicBlock *Exit : EHBlocks) { + if (DT.dominates(DetachEdge, Exit)) + dbgs() << "(dominated)"; + else + dbgs() << "(shared)"; + dbgs() << *Exit; + } + dbgs() << "\n"; + }); +} + +// Helper function to find PHI nodes that depend on the landing pad in the +// unwind destination of this task's detach. +void llvm::getDetachUnwindPHIUses(DetachInst *DI, + SmallPtrSetImpl &UnwindPHIs) { + // Get the landing pad of the unwind destination of the detach. + LandingPadInst *LPad = nullptr; + if (DI && DI->hasUnwindDest()) { + BasicBlock *UnwindDest = DI->getUnwindDest(); + LPad = UnwindDest->getLandingPadInst(); + assert(LPad && "Unwind of detach is not a landing pad."); + } + if (!LPad) return; + + // Walk the chain of uses of this landing pad to find all PHI nodes that + // depend on it, directly or indirectly. + SmallVector WorkList; + SmallPtrSet Visited; + for (User *U : LPad->users()) + WorkList.push_back(U); + + while (!WorkList.empty()) { + User *Curr = WorkList.pop_back_val(); + if (!Visited.insert(Curr).second) continue; + + // If we find a PHI-node user, add it to UnwindPHIs + if (PHINode *PN = dyn_cast(Curr)) + UnwindPHIs.insert(PN->getParent()); + + // Queue the successors for processing + for (User *U : Curr->users()) + WorkList.push_back(U); + } +} + +/// Return the taskframe used in the given detached block. +Value *llvm::getTaskFrameUsed(BasicBlock *Detached) { + // Scan the detached block for a taskframe.use intrinsic. If we find one, + // return its argument. + for (const Instruction &I : *Detached) + if (const IntrinsicInst *II = dyn_cast(&I)) + if (Intrinsic::taskframe_use == II->getIntrinsicID()) + return II->getArgOperand(0); + return nullptr; +} + +// Helper function to check if the given taskframe.create instruction requires +// the parent basic block to be split in order to canonicalize the +// representation of taskframes. 
+static bool needToSplitTaskFrameCreate(const Instruction *TFCreate) { + // If the taskframe.create is not the first instruction, split. + if (TFCreate != &TFCreate->getParent()->front()) + return true; + + // The taskframe.create is at the front of the block. Check that we have a + // single predecessor. + const BasicBlock *Pred = TFCreate->getParent()->getSinglePredecessor(); + if (!Pred) + return true; + + // Check that the single predecessor has a single successor. + if (!Pred->getSingleSuccessor()) + return true; + + // Check whether the single predecessor is terminated with a sync. + if (isa(Pred->getTerminator())) + return true; + + return false; +} + +// Helper function to check if the given taskframe.end instruction requires the +// parent basic block to be split in order to canonicalize the representation of +// taskframes. +static bool needToSplitTaskFrameEnd(const Instruction *TFEnd) { + const BasicBlock *B = TFEnd->getParent(); + // If the taskframe.end is not the penultimate instruction, split. + if (TFEnd != B->getTerminator()->getPrevNode()) + return true; + + // Check whether the parent block has a single successor. + const BasicBlock *Succ = B->getSingleSuccessor(); + if (!Succ) + return true; + + // Check that the single successor has a single predecessor. + if (!Succ->getSinglePredecessor()) + return true; + + // Check that the single successor is not a taskframe.create entry. + if (isTapirIntrinsic(Intrinsic::taskframe_create, &Succ->front())) + return true; + + // Check whether the parent block is terminated with a sync or a reattach. + if (isa(B->getTerminator()) || + isa(B->getTerminator())) + return true; + + return false; +} + +/// Split blocks in function F containing taskframe.create calls to canonicalize +/// the representation of Tapir taskframes in F. 
+bool llvm::splitTaskFrameCreateBlocks(Function &F, DominatorTree *DT, + TaskInfo *TI, LoopInfo *LI, + MemorySSAUpdater *MSSAU) { + if (F.empty()) + return false; + + // Scan the function for taskframe.create instructions to split. + SmallVector TFCreateToSplit; + SmallVector DetachesWithTaskFrames; + SmallVector TFEndToSplit; + SmallVector TFResumeToSplit; + SmallVector WorkList; + SmallPtrSet Visited; + WorkList.push_back(&F.getEntryBlock()); + while (!WorkList.empty()) { + BasicBlock *BB = WorkList.pop_back_val(); + if (!Visited.insert(BB).second) + continue; + + // Scan the instructions in BB for taskframe.create intrinsics. + for (Instruction &I : *BB) { + if (IntrinsicInst *II = dyn_cast(&I)) { + if (Intrinsic::taskframe_create == II->getIntrinsicID()) { + // Record this taskframe.create for splitting. + LLVM_DEBUG(dbgs() << "Pushing TFCreate " << *II << "\n"); + TFCreateToSplit.push_back(II); + + // Look for a detach instructions and taskframe.end intrinsics that + // use this taskframe. + for (User *U : II->users()) { + if (IntrinsicInst *UI = dyn_cast(U)) { + if (Intrinsic::taskframe_use == UI->getIntrinsicID()) { + if (BasicBlock *Pred = UI->getParent()->getSinglePredecessor()) + if (DetachInst *DI = + dyn_cast(Pred->getTerminator())) { + // Record this detach as using a taskframe. + DetachesWithTaskFrames.push_back(DI); + break; + } + } else if (Intrinsic::taskframe_end == UI->getIntrinsicID()) { + // Record this taskframe.end. + TFEndToSplit.push_back(UI); + } + } else if (Instruction *UI = dyn_cast(U)) { + if (isTaskFrameResume(UI, II)) { + // Record this taskframe.resume. + TFResumeToSplit.push_back(UI); + } + } + } + } + } + } + + // Add all successors of BB + for (BasicBlock *Succ : successors(BB)) + WorkList.push_back(Succ); + } + + bool Changed = false; + // Split the basic blocks containing taskframe.create calls so that the + // taskframe.create call starts the basic block. 
+ for (Instruction *I : TFCreateToSplit) + if (needToSplitTaskFrameCreate(I)) { + LLVM_DEBUG(dbgs() << "Splitting at " << *I << "\n"); + StringRef OldName = I->getParent()->getName(); + SplitBlock(I->getParent(), I, DT, LI, MSSAU); + I->getParent()->setName(OldName+".tf"); + Changed = true; + } + + // Split basic blocks containing taskframe.end calls, so that they end with an + // unconditional branch immediately after the taskframe.end call. + for (Instruction *TFEnd : TFEndToSplit) + if (needToSplitTaskFrameEnd(TFEnd)) { + LLVM_DEBUG(dbgs() << "Splitting block after " << *TFEnd << "\n"); + BasicBlock::iterator Iter = ++TFEnd->getIterator(); + SplitBlock(TFEnd->getParent(), &*Iter, DT, LI, MSSAU); + // Try to attach debug info to the new terminator after the taskframe.end + // call. + Instruction *SplitTerminator = TFEnd->getParent()->getTerminator(); + if (!SplitTerminator->getDebugLoc()) + SplitTerminator->setDebugLoc(TFEnd->getDebugLoc()); + Iter->getParent()->setName(TFEnd->getParent()->getName() + ".tfend"); + Changed = true; + } + + // Split critical continue edges, if we need to. For example, we need to + // split critical continue edges if we're planning to fixup external uses of + // variables defined in a taskframe. + // + // TODO: Predicate this canonicalization on something more intuitive than the + // existence of DT. + for (DetachInst *DI : DetachesWithTaskFrames) { + if (DT && isCriticalContinueEdge(DI, 1)) { + SplitCriticalEdge( + DI, 1, + CriticalEdgeSplittingOptions(DT, nullptr).setSplitDetachContinue()); + Changed = true; + } + } + // Similarly, split unwind edges from taskframe.resume's. + for (Instruction *TFResume : TFResumeToSplit) { + InvokeInst *II = cast(TFResume); + if (DT && isCriticalEdge(II, 1)) { + BasicBlock *Unwind = II->getUnwindDest(); + SplitBlockPredecessors(Unwind, {II->getParent()}, ".tfsplit", DT, LI, + MSSAU); + Changed = true; + } + } + + // Recalculate TaskInfo if necessary. 
+ if (Changed && DT && TI) + TI->recalculate(F, *DT); + + return Changed; +} + +/// taskFrameContains - Returns true if the given basic block \p B is contained +/// within the taskframe \p TF. +bool llvm::taskFrameContains(const Spindle *TF, const BasicBlock *B, + const TaskInfo &TI) { + if (TF->getTaskFrameCreate()) { + if (TF->taskFrameContains(TI.getSpindleFor(B))) + return true; + } else { + // If TF is a task entry, check that that task encloses I's basic block. + return TF->getParentTask()->encloses(B); + } + return false; +} + +/// taskFrameEncloses - Returns true if the given basic block \p B is enclosed +/// within the taskframe \p TF. +bool llvm::taskFrameEncloses(const Spindle *TF, const BasicBlock *B, + const TaskInfo &TI) { + if (taskFrameContains(TF, B, TI)) + return true; + + if (!TF->getTaskFrameCreate()) + return false; + + // TF is a taskframe.create spindle. Recursively check its subtaskframes. + for (const Spindle *SubTF : TF->subtaskframes()) + if (taskFrameEncloses(SubTF, B, TI)) + return true; + + return false; +} + +/// fixupTaskFrameExternalUses - Fix any uses of variables defined in +/// taskframes, but outside of tasks themselves. For each such variable, insert +/// a memory allocation in the parent frame, add a store to that memory in the +/// taskframe, and modify external uses to use the value in that memory loaded +/// at the tasks continuation. +void llvm::fixupTaskFrameExternalUses(Spindle *TF, const TaskInfo &TI, + const DominatorTree &DT) { + Value *TaskFrame = TF->getTaskFrameCreate(); + if (!TaskFrame) + // Nothing to do for taskframe spindles that are actually task entries. + return; + Task *T = TF->getTaskFrameUser(); + + LLVM_DEBUG(dbgs() << "fixupTaskFrameExternalUses: spindle@" + << TF->getEntry()->getName() << "\n"); + LLVM_DEBUG({ + if (T) + dbgs() << " used by task@" << T->getEntry()->getName() << "\n"; + }); + + // Get the set of basic blocks in the taskframe spindles. 
At the same time, + // find the continuation of corresponding taskframe.resume intrinsics. + + SmallPtrSet BlocksToCheck; + BasicBlock *TFResumeContin = nullptr; + for (Spindle *S : TF->taskframe_spindles()) { + // Skip taskframe spindles within the task itself. + if (T && T->contains(S)) + continue; + for (BasicBlock *BB : S->blocks()) { + BlocksToCheck.insert(BB); + if (isTaskFrameResume(BB->getTerminator(), TaskFrame)) { + InvokeInst *TFResume = cast(BB->getTerminator()); + assert((nullptr == TFResumeContin) || + (TFResumeContin == TFResume->getUnwindDest()) && + "Multiple taskframe.resume destinations found"); + TFResumeContin = TFResume->getUnwindDest(); + } + } + } + + BasicBlock *Continuation = TF->getTaskFrameContinuation(); + + MapVector> ToRewrite; + MapVector> SyncRegionsToLocalize; + // Find instructions in the taskframe that are used outside of the taskframe. + for (BasicBlock *BB : BlocksToCheck) { + for (Instruction &I : *BB) { + // Ignore certain instructions from consideration: the taskframe.create + // intrinsic for this taskframe, the detach instruction that spawns T, and + // the landingpad value in T's EH continuation. + if (T && ((T->getTaskFrameUsed() == &I) || (T->getDetach() == &I) || + (T->getLPadValueInEHContinuationSpindle() == &I))) + continue; + + // Examine all users of this instruction. + for (Use &U : I.uses()) { + // If we find a live use outside of the task, it's an output. + if (Instruction *UI = dyn_cast(U.getUser())) { + if (!taskFrameEncloses(TF, UI->getParent(), TI)) { + LLVM_DEBUG(dbgs() << " ToRewrite: " << I << " (user " << *UI + << ")\n"); + ToRewrite[&I].push_back(&U); + } + } + } + } + // Collect any syncregions used in this taskframe that are defined outside. 
+ if (!T) { + if (DetachInst *DI = dyn_cast(BB->getTerminator())) + if (!taskFrameContains( + TF, cast(DI->getSyncRegion())->getParent(), TI)) { + LLVM_DEBUG(dbgs() << " Sync region to localize: " + << *DI->getSyncRegion() << "(user " << *DI << ")\n"); + // Only record the detach. We can find associated reattaches and + // detached-rethrows later. + SyncRegionsToLocalize[DI->getSyncRegion()].push_back(DI); + } + + if (SyncInst *SI = dyn_cast(BB->getTerminator())) + if (!taskFrameContains( + TF, cast(SI->getSyncRegion())->getParent(), TI)) { + LLVM_DEBUG(dbgs() << " Sync region to localize: " + << *SI->getSyncRegion() << "(user " << *SI << ")\n"); + SyncRegionsToLocalize[SI->getSyncRegion()].push_back(SI); + } + } + } + + Module *M = TF->getEntry()->getModule(); + + // Localize any syncregions used in this taskframe. + for (auto &SRUsed : SyncRegionsToLocalize) { + Value *ReplSR = CallInst::Create( + Intrinsic::getDeclaration(M, Intrinsic::syncregion_start), + SRUsed.first->getName(), cast(TaskFrame)->getNextNode()); + for (Instruction *UseToRewrite : SRUsed.second) { + // Replace the syncregion of each sync. + if (SyncInst *SI = dyn_cast(UseToRewrite)) { + SI->setSyncRegion(ReplSR); + // Replace the syncregion of each sync.unwind. + if (CallBase *CB = dyn_cast( + SI->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime())) + if (isSyncUnwind(CB, SRUsed.first)) + CB->setArgOperand(0, ReplSR); + } else if (DetachInst *DI = dyn_cast(UseToRewrite)) { + // Replace the syncregion of each detach. + DI->setSyncRegion(ReplSR); + Task *SubT = TI.getTaskFor(DI->getDetached()); + // Replace the syncregion of corresponding reattach instructions. + for (BasicBlock *Pred : predecessors(DI->getContinue())) + if (ReattachInst *RI = dyn_cast(Pred->getTerminator())) + if (SubT->encloses(Pred)) + RI->setSyncRegion(ReplSR); + + // Replace the syncregion of corresponding detached.rethrows. 
+ for (User *U : SRUsed.first->users()) + if (InvokeInst *II = dyn_cast(U)) + if (isDetachedRethrow(II) && SubT->encloses(II->getParent())) + II->setArgOperand(0, ReplSR); + } + } + } + + // Rewrite any uses of values defined in the taskframe that are used outside. + for (auto &TFInstr : ToRewrite) { + LLVM_DEBUG(dbgs() << "Fixing taskframe output " << *TFInstr.first << "\n"); + // Create an allocation to store the result of the instruction. + BasicBlock *ParentEntry; + if (Spindle *ParentTF = TF->getTaskFrameParent()) + ParentEntry = ParentTF->getEntry(); + else + ParentEntry = TF->getParentTask()->getEntry(); + IRBuilder<> Builder(&*ParentEntry->getFirstInsertionPt()); + Type *TFInstrTy = TFInstr.first->getType(); + AllocaInst *AI = Builder.CreateAlloca(TFInstrTy); + AI->setName(TFInstr.first->getName()); + + // Store the result of the instruction into that alloca. + if (isa(TFInstr.first)) + Builder.SetInsertPoint( + &*TFInstr.first->getParent()->getFirstInsertionPt()); + else + Builder.SetInsertPoint(&*(++TFInstr.first->getIterator())); + Builder.CreateStore(TFInstr.first, AI); + + // Load the result of the instruction at the continuation. + Builder.SetInsertPoint(&*Continuation->getFirstInsertionPt()); + Builder.CreateCall( + Intrinsic::getDeclaration(M, Intrinsic::taskframe_load_guard, + { AI->getType() }), { AI }); + LoadInst *ContinVal = Builder.CreateLoad(TFInstrTy, AI); + LoadInst *EHContinVal = nullptr; + + // For each external use, replace the use with a load from the alloca. + for (Use *UseToRewrite : TFInstr.second) { + Instruction *User = cast(UseToRewrite->getUser()); + BasicBlock *UserBB = User->getParent(); + if (auto *PN = dyn_cast(User)) + UserBB = PN->getIncomingBlock(*UseToRewrite); + + if (!DT.dominates(Continuation, UserBB)) { + assert(DT.dominates(TFResumeContin, UserBB) && + "Use not dominated by continuation or taskframe.resume"); + // If necessary, load the value at the taskframe.resume continuation. 
+ if (!EHContinVal) { + Builder.SetInsertPoint(&*(TFResumeContin->getFirstInsertionPt())); + Builder.CreateCall( + Intrinsic::getDeclaration(M, Intrinsic::taskframe_load_guard, + { AI->getType() }), { AI }); + EHContinVal = Builder.CreateLoad(TFInstrTy, AI); + } + + // Rewrite to use the value loaded at the taskframe.resume continuation. + if (UseToRewrite->get()->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(*UseToRewrite, EHContinVal); + UseToRewrite->set(EHContinVal); + continue; + } + + // Rewrite to use the value loaded at the continuation. + if (UseToRewrite->get()->hasValueHandle()) + ValueHandleBase::ValueIsRAUWd(*UseToRewrite, ContinVal); + UseToRewrite->set(ContinVal); + } + } +} + +// Helper method to find a taskframe.create intrinsic in the given basic block. +Instruction *llvm::FindTaskFrameCreateInBlock(BasicBlock *BB, + const Value *TFToIgnore) { + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) { + Instruction *I = &*BBI++; + + // Ignore TFToIgnore + if (TFToIgnore == I) + continue; + + // Check if this instruction is a call to taskframe_create. + if (CallInst *CI = dyn_cast(I)) + if (isTapirIntrinsic(Intrinsic::taskframe_create, I)) + return CI; + } + return nullptr; +} + +// Helper method to create an unwind edge for a nested taskframe or spawned +// task. This unwind edge is a new basic block terminated by an appropriate +// terminator, i.e., a taskframe.resume or detached.rethrow intrinsic. +BasicBlock *llvm::CreateSubTaskUnwindEdge(Intrinsic::ID TermFunc, Value *Token, + BasicBlock *UnwindEdge, + BasicBlock *Unreachable, + Instruction *ParentI) { + Function *Caller = UnwindEdge->getParent(); + Module *M = Caller->getParent(); + LandingPadInst *OldLPad = UnwindEdge->getLandingPadInst(); + + // Create a new unwind edge for the detached rethrow. 
+ BasicBlock *NewUnwindEdge = BasicBlock::Create( + Caller->getContext(), UnwindEdge->getName(), Caller); + IRBuilder<> Builder(NewUnwindEdge); + // Get a debug location from ParentI. + if (const DebugLoc &Loc = ParentI->getDebugLoc()) + Builder.SetCurrentDebugLocation(Loc); + + // Add a landingpad to the new unwind edge. + LandingPadInst *LPad = Builder.CreateLandingPad(OldLPad->getType(), 0, + OldLPad->getName()); + LPad->setCleanup(true); + + // Add the terminator-function invocation. + Builder.CreateInvoke(Intrinsic::getDeclaration(M, TermFunc, + { LPad->getType() }), + Unreachable, UnwindEdge, { Token, LPad }); + + return NewUnwindEdge; +} + +static BasicBlock *MaybePromoteCallInBlock(BasicBlock *BB, + BasicBlock *UnwindEdge, + const Value *TaskFrame) { + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) { + Instruction *I = &*BBI++; + + // We only need to check for function calls: inlined invoke + // instructions require no special handling. + CallInst *CI = dyn_cast(I); + + if (!CI || CI->isInlineAsm()) + continue; + + // Stop the search early if we encounter a taskframe.create or a + // taskframe.end. + if (isTapirIntrinsic(Intrinsic::taskframe_create, CI) || + (TaskFrame && + isTapirIntrinsic(Intrinsic::taskframe_end, CI, TaskFrame))) + return nullptr; + + // No need to transform calls that do not throw. + if (CI->doesNotThrow()) + continue; + // We cannot transform calls with musttail tag. + if (CI->isMustTailCall()) + continue; + + // We do not need to (and in fact, cannot) convert possibly throwing calls + // to @llvm.experimental_deoptimize (resp. @llvm.experimental.guard) into + // invokes. The caller's "segment" of the deoptimization continuation + // attached to the newly inlined @llvm.experimental_deoptimize + // (resp. @llvm.experimental.guard) call should contain the exception + // handling logic, if any. 
+ if (auto *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize || + F->getIntrinsicID() == Intrinsic::experimental_guard) + continue; + + changeToInvokeAndSplitBasicBlock(CI, UnwindEdge); + return BB; + } + return nullptr; +} + +static Instruction *GetTaskFrameInstructionInBlock(BasicBlock *BB, + const Value *TaskFrame) { + for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) { + Instruction *I = &*BBI++; + + // We only need to check for function calls: inlined invoke + // instructions require no special handling. + CallInst *CI = dyn_cast(I); + + if (!CI || CI->isInlineAsm()) + continue; + + // Stop the search early if we encounter a taskframe.create or a + // taskframe.end. + if (isTapirIntrinsic(Intrinsic::taskframe_create, CI) && CI != TaskFrame) + return I; + if (TaskFrame && isTapirIntrinsic(Intrinsic::taskframe_end, CI, TaskFrame)) + return I; + } + return nullptr; +} + +// Recursively handle inlined tasks. +static void PromoteCallsInTasksHelper( + BasicBlock *EntryBlock, BasicBlock *UnwindEdge, + BasicBlock *Unreachable, Value *CurrentTaskFrame, + SmallVectorImpl *ParentWorklist, + SmallPtrSetImpl &Processed) { + SmallVector DetachesToReplace; + SmallVector Worklist; + // TODO: See if we need a global Visited set over all recursive calls, i.e., + // to handle shared exception-handling blocks. + SmallPtrSet Visited; + Worklist.push_back(EntryBlock); + do { + BasicBlock *BB = Worklist.pop_back_val(); + // Skip blocks we've seen before + if (!Visited.insert(BB).second) + continue; + + // Promote any calls in the block to invokes. 
+ while (BasicBlock *NewBB = + MaybePromoteCallInBlock(BB, UnwindEdge, CurrentTaskFrame)) + BB = cast(NewBB->getTerminator())->getNormalDest(); + + Instruction *TFI = GetTaskFrameInstructionInBlock(BB, CurrentTaskFrame); + if (TFI && isTapirIntrinsic(Intrinsic::taskframe_create, TFI)) { + Processed.insert(BB); + Instruction *TFCreate = TFI; + if (TFCreate != CurrentTaskFrame) { + // Split the block at the taskframe.create, if necessary. + BasicBlock *NewBB; + if (TFCreate != &BB->front()) + NewBB = SplitBlock(BB, TFCreate); + else + NewBB = BB; + + // Create an unwind edge for the taskframe. + BasicBlock *TaskFrameUnwindEdge = CreateSubTaskUnwindEdge( + Intrinsic::taskframe_resume, TFCreate, UnwindEdge, + Unreachable, TFCreate); + + // Recursively check all blocks + PromoteCallsInTasksHelper(NewBB, TaskFrameUnwindEdge, Unreachable, + TFCreate, &Worklist, Processed); + + // Remove the unwind edge for the taskframe if it is not needed. + if (pred_empty(TaskFrameUnwindEdge)) + TaskFrameUnwindEdge->eraseFromParent(); + continue; + } + } else if (TFI && isTapirIntrinsic(Intrinsic::taskframe_end, TFI, + CurrentTaskFrame)) { + // If we find a taskframe.end in this block that ends the current + // taskframe, add this block to the parent search. + assert(ParentWorklist && + "Unexpected taskframe.end: no parent worklist"); + if (BB->getTerminator()->getPrevNode() != TFI || + !isa(BB->getTerminator())) { + // This taskframe.end does not terminate the basic block. To make sure + // the rest of the block is processed properly, split the block. + BasicBlock *NewBB = SplitBlock(BB, TFI->getNextNode()); + ParentWorklist->push_back(NewBB); + } else { + // Add all successors of BB to the worklist. + for (BasicBlock *Successor : successors(BB)) + ParentWorklist->push_back(Successor); + } + continue; + } + + // Ignore reattach terminators. 
+ if (isa(BB->getTerminator()) || + isDetachedRethrow(BB->getTerminator())) + continue; + + // If we find a taskframe.resume terminator, add its successor to the + // parent search. + if (isTaskFrameResume(BB->getTerminator())) { + assert(isTaskFrameResume(UnwindEdge->getTerminator()) && + "Unexpected taskframe.resume, doesn't correspond to unwind edge"); + InvokeInst *II = cast(BB->getTerminator()); + assert(ParentWorklist && + "Unexpected taskframe.resume: no parent worklist"); + ParentWorklist->push_back(II->getUnwindDest()); + continue; + } + + // Process a detach instruction specially. In particular, process th + // spawned task recursively. + if (DetachInst *DI = dyn_cast(BB->getTerminator())) { + Processed.insert(BB); + if (!DI->hasUnwindDest()) { + // Create an unwind edge for the subtask, which is terminated with a + // detached-rethrow. + BasicBlock *SubTaskUnwindEdge = CreateSubTaskUnwindEdge( + Intrinsic::detached_rethrow, DI->getSyncRegion(), UnwindEdge, + Unreachable, DI); + // Recursively check all blocks in the detached task. + PromoteCallsInTasksHelper(DI->getDetached(), SubTaskUnwindEdge, + Unreachable, CurrentTaskFrame, &Worklist, + Processed); + // If the new unwind edge is not used, remove it. + if (pred_empty(SubTaskUnwindEdge)) + SubTaskUnwindEdge->eraseFromParent(); + else + DetachesToReplace.push_back(DI); + + } else { + // Because this detach has an unwind destination, Any calls in the + // spawned task that may throw should already be invokes. Hence there + // is no need to promote calls in this task. + if (Visited.insert(DI->getUnwindDest()).second) + // If the detach-unwind isn't dead, add it to the worklist. + Worklist.push_back(DI->getUnwindDest()); + } + // Add the continuation to the worklist. + if (isTaskFrameResume(UnwindEdge->getTerminator()) && + (CurrentTaskFrame == getTaskFrameUsed(DI->getDetached()))) { + // This detach-continuation terminates the current taskframe, so push it + // onto the parent worklist. 
+ assert(ParentWorklist && "Unexpected taskframe unwind edge"); + ParentWorklist->push_back(DI->getContinue()); + } else { + // We can process this detach-continuation directly, because it does not + // terminate the current taskframe. + Worklist.push_back(DI->getContinue()); + } + continue; + } + + // In the normal case, add all successors of BB to the worklist. + for (BasicBlock *Successor : successors(BB)) + Worklist.push_back(Successor); + + } while (!Worklist.empty()); + + // Replace detaches that now require unwind destinations. + while (!DetachesToReplace.empty()) { + DetachInst *DI = DetachesToReplace.pop_back_val(); + ReplaceInstWithInst(DI, DetachInst::Create( + DI->getDetached(), DI->getContinue(), UnwindEdge, + DI->getSyncRegion())); + } +} + +static FunctionCallee getDefaultPersonalityFn(Module *M) { + LLVMContext &C = M->getContext(); + Triple T(M->getTargetTriple()); + EHPersonality Pers = getDefaultEHPersonality(T); + return M->getOrInsertFunction(getEHPersonalityName(Pers), + FunctionType::get(Type::getInt32Ty(C), true)); +} + +void llvm::promoteCallsInTasksToInvokes(Function &F, const Twine Name) { + // Collect blocks to process, in order to handle unreachable blocks. + SmallVector ToProcess; + ToProcess.push_back(&F.getEntryBlock()); + for (BasicBlock &BB : F) { + Instruction *TFI = GetTaskFrameInstructionInBlock(&BB, nullptr); + if (TFI && isTapirIntrinsic(Intrinsic::taskframe_create, TFI)) + ToProcess.push_back(&BB); + + if (isa(BB.getTerminator())) + ToProcess.push_back(&BB); + } + + // Create a cleanup block. + LLVMContext &C = F.getContext(); + BasicBlock *CleanupBB = BasicBlock::Create(C, Name, &F); + Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C)); + + LandingPadInst *LPad = + LandingPadInst::Create(ExnTy, 1, Name+".lpad", CleanupBB); + LPad->setCleanup(true); + ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB); + + // Create the normal return for the task resumes. 
+ BasicBlock *UnreachableBlk = BasicBlock::Create(C, Name+".unreachable", &F); + + // Recursively handle inlined tasks. + SmallPtrSet Processed; + for (BasicBlock *BB : ToProcess) { + if (!Processed.contains(BB)) + PromoteCallsInTasksHelper(BB, CleanupBB, UnreachableBlk, nullptr, nullptr, + Processed); + } + + // Either finish inserting the cleanup block (and associated data) or remove + // it, depending on whether it is used. + if (!pred_empty(CleanupBB)) { + if (!F.hasPersonalityFn()) { + FunctionCallee PersFn = getDefaultPersonalityFn(F.getParent()); + F.setPersonalityFn(cast(PersFn.getCallee())); + } + // Inherit debug info for the landingpad and resume in CleanupBB, if + // possible. + for (const BasicBlock *Pred : predecessors(CleanupBB)) + if (const DebugLoc &Loc = Pred->getTerminator()->getDebugLoc()) { + LPad->setDebugLoc(Loc); + RI->setDebugLoc(Loc); + break; + } + } else { + CleanupBB->eraseFromParent(); + } + + // Either finish the unreachable block or remove it, depending on whether it + // is used. + if (!pred_empty(UnreachableBlk)) { + IRBuilder<> Builder(UnreachableBlk); + Builder.CreateUnreachable(); + } else { + UnreachableBlk->eraseFromParent(); + } +} + +void llvm::eraseTaskFrame(Value *TaskFrame, DominatorTree *DT) { + InlineTaskFrameResumes(TaskFrame, DT); + SmallVector ToErase; + for (User *U : TaskFrame->users()) { + if (Instruction *UI = dyn_cast(U)) + if (isTapirIntrinsic(Intrinsic::taskframe_use, UI) || + isTapirIntrinsic(Intrinsic::taskframe_end, UI)) + ToErase.push_back(UI); + } + + for (Instruction *I : ToErase) + I->eraseFromParent(); + + cast(TaskFrame)->eraseFromParent(); +} + +/// Find hints specified in the loop metadata and update local values. +void llvm::TapirLoopHints::getHintsFromMetadata() { + MDNode *LoopID = TheLoop->getLoopID(); + if (!LoopID) + return; + + // First operand should refer to the loop id itself. 
+ assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); + assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); + + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + const MDString *S = nullptr; + SmallVector Args; + + // The expected hint is either a MDString or a MDNode with the first + // operand a MDString. + if (const MDNode *MD = dyn_cast(LoopID->getOperand(i))) { + if (!MD || MD->getNumOperands() == 0) + continue; + S = dyn_cast(MD->getOperand(0)); + for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i) + Args.push_back(MD->getOperand(i)); + } else { + S = dyn_cast(LoopID->getOperand(i)); + assert(Args.size() == 0 && "too many arguments for MDString"); + } + + if (!S) + continue; + + // Check if the hint starts with the loop metadata prefix. + StringRef Name = S->getString(); + if (Args.size() == 1) + setHint(Name, Args[0]); + } +} + +/// Checks string hint with one operand and set value if valid. +void llvm::TapirLoopHints::setHint(StringRef Name, Metadata *Arg) { + if (!Name.startswith(Prefix())) + return; + Name = Name.substr(Prefix().size(), StringRef::npos); + + const ConstantInt *C = mdconst::dyn_extract(Arg); + if (!C) + return; + unsigned Val = C->getZExtValue(); + + Hint *Hints[] = {&Strategy, &Grainsize}; + for (auto H : Hints) { + if (Name == H->Name) { + if (H->validate(Val)) + H->Value = Val; + else + LLVM_DEBUG(dbgs() << "Tapir: ignoring invalid hint '" << + Name << "'\n"); + break; + } + } +} + +/// Create a new hint from name / value pair. +MDNode *llvm::TapirLoopHints::createHintMetadata( + StringRef Name, unsigned V) const { + LLVMContext &Context = TheLoop->getHeader()->getContext(); + Metadata *MDs[] = {MDString::get(Context, Name), + ConstantAsMetadata::get( + ConstantInt::get(Type::getInt32Ty(Context), V))}; + return MDNode::get(Context, MDs); +} + +/// Matches metadata with hint name. 
+bool llvm::TapirLoopHints::matchesHintMetadataName( + MDNode *Node, ArrayRef HintTypes) const { + MDString *Name = dyn_cast(Node->getOperand(0)); + if (!Name) + return false; + + for (auto H : HintTypes) + if (Name->getString().endswith(H.Name)) + return true; + return false; +} + +/// Sets current hints into loop metadata, keeping other values intact. +void llvm::TapirLoopHints::writeHintsToMetadata(ArrayRef HintTypes) { + if (HintTypes.size() == 0) + return; + + LLVMContext &Context = TheLoop->getHeader()->getContext(); + SmallVector MDs; + + // Reserve first location for self reference to the LoopID metadata node. + TempMDTuple TempNode = MDNode::getTemporary(Context, std::nullopt); + MDs.push_back(TempNode.get()); + + // If the loop already has metadata, then ignore the existing operands. + MDNode *LoopID = TheLoop->getLoopID(); + if (LoopID) { + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + MDNode *Node = cast(LoopID->getOperand(i)); + // If node in update list, ignore old value. + if (!matchesHintMetadataName(Node, HintTypes)) + MDs.push_back(Node); + } + } + + // Now, add the missing hints. + for (auto H : HintTypes) + MDs.push_back(createHintMetadata(Twine(Prefix(), H.Name).str(), H.Value)); + + // Replace current metadata node with new one. + MDNode *NewLoopID = MDNode::get(Context, MDs); + // Set operand 0 to refer to the loop id itself. + NewLoopID->replaceOperandWith(0, NewLoopID); + + TheLoop->setLoopID(NewLoopID); +} + +/// Sets current hints into loop metadata, keeping other values intact. +void llvm::TapirLoopHints::writeHintsToClonedMetadata(ArrayRef HintTypes, + ValueToValueMapTy &VMap) { + if (HintTypes.size() == 0) + return; + + LLVMContext &Context = + cast(VMap[TheLoop->getHeader()])->getContext(); + SmallVector MDs; + + // Reserve first location for self reference to the LoopID metadata node. 
+ TempMDTuple TempNode = MDNode::getTemporary(Context, std::nullopt); + MDs.push_back(TempNode.get()); + + // If the loop already has metadata, then ignore the existing operands. + MDNode *OrigLoopID = TheLoop->getLoopID(); + if (!OrigLoopID) + return; + + if (MDNode *LoopID = dyn_cast_or_null(VMap.MD()[OrigLoopID])) { + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + MDNode *Node = cast(LoopID->getOperand(i)); + // If node in update list, ignore old value. + if (!matchesHintMetadataName(Node, HintTypes)) + MDs.push_back(Node); + } + } + + // Now, add the missing hints. + for (auto H : HintTypes) + MDs.push_back(createHintMetadata(Twine(Prefix(), H.Name).str(), H.Value)); + + // Replace current metadata node with new one. + MDNode *NewLoopID = MDNode::get(Context, MDs); + // Set operand 0 to refer to the loop id itself. + NewLoopID->replaceOperandWith(0, NewLoopID); + + // Set the metadata on the terminator of the cloned loop's latch. + BasicBlock *ClonedLatch = cast(VMap[TheLoop->getLoopLatch()]); + assert(ClonedLatch && "Cloned Tapir loop does not have a single latch."); + ClonedLatch->getTerminator()->setMetadata(LLVMContext::MD_loop, NewLoopID); +} + +/// Sets current hints into loop metadata, keeping other values intact. +void llvm::TapirLoopHints::clearHintsMetadata() { + Hint Hints[] = {Hint("spawn.strategy", ST_SEQ, HK_STRATEGY), + Hint("grainsize", 0, HK_GRAINSIZE)}; + LLVMContext &Context = TheLoop->getHeader()->getContext(); + SmallVector MDs; + + // Reserve first location for self reference to the LoopID metadata node. + TempMDTuple TempNode = MDNode::getTemporary(Context, std::nullopt); + MDs.push_back(TempNode.get()); + + // If the loop already has metadata, then ignore the existing operands. + MDNode *LoopID = TheLoop->getLoopID(); + if (LoopID) { + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + MDNode *Node = cast(LoopID->getOperand(i)); + // If node in update list, ignore old value. 
+ if (!matchesHintMetadataName(Node, Hints)) + MDs.push_back(Node); + } + } + + // Replace current metadata node with new one. + MDNode *NewLoopID = MDNode::get(Context, MDs); + // Set operand 0 to refer to the loop id itself. + NewLoopID->replaceOperandWith(0, NewLoopID); + + TheLoop->setLoopID(NewLoopID); +} + +/// Returns true if Tapir-loop hints require loop outlining during lowering. +bool llvm::hintsDemandOutlining(const TapirLoopHints &Hints) { + switch (Hints.getStrategy()) { + case TapirLoopHints::ST_DAC: return true; + default: return false; + } +} + +MDNode *llvm::CopyNonTapirLoopMetadata(MDNode *LoopID, MDNode *OrigLoopID) { + SmallVector MDs; + MDs.push_back(nullptr); + + // Gather all existing loop metadata. + if (LoopID) + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) + MDs.push_back(LoopID->getOperand(i)); + + // Inherit metadata from original loop. + for (const MDOperand &Existing : drop_begin(OrigLoopID->operands(), 1)) { + MDNode *Op = cast(Existing.get()); + + // Skip malformatted attribute metadata nodes. + if (Op->getNumOperands() == 0) + return nullptr; + Metadata *NameMD = Op->getOperand(0).get(); + if (!isa(NameMD)) + return nullptr; + StringRef AttrName = cast(NameMD)->getString(); + // Skip tapir.loop metadata + if (!AttrName.startswith("tapir.loop")) + MDs.push_back(Op); + } + + // Build the new loop ID. + MDTuple *NewLoopID = MDNode::get(OrigLoopID->getContext(), MDs); + NewLoopID->replaceOperandWith(0, NewLoopID); + return NewLoopID; +} + +/// Examine a given loop to determine if it is a Tapir loop. Returns the Task +/// that encodes the loop body if so, or nullptr if not. +Task *llvm::getTaskIfTapirLoop(const Loop *L, TaskInfo *TI) { + if (!L || !TI) + return nullptr; + + TapirLoopHints Hints(L); + + LLVM_DEBUG(dbgs() << "Loop hints:" + << " strategy = " << Hints.printStrategy(Hints.getStrategy()) + << " grainsize = " << Hints.getGrainsize() + << "\n"); + + // Check that this loop has the structure of a Tapir loop. 
+ Task *T = getTaskIfTapirLoopStructure(L, TI); + if (!T) + return nullptr; + + // Check that the loop hints require this loop to be outlined. + if (!hintsDemandOutlining(Hints)) + return nullptr; + + return T; +} diff --git a/llvm/lib/Transforms/Utils/TaskCanonicalize.cpp b/llvm/lib/Transforms/Utils/TaskCanonicalize.cpp new file mode 100644 index 00000000000000..c73344d52a905e --- /dev/null +++ b/llvm/lib/Transforms/Utils/TaskCanonicalize.cpp @@ -0,0 +1,71 @@ +//===- TaskCanonicalize.cpp - Tapir task simplification pass ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass canonicalizes Tapir tasks, in particular, to split blocks at +// taskframe.create intrinsics. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/TaskCanonicalize.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/InitializePasses.h" +#include "llvm/Transforms/Utils/TapirUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "task-canonicalize" + +namespace { +struct TaskCanonicalize : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + TaskCanonicalize() : FunctionPass(ID) { + initializeTaskCanonicalizePass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved(); + } +}; +} + +char TaskCanonicalize::ID = 0; +INITIALIZE_PASS_BEGIN(TaskCanonicalize, "task-canonicalize", + "Canonicalize Tapir tasks", false, false) +INITIALIZE_PASS_END(TaskCanonicalize, "task-canonicalize", + "Canonicalize Tapir tasks", false, false) + +namespace llvm { +Pass *createTaskCanonicalizePass() { return new 
TaskCanonicalize(); } +} // end namespace llvm + +/// runOnFunction - Run through all tasks in the function and canonicalize. +bool TaskCanonicalize::runOnFunction(Function &F) { + if (F.empty()) + return false; + + LLVM_DEBUG(dbgs() << "TaskCanonicalize running on function " << F.getName() + << "\n"); + + return splitTaskFrameCreateBlocks(F); +} + +PreservedAnalyses TaskCanonicalizePass::run(Function &F, + FunctionAnalysisManager &AM) { + if (F.empty()) + return PreservedAnalyses::all(); + + LLVM_DEBUG(dbgs() << "TaskCanonicalize running on function " << F.getName() + << "\n"); + + bool Changed = splitTaskFrameCreateBlocks(F); + if (!Changed) + return PreservedAnalyses::all(); + return PreservedAnalyses::none(); +} diff --git a/llvm/lib/Transforms/Utils/TaskSimplify.cpp b/llvm/lib/Transforms/Utils/TaskSimplify.cpp new file mode 100644 index 00000000000000..795780acad63a9 --- /dev/null +++ b/llvm/lib/Transforms/Utils/TaskSimplify.cpp @@ -0,0 +1,702 @@ +//===- TaskSimplify.cpp - Tapir task simplification pass ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass performs several transformations to simplify Tapir tasks. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/TaskSimplify.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TapirTaskInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/TapirUtils.h" + +using namespace llvm; + +#define DEBUG_TYPE "task-simplify" + +// Statistics +STATISTIC(NumUniqueSyncRegs, "Number of unique sync regions found."); +STATISTIC(NumDiscriminatingSyncs, "Number of discriminating syncs found."); +STATISTIC(NumTaskFramesErased, "Number of taskframes erased"); +STATISTIC( + NumTaskFramesConverted, + "Number of taskframes converted to stacksave and stackrestore intrinsics"); +STATISTIC(NumSimpl, "Number of blocks simplified"); + +static cl::opt SimplifyTaskFrames( + "simplify-taskframes", cl::init(true), cl::Hidden, + cl::desc("Enable simplification of taskframes.")); + +static cl::opt PostCleanupCFG( + "post-cleanup-cfg", cl::init(true), cl::Hidden, + cl::desc("Cleanup the CFG after task simplification.")); + +static cl::opt PreserveAllSpawns( + "tasksimplify-preserve-all-spawns", cl::init(false), cl::Hidden, + cl::desc("Temporary development switch to ensure TaskSimplify does not " + "eliminate spawns 
that immediately sync.")); + +static bool syncMatchesReachingTask(const Value *SyncSR, + SmallPtrSetImpl &MPTasks) { + if (MPTasks.empty()) + return false; + for (const Task *MPTask : MPTasks) + if (SyncSR == MPTask->getDetach()->getSyncRegion()) + return true; + return false; +} + +static bool removeRedundantSyncs(MaybeParallelTasks &MPTasks, Task *T) { + // Skip tasks with no subtasks. + if (T->isSerial()) + return false; + + bool Changed = false; + SmallPtrSet RedundantSyncs; + for (Spindle *S : T->spindles()) + // Iterate over outgoing edges of S to find redundant syncs. + for (Spindle::SpindleEdge &Edge : S->out_edges()) + if (SyncInst *Y = dyn_cast(Edge.second->getTerminator())) + if (!syncMatchesReachingTask(Y->getSyncRegion(), MPTasks.TaskList[S])) { + LLVM_DEBUG(dbgs() << "Found redundant sync in spindle " << *S << + "\n"); + RedundantSyncs.insert(Y); + } + + // Replace all unnecesary syncs with unconditional branches. + SmallPtrSet MaybeDeadSyncUnwinds; + for (SyncInst *Y : RedundantSyncs) { + // Check for any sync.unwinds that might now be dead. + Instruction *MaybeSyncUnwind = + Y->getSuccessor(0)->getFirstNonPHIOrDbgOrLifetime(); + if (isSyncUnwind(MaybeSyncUnwind, Y->getSyncRegion())) + MaybeDeadSyncUnwinds.insert(cast(MaybeSyncUnwind)); + + LLVM_DEBUG(dbgs() << "Removing redundant sync " << *Y << "\n"); + ReplaceInstWithInst(Y, BranchInst::Create(Y->getSuccessor(0))); + } + // Remove any dead sync.unwinds. + for (CallBase *CB : MaybeDeadSyncUnwinds) { + LLVM_DEBUG(dbgs() << "Remove dead sync unwind " << *CB << "? 
"); + if (removeDeadSyncUnwind(CB)) + LLVM_DEBUG(dbgs() << "Yes.\n"); + else + LLVM_DEBUG(dbgs() << "No.\n"); + } + + Changed |= !RedundantSyncs.empty(); + + return Changed; +} + +static bool syncIsDiscriminating(const Value *SyncSR, + SmallPtrSetImpl &MPTasks) { + for (const Task *MPTask : MPTasks) + if (SyncSR != MPTask->getDetach()->getSyncRegion()) + return true; + return false; +} + +static bool removeRedundantSyncRegions(MaybeParallelTasks &MPTasks, Task *T) { + if (T->isSerial()) + return false; + + // Create filter for MPTasks of tasks from parent of T. + SmallPtrSet EntryTaskList; + for (const Task *MPTask : MPTasks.TaskList[T->getEntrySpindle()]) + EntryTaskList.insert(MPTask); + + // Find the unique sync regions in this task. + SmallPtrSet UniqueSyncRegs; + Instruction *FirstSyncRegion = nullptr; + for (Task *SubT : T->subtasks()) { + UniqueSyncRegs.insert(SubT->getDetach()->getSyncRegion()); + if (!FirstSyncRegion) + FirstSyncRegion = cast( + SubT->getDetach()->getSyncRegion()); + } + NumUniqueSyncRegs += UniqueSyncRegs.size(); + // Skip this task if there's only one unique sync region. + if (UniqueSyncRegs.size() < 2) + return false; + + bool Changed = false; + SmallPtrSet NonRedundantSyncRegs; + for (Spindle *S : T->spindles()) { + // Only consider spindles that might have tasks in parallel. + if (MPTasks.TaskList[S].empty()) continue; + + // Filter the task list of S to exclude tasks in parallel with the entry. + SmallPtrSet LocalTaskList; + for (const Task *MPTask : MPTasks.TaskList[S]) + if (!EntryTaskList.count(MPTask)) + LocalTaskList.insert(MPTask); + if (LocalTaskList.empty()) continue; + + // Iterate over outgoing edges of S to find discriminating syncs. 
+ for (Spindle::SpindleEdge &Edge : S->out_edges()) + if (const SyncInst *Y = dyn_cast(Edge.second->getTerminator())) + if (syncIsDiscriminating(Y->getSyncRegion(), LocalTaskList)) { + ++NumDiscriminatingSyncs; + LLVM_DEBUG(dbgs() << "Found discriminating sync " << *Y << "\n"); + NonRedundantSyncRegs.insert(Y->getSyncRegion()); + for (const Task *MPTask : LocalTaskList) + NonRedundantSyncRegs.insert(MPTask->getDetach()->getSyncRegion()); + } + } + + // Replace all redundant sync regions with the first sync region. + for (Value *SR : UniqueSyncRegs) { + if (!NonRedundantSyncRegs.count(SR) && SR != FirstSyncRegion) { + LLVM_DEBUG(dbgs() << "Replacing " << *SR << " with " << *FirstSyncRegion + << "\n"); + Changed = true; + SR->replaceAllUsesWith(FirstSyncRegion); + // Ensure that the first sync region is in the entry block of T. + if (FirstSyncRegion->getParent() != T->getEntry()) + FirstSyncRegion->moveAfter(&*T->getEntry()->getFirstInsertionPt()); + } + } + + return Changed; +} + +bool llvm::simplifySyncs(Task *T, MaybeParallelTasks &MPTasks) { + bool Changed = false; + + LLVM_DEBUG(dbgs() << "Simplifying syncs in task @ " + << T->getEntry()->getName() << "\n"); + + // Remove redundant syncs. This optimization might not be necessary here, + // because SimplifyCFG seems to do a good job removing syncs that cannot sync + // anything. + Changed |= removeRedundantSyncs(MPTasks, T); + + // Remove redundant sync regions. 
+ Changed |= removeRedundantSyncRegions(MPTasks, T); + + return Changed; +} + +static bool taskCanThrow(const Task *T) { + for (const Spindle *S : T->spindles()) + for (const BasicBlock *BB : S->blocks()) + if (isa(BB->getTerminator())) + return true; + return false; +} + +static bool taskCanReachContinuation(Task *T) { + if (T->isRootTask()) + return true; + + DetachInst *DI = T->getDetach(); + BasicBlock *Continue = DI->getContinue(); + for (BasicBlock *Pred : predecessors(Continue)) { + if (ReattachInst *RI = dyn_cast(Pred->getTerminator())) + if (T->encloses(RI->getParent())) + return true; + } + + return false; +} + +static bool detachImmediatelySyncs(DetachInst *DI) { + Instruction *I = DI->getContinue()->getFirstNonPHIOrDbgOrLifetime(); + return isa(I); +} + +bool llvm::simplifyTask(Task *T) { + if (T->isRootTask()) + return false; + + LLVM_DEBUG(dbgs() << "Simplifying task @ " << T->getEntry()->getName() + << "\n"); + + bool Changed = false; + DetachInst *DI = T->getDetach(); + + bool NestedSync = taskContainsSync(T); + + // If T's detach has an unwind dest and T cannot throw, remove the unwind + // destination from T's detach. + if (DI->hasUnwindDest()) { + if (!taskCanThrow(T)) { + removeUnwindEdge(DI->getParent()); + // removeUnwindEdge will invalidate the DI pointer. Get the new DI + // pointer. + DI = T->getDetach(); + Changed = true; + } + } + + if (!taskCanReachContinuation(T)) { + // This optimization assumes that if a task cannot reach its continuation + // then we shouldn't bother spawning it. The task might perform code that + // can reach the unwind destination, however. 
+ SerializeDetach(DI, T, NestedSync); + Changed = true; + } else if (!PreserveAllSpawns && detachImmediatelySyncs(DI)) { + SerializeDetach(DI, T, NestedSync); + Changed = true; + } + + return Changed; +} + +static bool canRemoveTaskFrame(const Spindle *TF, MaybeParallelTasks &MPTasks, + bool &TaskFrameContainsAlloca) { + Value *TFCreate = TF->getTaskFrameCreate(); + if (!TFCreate) + // Ignore implicit taskframes created from the start of a task that does not + // explicitly use another taskframe. + return false; + + // We can remove a taskframe if it does not allocate any stack storage of its + // own and it does not contain any distinguishing syncs. + + // We only need to check the spindles in the taskframe itself for these + // properties. We do not need to check the task that uses this taskframe. + const Task *UserT = TF->getTaskFromTaskFrame(); + + if (!UserT && !MPTasks.TaskList[TF].empty() && getTaskFrameResume(TFCreate)) + // Landingpads perform an implicit sync, so if there are logically parallel + // tasks with this unassociated taskframe and it has a resume destination, + // then it has a distinguishing sync. + return false; + + // Create filter for MPTasks of tasks from parent of task UserT, if UserT + // exists. + SmallPtrSet EntryTaskList; + if (UserT) + for (const Task *MPTask : MPTasks.TaskList[UserT->getEntrySpindle()]) + EntryTaskList.insert(MPTask); + + for (const Spindle *S : TF->taskframe_spindles()) { + // Skip spindles in the user task. + if (UserT && UserT->contains(S)) + continue; + + // Skip spindles that are placeholders. + if (isPlaceholderSuccessor(S->getEntry())) + continue; + + // Skip spindles in nested taskframes. + if (S != TF && S->getTaskFrameParent() != TF) + continue; + + // Filter the task list of S to exclude tasks in parallel with the entry. 
+ SmallPtrSet LocalTaskList; + for (const Task *MPTask : MPTasks.TaskList[S]) + if (!EntryTaskList.count(MPTask)) + LocalTaskList.insert(MPTask); + + for (const BasicBlock *BB : S->blocks()) { + // If the taskframe contains an alloca, then we can replace it with + // stacksave and stackrestore intrinsics if there is no associated task. + // Otherwise, we cannot remove the taskframe. + for (const Instruction &I : *BB) { + if (isa(I)) { + TaskFrameContainsAlloca = true; + if (UserT) + return false; + } + } + + // We cannot remove taskframes that contain discriminating syncs. Doing + // so would cause these syncs to sync tasks spawned in the parent + // taskframe. + if (const SyncInst *SI = dyn_cast(BB->getTerminator())) + if (syncIsDiscriminating(SI->getSyncRegion(), LocalTaskList)) + return false; + } + } + + return true; +} + +static bool skipForHoisting(const Instruction *I, + SmallPtrSetImpl &NotHoisted) { + if (I->isTerminator() || isTapirIntrinsic(Intrinsic::taskframe_create, I) || + isTapirIntrinsic(Intrinsic::syncregion_start, I) || + isa(I)) + return true; + + if (const CallInst *CI = dyn_cast(I)) + if (!(CI->doesNotAccessMemory() || CI->onlyAccessesArgMemory())) + return true; + + for (const Value *V : I->operand_values()) + if (const Instruction *I = dyn_cast(V)) + if (NotHoisted.count(I)) + return true; + + return false; +} + +static bool hoistOutOfTaskFrame(Instruction *TFCreate) { + bool Changed = false; + + BasicBlock *Entry = TFCreate->getParent(); + // We'll move instructions immediately before the taskframe.create + // instruction. + BasicBlock::iterator InsertPoint = Entry->begin(); + + // Scan the instructions in the entry block and find instructions to hoist + // before the taskframe.create. 
+ SmallPtrSet NotHoisted; + for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ) { + Instruction *Start = &*I++; + if (skipForHoisting(Start, NotHoisted)) { + NotHoisted.insert(Start); + continue; + } + + while (!skipForHoisting(&*I, NotHoisted)) + ++I; + + // Move the instructions + Entry->splice(InsertPoint, &*Entry, Start->getIterator(), I); + + Changed = true; + } + + return Changed; +} + +bool llvm::simplifyTaskFrames(TaskInfo &TI, DominatorTree &DT) { + // We compute maybe-parallel tasks here, to ensure the analysis is properly + // discarded if the CFG changes. + MaybeParallelTasks MPTasks; + TI.evaluateParallelState(MPTasks); + + bool Changed = false; + + // Get the set of taskframes we can erase. + SmallVector TaskFramesToErase; + SmallVector TaskFramesToConvert; + SmallVector TaskFramesToOptimize; + for (Spindle *TFRoot : TI.getRootTask()->taskframe_roots()) { + for (Spindle *TF : post_order>(TFRoot)) { + bool TaskFrameContainsAlloca = false; + if (canRemoveTaskFrame(TF, MPTasks, TaskFrameContainsAlloca)) { + if (TaskFrameContainsAlloca) + TaskFramesToConvert.push_back( + cast(TF->getTaskFrameCreate())); + else + TaskFramesToErase.push_back( + cast(TF->getTaskFrameCreate())); + } else if (Value *TFCreate = TF->getTaskFrameCreate()) + TaskFramesToOptimize.push_back(cast(TFCreate)); + } + } + + // First handle hoisting instructions out of a taskframe entry block, since + // this transformation does not change the CFG. + for (Instruction *TFCreate : TaskFramesToOptimize) { + LLVM_DEBUG(dbgs() << "Hoisting instructions out of taskframe " << *TFCreate + << "\n"); + Changed |= hoistOutOfTaskFrame(TFCreate); + } + + // Now delete any taskframes we don't need. 
+ for (Instruction *TFCreate : TaskFramesToConvert) { + LLVM_DEBUG(dbgs() << "Converting taskframe " << *TFCreate << "\n"); + Module *M = TFCreate->getModule(); + Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); + Function *StackRestore = + Intrinsic::getDeclaration(M, Intrinsic::stackrestore); + + // Save the stack at the point of the taskframe.create. + CallInst *SavedPtr = + IRBuilder<>(TFCreate).CreateCall(StackSave, {}, "savedstack.ts"); + + for (User *U : TFCreate->users()) { + if (Instruction *UI = dyn_cast(U)) { + // Restore the stack at each end of the taskframe. + if (isTapirIntrinsic(Intrinsic::taskframe_end, UI) || + isTapirIntrinsic(Intrinsic::taskframe_resume, UI)) + IRBuilder<>(UI).CreateCall(StackRestore, SavedPtr); + } + } + // Remove the taskframe. + eraseTaskFrame(TFCreate, &DT); + ++NumTaskFramesConverted; + Changed = true; + } + for (Instruction *TFCreate : TaskFramesToErase) { + LLVM_DEBUG(dbgs() << "Removing taskframe " << *TFCreate << "\n"); + eraseTaskFrame(TFCreate, &DT); + ++NumTaskFramesErased; + Changed = true; + } + + return Changed; +} + + +/// Call SimplifyCFG on all the blocks in the function, +/// iterating until no more changes are made. +static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, + DomTreeUpdater *DTU, + const SimplifyCFGOptions &Options) { + bool Changed = false; + bool LocalChange = true; + + SmallVector, 32> Edges; + FindFunctionBackedges(F, Edges); + SmallPtrSet UniqueLoopHeaders; + for (unsigned i = 0, e = Edges.size(); i != e; ++i) + UniqueLoopHeaders.insert(const_cast(Edges[i].second)); + + SmallVector LoopHeaders(UniqueLoopHeaders.begin(), + UniqueLoopHeaders.end()); + + while (LocalChange) { + LocalChange = false; + + // Loop over all of the basic blocks and remove them if they are unneeded. 
+ for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { + BasicBlock &BB = *BBIt++; + if (DTU) { + assert( + !DTU->isBBPendingDeletion(&BB) && + "Should not end up trying to simplify blocks marked for removal."); + // Make sure that the advanced iterator does not point at the blocks + // that are marked for removal, skip over all such blocks. + while (BBIt != F.end() && DTU->isBBPendingDeletion(&*BBIt)) + ++BBIt; + } + if (simplifyCFG(&BB, TTI, DTU, Options, LoopHeaders)) { + LocalChange = true; + ++NumSimpl; + } + } + Changed |= LocalChange; + } + return Changed; +} + +static bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI, + DominatorTree *DT, + const SimplifyCFGOptions &Options) { + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); + + bool EverChanged = removeUnreachableBlocks(F, DT ? &DTU : nullptr); + EverChanged |= iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options); + + // If neither pass changed anything, we're done. + if (!EverChanged) return false; + + // iterativelySimplifyCFG can (rarely) make some loops dead. If this happens, + // removeUnreachableBlocks is needed to nuke them, which means we should + // iterate between the two optimizations. We structure the code like this to + // avoid rerunning iterativelySimplifyCFG if the second pass of + // removeUnreachableBlocks doesn't do anything. + if (!removeUnreachableBlocks(F, DT ? &DTU : nullptr)) + return true; + + do { + EverChanged = iterativelySimplifyCFG(F, TTI, DT ? &DTU : nullptr, Options); + EverChanged |= removeUnreachableBlocks(F, DT ? 
&DTU : nullptr); + } while (EverChanged); + + return true; +} + +namespace { +struct TaskSimplify : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + TaskSimplify() : FunctionPass(ID) { + initializeTaskSimplifyPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + } +}; +} + +char TaskSimplify::ID = 0; +INITIALIZE_PASS_BEGIN(TaskSimplify, "task-simplify", + "Simplify Tapir tasks", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TaskInfoWrapperPass) +INITIALIZE_PASS_END(TaskSimplify, "task-simplify", + "Simplify Tapir tasks", false, false) + +namespace llvm { +Pass *createTaskSimplifyPass() { return new TaskSimplify(); } +} // end namespace llvm + +/// runOnFunction - Run through all tasks in the function and simplify them in +/// post order. +/// +bool TaskSimplify::runOnFunction(Function &F) { + if (skipFunction(F)) + return false; + + DominatorTree &DT = getAnalysis().getDomTree(); + TaskInfo &TI = getAnalysis().getTaskInfo(); + bool SplitBlocks = splitTaskFrameCreateBlocks(F, &DT, &TI); + TI.findTaskFrameTree(); + if (TI.isSerial() && !TI.foundChildTaskFrames()) + return false; + + SimplifyCFGOptions Options; + auto &TTI = getAnalysis().getTTI(F); + Options.AC = &getAnalysis().getAssumptionCache(F); + + bool Changed = false; + LLVM_DEBUG(dbgs() << "TaskSimplify running on function " << F.getName() + << "\n"); + + if (SimplifyTaskFrames) { + // Simplify taskframes. If anything changed, update the analysis. 
+ Changed |= simplifyTaskFrames(TI, DT); + if (Changed) { + TI.recalculate(F, DT); + if (TI.isSerial()) { + if (PostCleanupCFG && SplitBlocks) + simplifyFunctionCFG(F, TTI, &DT, Options); + return Changed; + } + } + } + + // Evaluate the tasks that might be in parallel with each spindle, and + // determine number of discriminating syncs: syncs that sync a subset of the + // detached tasks, based on sync regions. + MaybeParallelTasks MPTasks; + TI.evaluateParallelState(MPTasks); + + // Simplify syncs in each task in the function. + for (Task *T : post_order(TI.getRootTask())) + Changed |= simplifySyncs(T, MPTasks); + + // Simplify each task in the function. + for (Task *T : post_order(TI.getRootTask())) + Changed |= simplifyTask(T); + + if (PostCleanupCFG && (Changed | SplitBlocks)) + Changed |= simplifyFunctionCFG(F, TTI, nullptr, Options); + + return Changed; +} + +PreservedAnalyses TaskSimplifyPass::run(Function &F, + FunctionAnalysisManager &AM) { + if (F.empty()) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + DominatorTree &DT = AM.getResult(F); + TaskInfo &TI = AM.getResult(F); + LoopInfo *LI = AM.getCachedResult(F); + auto *MSSAAnalysis = AM.getCachedResult(F); + std::unique_ptr MSSAU; + if (MSSAAnalysis) { + auto *MSSA = &MSSAAnalysis->getMSSA(); + MSSAU = std::make_unique(MSSA); + } + + bool SplitBlocks = splitTaskFrameCreateBlocks(F, &DT, &TI, LI, MSSAU.get()); + TI.findTaskFrameTree(); + // Return early if there are no Tapir tasks or taskframes to simplify. + if (TI.isSerial() && !TI.foundChildTaskFrames()) { + // If we didn't event split taskframe.create blocks, all analyses are + // preserved. + if (!SplitBlocks) + return PreservedAnalyses::all(); + + // Identify passes preserved by splitTaskFrameCreateBlocks. 
+ PA.preserve(); + PA.preserve(); + PA.preserve(); + if (LI) + PA.preserve(); + if (MSSAAnalysis) + PA.preserve(); + return PA; + } + + SimplifyCFGOptions Options; + auto &TTI = AM.getResult(F); + Options.AC = &AM.getResult(F); + + bool Changed = false; + LLVM_DEBUG(dbgs() << "TaskSimplify running on function " << F.getName() + << "\n"); + + if (SimplifyTaskFrames) { + // Simplify taskframes. If anything changed, update the analysis. + Changed |= simplifyTaskFrames(TI, DT); + if (Changed) { + TI.recalculate(F, DT); + if (TI.isSerial()) { + if (PostCleanupCFG && SplitBlocks) + simplifyFunctionCFG(F, TTI, &DT, Options); + PA.preserve(); + return PA; + } + } + } + + // Evaluate the tasks that might be in parallel with each spindle, and + // determine number of discriminating syncs: syncs that sync a subset of the + // detached tasks, based on sync regions. + MaybeParallelTasks MPTasks; + TI.evaluateParallelState(MPTasks); + + // Simplify syncs in each task in the function. + for (Task *T : post_order(TI.getRootTask())) + Changed |= simplifySyncs(T, MPTasks); + + // Simplify each task in the function. 
+ for (Task *T : post_order(TI.getRootTask())) + Changed |= simplifyTask(T); + + if (PostCleanupCFG && (Changed | SplitBlocks)) + Changed |= simplifyFunctionCFG(F, TTI, nullptr, Options); + + if (!Changed) { + PA.preserve(); + PA.preserve(); + return PA; + } + PA = PreservedAnalyses::none(); + return PA; +} diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index b603bbe55dc9ab..0ae26469815736 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -88,6 +88,7 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" @@ -3092,6 +3093,8 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) { LoopScalarBody = OrigLoop->getHeader(); LoopVectorPreHeader = OrigLoop->getLoopPreheader(); assert(LoopVectorPreHeader && "Invalid loop structure"); + assert(!isa(LoopVectorPreHeader->getTerminator()) && + "Loop preheader terminated by sync."); LoopExitBlock = OrigLoop->getUniqueExitBlock(); // may be nullptr assert((LoopExitBlock || Cost->requiresScalarEpilogue(VF.isVector())) && "multiple exit loop without required epilogue?"); diff --git a/llvm/projects/CMakeLists.txt b/llvm/projects/CMakeLists.txt index 08f2fa522420b0..9fc530b950bec1 100644 --- a/llvm/projects/CMakeLists.txt +++ b/llvm/projects/CMakeLists.txt @@ -11,7 +11,9 @@ foreach(entry ${entries}) (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/libunwind) AND (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/test-suite) AND (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/openmp) AND - (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/cross-project-tests)) + (NOT ${entry} STREQUAL 
${CMAKE_CURRENT_SOURCE_DIR}/cross-project-tests) AND + (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/cheetah) AND + (NOT ${entry} STREQUAL ${CMAKE_CURRENT_SOURCE_DIR}/cilktools)) get_filename_component(entry_name "${entry}" NAME) add_llvm_external_project(${entry_name}) endif() @@ -28,6 +30,8 @@ if(${LLVM_BUILD_RUNTIME}) if(NOT MSVC OR LLVM_FORCE_BUILD_RUNTIME) # Add the projects in reverse order of their dependencies so that the # dependent projects can see the target names of their dependencies. + add_llvm_external_project(cilktools) + add_llvm_external_project(cheetah) add_llvm_external_project(libunwind) add_llvm_external_project(pstl) add_llvm_external_project(libc) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 75256c1326be96..9f02b392131802 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -239,6 +239,8 @@ function(runtime_default_target) -DCMAKE_C_COMPILER_WORKS=ON -DCMAKE_CXX_COMPILER_WORKS=ON -DCMAKE_ASM_COMPILER_WORKS=ON + # TODO: Check if this argument is still needed. + -DCMAKE_OSX_DEPLOYMENT_TARGET=${CMAKE_OSX_DEPLOYMENT_TARGET} ${COMMON_CMAKE_ARGS} ${RUNTIMES_CMAKE_ARGS} ${ARG_CMAKE_ARGS} diff --git a/llvm/test/Analysis/TapirRaceDetect/bitcast-function.ll b/llvm/test/Analysis/TapirRaceDetect/bitcast-function.ll new file mode 100644 index 00000000000000..c1b5c5aaa7c1a3 --- /dev/null +++ b/llvm/test/Analysis/TapirRaceDetect/bitcast-function.ll @@ -0,0 +1,27 @@ +; Check static race detection with calls to bitcast functions in +; blocks terminated by unreachable. +; +; RUN: opt < %s -passes='print' -disable-output 2>&1 | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: sanitize_cilk +define dso_local void @setup() local_unnamed_addr #0 { +entry: + tail call void (i32, ...) 
bitcast (void (...)* @bpnn_initialize to void (i32, ...)*)(i32 7) #2 + unreachable +} + +; CHECK: tail call void (i32, ...) @bpnn_initialize(i32 7) +; CHECK: Opaque +; CHECK: Opaque racer + +declare dso_local void @bpnn_initialize(...) local_unnamed_addr #1 + +attributes #0 = { sanitize_cilk } +attributes #1 = { "use-soft-float"="false" } +attributes #2 = { nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 12.0.0 (git@github.com:OpenCilk/opencilk-project.git 33ec1ef302b9173b44ffda58e6ad9447b803598a)"} diff --git a/llvm/test/Analysis/TapirRaceDetect/check-pointer-with-casts.ll b/llvm/test/Analysis/TapirRaceDetect/check-pointer-with-casts.ll new file mode 100644 index 00000000000000..1fe4693a12afbb --- /dev/null +++ b/llvm/test/Analysis/TapirRaceDetect/check-pointer-with-casts.ll @@ -0,0 +1,370 @@ +; RUN: opt < %s -passes='print' -aa-pipeline=default -evaluate-aa-metadata -disable-output 2>&1 | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.__cilkrts_hyperobject_base.23.67.122.133.144 = type { %struct.cilk_c_monoid.22.66.121.132.143, i32, i32, i64 } +%struct.cilk_c_monoid.22.66.121.132.143 = type { void (i8*, i8*, i8*)*, void (i8*, i8*)*, void (i8*, i8*)*, {}*, void (%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i8*)* } +%struct.IntersectionEventList.27.71.126.137.148 = type { %struct.IntersectionEventNode.26.70.125.136.147*, %struct.IntersectionEventNode.26.70.125.136.147* } +%struct.IntersectionEventNode.26.70.125.136.147 = type { %struct.Line.25.69.124.135.146*, %struct.Line.25.69.124.135.146*, i32, %struct.IntersectionEventNode.26.70.125.136.147* } +%struct.Line.25.69.124.135.146 = type { %struct.Vec.24.68.123.134.145, %struct.Vec.24.68.123.134.145, %struct.Vec.24.68.123.134.145, i32, double, double, double, double, double, i32 } +%struct.Vec.24.68.123.134.145 = type { double, double } +%struct.CollisionWorld.30.74.129.140.151 = 
type { %struct.Line.25.69.124.135.146**, %struct.Line.25.69.124.135.146*, i32, i32, i32, %struct.QuadTree.29.73.128.139.150*, i32, i32 } +%struct.QuadTree.29.73.128.139.150 = type { %struct.Boundary.28.72.127.138.149, %struct.Boundary.28.72.127.138.149, i32, %struct.QuadTree.29.73.128.139.150*, [4 x %struct.QuadTree.29.73.128.139.150*], [350 x %struct.Line.25.69.124.135.146*], i32 } +%struct.Boundary.28.72.127.138.149 = type { double, double, double, double } +%struct.cilk_c_reducer_opadd_unsigned.32.76.131.142.153 = type { %struct.__cilkrts_hyperobject_base.23.67.122.133.144, [8 x i8], i32, [60 x i8] } +%struct.IEL_Reducer.31.75.130.141.152 = type { %struct.__cilkrts_hyperobject_base.23.67.122.133.144, [8 x i8], %struct.IntersectionEventList.27.71.126.137.148, [48 x i8] } + +@intersectionEventListR = external dso_local global { { { void (i8*, i8*, i8*)*, void (i8*, i8*)*, void (i8*, i8*)*, i8* (%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i64)*, void (%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i8*)* }, i32, i32, i64 }, [8 x i8], %struct.IntersectionEventList.27.71.126.137.148, [48 x i8] }, align 64 +@.str = external dso_local unnamed_addr constant [13 x i8], align 1 +@.str.1 = external dso_local unnamed_addr constant [17 x i8], align 1 +@__PRETTY_FUNCTION__.CollisionWorld_new = external dso_local unnamed_addr constant [55 x i8], align 1 +@__const.CollisionWorld_detectIntersection.numLineLineCollisions = external dso_local unnamed_addr constant { { { void (i8*, i8*, i8*)*, void (i8*, i8*)*, void (i8*, i8*)*, i8* (%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i64)*, void (%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i8*)* }, i32, i32, i64 }, [8 x i8], i32, [60 x i8] }, align 64 +@.str.2 = external dso_local unnamed_addr constant [25 x i8], align 1 +@__PRETTY_FUNCTION__.CollisionWorld_collisionSolver = external dso_local unnamed_addr constant [88 x i8], align 1 +@.str.3 = external dso_local unnamed_addr constant [108 x i8], 
align 1 + +declare dso_local void @IEL_Reduce(i8*, i8*, i8*) #0 + +declare dso_local void @IEL_Identity(i8*, i8*) #0 + +declare dso_local void @IEL_Destroy(i8*, i8*) #0 + +declare dso_local i8* @__cilkrts_hyper_alloc(%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i64) #0 + +declare dso_local void @__cilkrts_hyper_dealloc(%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i8*) #0 + +; Function Attrs: nounwind sanitize_cilk uwtable +declare dso_local %struct.CollisionWorld.30.74.129.140.151* @CollisionWorld_new(i32) local_unnamed_addr #1 + +; Function Attrs: noreturn nounwind +declare dso_local void @__assert_fail(i8*, i8*, i32, i8*) local_unnamed_addr #2 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #3 + +; Function Attrs: nofree nounwind +declare dso_local noalias i8* @malloc(i64) local_unnamed_addr #4 + +; Function Attrs: inlinehint nounwind sanitize_cilk uwtable +declare dso_local fastcc void @boundary_make(%struct.Boundary.28.72.127.138.149* noalias sret(%struct.Boundary.28.72.127.138.149), double, double, double) unnamed_addr #5 + +declare dso_local %struct.QuadTree.29.73.128.139.150* @quadtree_new(%struct.Boundary.28.72.127.138.149* byval(%struct.Boundary.28.72.127.138.149) align 8, %struct.QuadTree.29.73.128.139.150*) local_unnamed_addr #0 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #3 + +; Function Attrs: nounwind sanitize_cilk uwtable +declare dso_local void @CollisionWorld_delete(%struct.CollisionWorld.30.74.129.140.151*) local_unnamed_addr #1 + +; Function Attrs: nounwind +declare dso_local void @free(i8* nocapture) local_unnamed_addr #6 + +declare dso_local void @quadtree_delete(%struct.QuadTree.29.73.128.139.150*) local_unnamed_addr #0 + +; Function Attrs: inlinehint nounwind sanitize_cilk uwtable +declare dso_local i32 @CollisionWorld_getNumOfLines(%struct.CollisionWorld.30.74.129.140.151*) 
local_unnamed_addr #5 + +; Function Attrs: nounwind sanitize_cilk uwtable +declare dso_local void @CollisionWorld_addOldLine(%struct.CollisionWorld.30.74.129.140.151*, %struct.Line.25.69.124.135.146*) local_unnamed_addr #1 + +declare dso_local zeroext i1 @quadtree_addLine(%struct.QuadTree.29.73.128.139.150*, %struct.Line.25.69.124.135.146*) local_unnamed_addr #0 + +; Function Attrs: nounwind sanitize_cilk uwtable +declare dso_local void @CollisionWorld_addLines(%struct.CollisionWorld.30.74.129.140.151*, i32) local_unnamed_addr #1 + +; Function Attrs: nounwind sanitize_cilk uwtable +declare dso_local %struct.Line.25.69.124.135.146* @CollisionWorld_getLine(%struct.CollisionWorld.30.74.129.140.151*, i32) local_unnamed_addr #1 + +; Function Attrs: nounwind sanitize_cilk uwtable +declare dso_local void @CollisionWorld_updateLines(%struct.CollisionWorld.30.74.129.140.151*) local_unnamed_addr #1 + +; Function Attrs: nounwind sanitize_cilk uwtable +define dso_local void @CollisionWorld_detectIntersection(%struct.CollisionWorld.30.74.129.140.151* %collisionWorld) local_unnamed_addr #1 { +entry: + %numLineLineCollisions = alloca %struct.cilk_c_reducer_opadd_unsigned.32.76.131.142.153, align 64 + %syncreg = call token @llvm.syncregion.start() + call void @__cilkrts_hyper_create(%struct.__cilkrts_hyperobject_base.23.67.122.133.144* getelementptr inbounds (%struct.IEL_Reducer.31.75.130.141.152, %struct.IEL_Reducer.31.75.130.141.152* bitcast ({ { { void (i8*, i8*, i8*)*, void (i8*, i8*)*, void (i8*, i8*)*, i8* (%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i64)*, void (%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i8*)* }, i32, i32, i64 }, [8 x i8], %struct.IntersectionEventList.27.71.126.137.148, [48 x i8] }* @intersectionEventListR to %struct.IEL_Reducer.31.75.130.141.152*), i32 0, i32 0)) + %0 = bitcast %struct.cilk_c_reducer_opadd_unsigned.32.76.131.142.153* %numLineLineCollisions to i8* + call void @llvm.lifetime.start.p0i8(i64 128, i8* %0) #8 + call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* align 64 %0, i8* align 64 bitcast ({ { { void (i8*, i8*, i8*)*, void (i8*, i8*)*, void (i8*, i8*)*, i8* (%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i64)*, void (%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i8*)* }, i32, i32, i64 }, [8 x i8], i32, [60 x i8] }* @__const.CollisionWorld_detectIntersection.numLineLineCollisions to i8*), i64 128, i1 false) + %__cilkrts_hyperbase = getelementptr inbounds %struct.cilk_c_reducer_opadd_unsigned.32.76.131.142.153, %struct.cilk_c_reducer_opadd_unsigned.32.76.131.142.153* %numLineLineCollisions, i32 0, i32 0 + call void @__cilkrts_hyper_create(%struct.__cilkrts_hyperobject_base.23.67.122.133.144* %__cilkrts_hyperbase) + %numOfLines = getelementptr inbounds %struct.CollisionWorld.30.74.129.140.151, %struct.CollisionWorld.30.74.129.140.151* %collisionWorld, i32 0, i32 2 + %1 = load i32, i32* %numOfLines, align 8, !tbaa !2 + %cmp = icmp slt i32 0, %1 + br i1 %cmp, label %pfor.ph, label %cleanup32 + +pfor.ph: ; preds = %entry + %lines = getelementptr inbounds %struct.CollisionWorld.30.74.129.140.151, %struct.CollisionWorld.30.74.129.140.151* %collisionWorld, i32 0, i32 0 + %qt = getelementptr inbounds %struct.CollisionWorld.30.74.129.140.151, %struct.CollisionWorld.30.74.129.140.151* %collisionWorld, i32 0, i32 5 + %lines24 = getelementptr inbounds %struct.CollisionWorld.30.74.129.140.151, %struct.CollisionWorld.30.74.129.140.151* %collisionWorld, i32 0, i32 0 + br label %pfor.cond + +pfor.cond: ; preds = %pfor.inc, %pfor.ph + %__begin.0 = phi i32 [ 0, %pfor.ph ], [ %inc27, %pfor.inc ] + detach within %syncreg, label %pfor.body.entry, label %pfor.inc + +pfor.body.entry: ; preds = %pfor.cond + br label %pfor.body + +pfor.body: ; preds = %pfor.body.entry + %add3 = add nsw i32 %__begin.0, 1 + %idxprom = sext i32 %__begin.0 to i64 + br label %for.cond + +for.cond: ; preds = %cleanup, %pfor.body + %j.0 = phi i32 [ %add3, %pfor.body ], [ %inc22, %cleanup ] + %2 = load i32, i32* 
%numOfLines, align 8, !tbaa !2 + %cmp5 = icmp ult i32 %j.0, %2 + br i1 %cmp5, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + %3 = load %struct.QuadTree.29.73.128.139.150*, %struct.QuadTree.29.73.128.139.150** %qt, align 8, !tbaa !8 + %4 = load %struct.Line.25.69.124.135.146**, %struct.Line.25.69.124.135.146*** %lines24, align 8, !tbaa !9 + %idxprom25 = sext i32 %__begin.0 to i64 + %arrayidx26 = getelementptr inbounds %struct.Line.25.69.124.135.146*, %struct.Line.25.69.124.135.146** %4, i64 %idxprom25 + %5 = load %struct.Line.25.69.124.135.146*, %struct.Line.25.69.124.135.146** %arrayidx26, align 8, !tbaa !10 + call void @quadtree_detectLineCollision(%struct.QuadTree.29.73.128.139.150* %3, %struct.Line.25.69.124.135.146* %5, %struct.IEL_Reducer.31.75.130.141.152* bitcast ({ { { void (i8*, i8*, i8*)*, void (i8*, i8*)*, void (i8*, i8*)*, i8* (%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i64)*, void (%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i8*)* }, i32, i32, i64 }, [8 x i8], %struct.IntersectionEventList.27.71.126.137.148, [48 x i8] }* @intersectionEventListR to %struct.IEL_Reducer.31.75.130.141.152*), %struct.cilk_c_reducer_opadd_unsigned.32.76.131.142.153* %numLineLineCollisions) + reattach within %syncreg, label %pfor.inc + +; CHECK: I = call void @quadtree_detectLineCollision(ptr %3, ptr %5, ptr @intersectionEventListR, ptr %numLineLineCollisions) +; CHECK: Loc = @intersectionEventListR +; CHECK-NEXT: OperandNum = 2 + +for.body: ; preds = %for.cond + %6 = load %struct.Line.25.69.124.135.146**, %struct.Line.25.69.124.135.146*** %lines, align 8, !tbaa !9 + %arrayidx = getelementptr inbounds %struct.Line.25.69.124.135.146*, %struct.Line.25.69.124.135.146** %6, i64 %idxprom + %7 = load %struct.Line.25.69.124.135.146*, %struct.Line.25.69.124.135.146** %arrayidx, align 8, !tbaa !10 + %idxprom8 = sext i32 %j.0 to i64 + %arrayidx9 = getelementptr inbounds %struct.Line.25.69.124.135.146*, 
%struct.Line.25.69.124.135.146** %6, i64 %idxprom8 + %8 = load %struct.Line.25.69.124.135.146*, %struct.Line.25.69.124.135.146** %arrayidx9, align 8, !tbaa !10 + %call = call fastcc zeroext i1 @quick_intersection_detection(%struct.Line.25.69.124.135.146* %7, %struct.Line.25.69.124.135.146* %8) + br i1 %call, label %if.end, label %cleanup + +if.end: ; preds = %for.body + %call10 = call fastcc i32 @compareLines(%struct.Line.25.69.124.135.146* %7, %struct.Line.25.69.124.135.146* %8) + %cmp11 = icmp sge i32 %call10, 0 + %spec.select = select i1 %cmp11, %struct.Line.25.69.124.135.146* %8, %struct.Line.25.69.124.135.146* %7 + %spec.select1 = select i1 %cmp11, %struct.Line.25.69.124.135.146* %7, %struct.Line.25.69.124.135.146* %8 + %call14 = call i32 @intersect(%struct.Line.25.69.124.135.146* %spec.select, %struct.Line.25.69.124.135.146* %spec.select1) + %cmp15 = icmp ne i32 %call14, 0 + br i1 %cmp15, label %if.then16, label %cleanup + +if.then16: ; preds = %if.end + %call17 = call strand_noalias i8* @__cilkrts_hyper_lookup(%struct.__cilkrts_hyperobject_base.23.67.122.133.144* getelementptr inbounds (%struct.IEL_Reducer.31.75.130.141.152, %struct.IEL_Reducer.31.75.130.141.152* bitcast ({ { { void (i8*, i8*, i8*)*, void (i8*, i8*)*, void (i8*, i8*)*, i8* (%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i64)*, void (%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i8*)* }, i32, i32, i64 }, [8 x i8], %struct.IntersectionEventList.27.71.126.137.148, [48 x i8] }* @intersectionEventListR to %struct.IEL_Reducer.31.75.130.141.152*), i32 0, i32 0)) #9 + %9 = bitcast i8* %call17 to %struct.IntersectionEventList.27.71.126.137.148* + call void @IntersectionEventList_appendNode(%struct.IntersectionEventList.27.71.126.137.148* %9, %struct.Line.25.69.124.135.146* %spec.select, %struct.Line.25.69.124.135.146* %spec.select1, i32 %call14) + %call19 = call strand_noalias i8* @__cilkrts_hyper_lookup(%struct.__cilkrts_hyperobject_base.23.67.122.133.144* %__cilkrts_hyperbase) #9 + 
%10 = bitcast i8* %call19 to i32* + %11 = load i32, i32* %10, align 4, !tbaa !11 + %inc = add i32 %11, 1 + store i32 %inc, i32* %10, align 4, !tbaa !11 + br label %cleanup + +cleanup: ; preds = %if.then16, %if.end, %for.body + %inc22 = add nsw i32 %j.0, 1 + br label %for.cond + +pfor.inc: ; preds = %for.cond.cleanup, %pfor.cond + %inc27 = add nsw i32 %__begin.0, 1 + %cmp28 = icmp slt i32 %inc27, %1 + br i1 %cmp28, label %pfor.cond, label %pfor.cond.cleanup, !llvm.loop !12 + +pfor.cond.cleanup: ; preds = %pfor.inc + sync within %syncreg, label %cleanup32 + +cleanup32: ; preds = %pfor.cond.cleanup, %entry + %qt34 = getelementptr inbounds %struct.CollisionWorld.30.74.129.140.151, %struct.CollisionWorld.30.74.129.140.151* %collisionWorld, i32 0, i32 5 + %12 = load %struct.QuadTree.29.73.128.139.150*, %struct.QuadTree.29.73.128.139.150** %qt34, align 8, !tbaa !8 + call void @quadtree_detectIntersection(%struct.QuadTree.29.73.128.139.150* %12, %struct.IEL_Reducer.31.75.130.141.152* bitcast ({ { { void (i8*, i8*, i8*)*, void (i8*, i8*)*, void (i8*, i8*)*, i8* (%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i64)*, void (%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i8*)* }, i32, i32, i64 }, [8 x i8], %struct.IntersectionEventList.27.71.126.137.148, [48 x i8] }* @intersectionEventListR to %struct.IEL_Reducer.31.75.130.141.152*), %struct.cilk_c_reducer_opadd_unsigned.32.76.131.142.153* %numLineLineCollisions) + %value = getelementptr inbounds %struct.cilk_c_reducer_opadd_unsigned.32.76.131.142.153, %struct.cilk_c_reducer_opadd_unsigned.32.76.131.142.153* %numLineLineCollisions, i32 0, i32 2 + %13 = load i32, i32* %value, align 64, !tbaa !14 + %numLineLineCollisions35 = getelementptr inbounds %struct.CollisionWorld.30.74.129.140.151, %struct.CollisionWorld.30.74.129.140.151* %collisionWorld, i32 0, i32 7 + %14 = load i32, i32* %numLineLineCollisions35, align 4, !tbaa !19 + %add36 = add i32 %14, %13 + store i32 %add36, i32* %numLineLineCollisions35, align 4, 
!tbaa !19 + call void @__cilkrts_hyper_destroy(%struct.__cilkrts_hyperobject_base.23.67.122.133.144* %__cilkrts_hyperbase) + %15 = load %struct.IntersectionEventNode.26.70.125.136.147*, %struct.IntersectionEventNode.26.70.125.136.147** getelementptr inbounds (%struct.IEL_Reducer.31.75.130.141.152, %struct.IEL_Reducer.31.75.130.141.152* bitcast ({ { { void (i8*, i8*, i8*)*, void (i8*, i8*)*, void (i8*, i8*)*, i8* (%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i64)*, void (%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i8*)* }, i32, i32, i64 }, [8 x i8], %struct.IntersectionEventList.27.71.126.137.148, [48 x i8] }* @intersectionEventListR to %struct.IEL_Reducer.31.75.130.141.152*), i32 0, i32 2, i32 0), align 64, !tbaa !20 + br label %while.cond + +while.cond: ; preds = %if.end49, %cleanup32 + %startNode.0 = phi %struct.IntersectionEventNode.26.70.125.136.147* [ %15, %cleanup32 ], [ %18, %if.end49 ] + %cmp38 = icmp ne %struct.IntersectionEventNode.26.70.125.136.147* %startNode.0, null + br i1 %cmp38, label %while.body, label %while.end51 + +while.body: ; preds = %while.cond + %next = getelementptr inbounds %struct.IntersectionEventNode.26.70.125.136.147, %struct.IntersectionEventNode.26.70.125.136.147* %startNode.0, i32 0, i32 3 + %16 = load %struct.IntersectionEventNode.26.70.125.136.147*, %struct.IntersectionEventNode.26.70.125.136.147** %next, align 8, !tbaa !23 + br label %while.cond39 + +while.cond39: ; preds = %while.body41, %while.body + %minNode.0 = phi %struct.IntersectionEventNode.26.70.125.136.147* [ %startNode.0, %while.body ], [ %spec.select2, %while.body41 ] + %curNode.0 = phi %struct.IntersectionEventNode.26.70.125.136.147* [ %16, %while.body ], [ %17, %while.body41 ] + %cmp40 = icmp ne %struct.IntersectionEventNode.26.70.125.136.147* %curNode.0, null + br i1 %cmp40, label %while.body41, label %while.end + +while.body41: ; preds = %while.cond39 + %call42 = call i32 
@IntersectionEventNode_compareData(%struct.IntersectionEventNode.26.70.125.136.147* %curNode.0, %struct.IntersectionEventNode.26.70.125.136.147* %minNode.0) + %cmp43 = icmp slt i32 %call42, 0 + %spec.select2 = select i1 %cmp43, %struct.IntersectionEventNode.26.70.125.136.147* %curNode.0, %struct.IntersectionEventNode.26.70.125.136.147* %minNode.0 + %next46 = getelementptr inbounds %struct.IntersectionEventNode.26.70.125.136.147, %struct.IntersectionEventNode.26.70.125.136.147* %curNode.0, i32 0, i32 3 + %17 = load %struct.IntersectionEventNode.26.70.125.136.147*, %struct.IntersectionEventNode.26.70.125.136.147** %next46, align 8, !tbaa !23 + br label %while.cond39 + +while.end: ; preds = %while.cond39 + %minNode.0.lcssa = phi %struct.IntersectionEventNode.26.70.125.136.147* [ %minNode.0, %while.cond39 ] + %cmp47 = icmp ne %struct.IntersectionEventNode.26.70.125.136.147* %minNode.0.lcssa, %startNode.0 + br i1 %cmp47, label %if.then48, label %if.end49 + +if.then48: ; preds = %while.end + call void @IntersectionEventNode_swapData(%struct.IntersectionEventNode.26.70.125.136.147* %minNode.0.lcssa, %struct.IntersectionEventNode.26.70.125.136.147* %startNode.0) + br label %if.end49 + +if.end49: ; preds = %if.then48, %while.end + %18 = load %struct.IntersectionEventNode.26.70.125.136.147*, %struct.IntersectionEventNode.26.70.125.136.147** %next, align 8, !tbaa !23 + br label %while.cond + +while.end51: ; preds = %while.cond + %19 = load %struct.IntersectionEventNode.26.70.125.136.147*, %struct.IntersectionEventNode.26.70.125.136.147** getelementptr inbounds (%struct.IEL_Reducer.31.75.130.141.152, %struct.IEL_Reducer.31.75.130.141.152* bitcast ({ { { void (i8*, i8*, i8*)*, void (i8*, i8*)*, void (i8*, i8*)*, i8* (%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i64)*, void (%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i8*)* }, i32, i32, i64 }, [8 x i8], %struct.IntersectionEventList.27.71.126.137.148, [48 x i8] }* @intersectionEventListR to 
%struct.IEL_Reducer.31.75.130.141.152*), i32 0, i32 2, i32 0), align 64, !tbaa !20 + br label %while.cond53 + +while.cond53: ; preds = %while.body55, %while.end51 + %curNode52.0 = phi %struct.IntersectionEventNode.26.70.125.136.147* [ %19, %while.end51 ], [ %23, %while.body55 ] + %cmp54 = icmp ne %struct.IntersectionEventNode.26.70.125.136.147* %curNode52.0, null + br i1 %cmp54, label %while.body55, label %while.end60 + +while.body55: ; preds = %while.cond53 + %l156 = getelementptr inbounds %struct.IntersectionEventNode.26.70.125.136.147, %struct.IntersectionEventNode.26.70.125.136.147* %curNode52.0, i32 0, i32 0 + %20 = load %struct.Line.25.69.124.135.146*, %struct.Line.25.69.124.135.146** %l156, align 8, !tbaa !25 + %l257 = getelementptr inbounds %struct.IntersectionEventNode.26.70.125.136.147, %struct.IntersectionEventNode.26.70.125.136.147* %curNode52.0, i32 0, i32 1 + %21 = load %struct.Line.25.69.124.135.146*, %struct.Line.25.69.124.135.146** %l257, align 8, !tbaa !26 + %intersectionType58 = getelementptr inbounds %struct.IntersectionEventNode.26.70.125.136.147, %struct.IntersectionEventNode.26.70.125.136.147* %curNode52.0, i32 0, i32 2 + %22 = load i32, i32* %intersectionType58, align 8, !tbaa !27 + call void @CollisionWorld_collisionSolver(%struct.CollisionWorld.30.74.129.140.151* %collisionWorld, %struct.Line.25.69.124.135.146* %20, %struct.Line.25.69.124.135.146* %21, i32 %22) + %next59 = getelementptr inbounds %struct.IntersectionEventNode.26.70.125.136.147, %struct.IntersectionEventNode.26.70.125.136.147* %curNode52.0, i32 0, i32 3 + %23 = load %struct.IntersectionEventNode.26.70.125.136.147*, %struct.IntersectionEventNode.26.70.125.136.147** %next59, align 8, !tbaa !23 + br label %while.cond53 + +while.end60: ; preds = %while.cond53 + call void @IntersectionEventList_deleteNodes(%struct.IntersectionEventList.27.71.126.137.148* getelementptr inbounds (%struct.IEL_Reducer.31.75.130.141.152, %struct.IEL_Reducer.31.75.130.141.152* bitcast ({ { { void (i8*, 
i8*, i8*)*, void (i8*, i8*)*, void (i8*, i8*)*, i8* (%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i64)*, void (%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i8*)* }, i32, i32, i64 }, [8 x i8], %struct.IntersectionEventList.27.71.126.137.148, [48 x i8] }* @intersectionEventListR to %struct.IEL_Reducer.31.75.130.141.152*), i32 0, i32 2)) + call void @__cilkrts_hyper_destroy(%struct.__cilkrts_hyperobject_base.23.67.122.133.144* getelementptr inbounds (%struct.IEL_Reducer.31.75.130.141.152, %struct.IEL_Reducer.31.75.130.141.152* bitcast ({ { { void (i8*, i8*, i8*)*, void (i8*, i8*)*, void (i8*, i8*)*, i8* (%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i64)*, void (%struct.__cilkrts_hyperobject_base.23.67.122.133.144*, i8*)* }, i32, i32, i64 }, [8 x i8], %struct.IntersectionEventList.27.71.126.137.148, [48 x i8] }* @intersectionEventListR to %struct.IEL_Reducer.31.75.130.141.152*), i32 0, i32 0)) + call void @llvm.lifetime.end.p0i8(i64 128, i8* %0) #8 + ret void +} + +; Function Attrs: nounwind sanitize_cilk uwtable +declare dso_local void @CollisionWorld_updatePosition(%struct.CollisionWorld.30.74.129.140.151*) local_unnamed_addr #1 + +; Function Attrs: nounwind sanitize_cilk uwtable +declare dso_local void @CollisionWorld_lineWallCollision(%struct.CollisionWorld.30.74.129.140.151*) local_unnamed_addr #1 + +; Function Attrs: inlinehint nounwind sanitize_cilk uwtable +declare dso_local fastcc void @line_update(%struct.Line.25.69.124.135.146*) unnamed_addr #5 + +declare dso_local void @__cilkrts_hyper_create(%struct.__cilkrts_hyperobject_base.23.67.122.133.144*) local_unnamed_addr #0 + +declare dso_local void @cilk_c_reducer_opadd_reduce_unsigned(i8*, i8*, i8*) #0 + +declare dso_local void @cilk_c_reducer_opadd_identity_unsigned(i8*, i8*) #0 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #3 + +; Function Attrs: 
argmemonly nounwind willreturn +declare token @llvm.syncregion.start() #3 + +; Function Attrs: inlinehint nounwind sanitize_cilk uwtable +declare dso_local fastcc zeroext i1 @quick_intersection_detection(%struct.Line.25.69.124.135.146*, %struct.Line.25.69.124.135.146*) unnamed_addr #5 + +; Function Attrs: inlinehint nounwind sanitize_cilk uwtable +declare dso_local fastcc i32 @compareLines(%struct.Line.25.69.124.135.146*, %struct.Line.25.69.124.135.146*) unnamed_addr #5 + +declare dso_local i32 @intersect(%struct.Line.25.69.124.135.146*, %struct.Line.25.69.124.135.146*) local_unnamed_addr #0 + +declare dso_local void @IntersectionEventList_appendNode(%struct.IntersectionEventList.27.71.126.137.148*, %struct.Line.25.69.124.135.146*, %struct.Line.25.69.124.135.146*, i32) local_unnamed_addr #0 + +; Function Attrs: nounwind readonly strand_pure +declare dso_local strand_noalias i8* @__cilkrts_hyper_lookup(%struct.__cilkrts_hyperobject_base.23.67.122.133.144*) local_unnamed_addr #7 + +declare dso_local void @quadtree_detectLineCollision(%struct.QuadTree.29.73.128.139.150*, %struct.Line.25.69.124.135.146*, %struct.IEL_Reducer.31.75.130.141.152*, %struct.cilk_c_reducer_opadd_unsigned.32.76.131.142.153*) local_unnamed_addr #0 + +declare dso_local void @quadtree_detectIntersection(%struct.QuadTree.29.73.128.139.150*, %struct.IEL_Reducer.31.75.130.141.152*, %struct.cilk_c_reducer_opadd_unsigned.32.76.131.142.153*) local_unnamed_addr #0 + +declare dso_local void @__cilkrts_hyper_destroy(%struct.__cilkrts_hyperobject_base.23.67.122.133.144*) local_unnamed_addr #0 + +declare dso_local i32 @IntersectionEventNode_compareData(%struct.IntersectionEventNode.26.70.125.136.147*, %struct.IntersectionEventNode.26.70.125.136.147*) local_unnamed_addr #0 + +declare dso_local void @IntersectionEventNode_swapData(%struct.IntersectionEventNode.26.70.125.136.147*, %struct.IntersectionEventNode.26.70.125.136.147*) local_unnamed_addr #0 + +; Function Attrs: nounwind sanitize_cilk uwtable 
+declare dso_local void @CollisionWorld_collisionSolver(%struct.CollisionWorld.30.74.129.140.151*, %struct.Line.25.69.124.135.146*, %struct.Line.25.69.124.135.146*, i32) local_unnamed_addr #1 + +declare dso_local void @IntersectionEventList_deleteNodes(%struct.IntersectionEventList.27.71.126.137.148*) local_unnamed_addr #0 + +; Function Attrs: nounwind sanitize_cilk uwtable +declare dso_local i32 @CollisionWorld_getNumLineWallCollisions(%struct.CollisionWorld.30.74.129.140.151*) local_unnamed_addr #1 + +; Function Attrs: nounwind sanitize_cilk uwtable +declare dso_local i32 @CollisionWorld_getNumLineLineCollisions(%struct.CollisionWorld.30.74.129.140.151*) local_unnamed_addr #1 + +declare dso_local { double, double } @getIntersectionPoint(double, double, double, double, double, double, double, double) local_unnamed_addr #0 + +declare dso_local double @Vec_length(double, double) local_unnamed_addr #0 + +declare dso_local { double, double } @Vec_subtract(double, double, double, double) local_unnamed_addr #0 + +declare dso_local { double, double } @Vec_multiply(double, double, double) local_unnamed_addr #0 + +declare dso_local { double, double } @Vec_normalize(double, double) local_unnamed_addr #0 + +declare dso_local { double, double } @Vec_makeFromLine(%struct.Line.25.69.124.135.146* byval(%struct.Line.25.69.124.135.146) align 8) local_unnamed_addr #0 + +declare dso_local { double, double } @Vec_orthogonal(double, double) local_unnamed_addr #0 + +declare dso_local double @Vec_dotProduct(double, double, double, double) local_unnamed_addr #0 + +declare dso_local { double, double } @Vec_add(double, double, double, double) local_unnamed_addr #0 + +attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" 
"target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #1 = { nounwind sanitize_cilk uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #2 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #3 = { argmemonly nounwind willreturn } +attributes #4 = { nofree nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #5 = { inlinehint nounwind sanitize_cilk uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" 
"unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #6 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #7 = { nounwind readonly strand_pure "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #8 = { nounwind } +attributes #9 = { nounwind readonly strand_pure } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 10.0.1 (git@github.com:OpenCilk/opencilk-project.git 496e6fe856f27dcf0c54f67024b3e85421b1b3a4)"} +!2 = !{!3, !7, i64 16} +!3 = !{!"CollisionWorld", !4, i64 0, !4, i64 8, !7, i64 16, !7, i64 20, !7, i64 24, !4, i64 32, !7, i64 40, !7, i64 44} +!4 = !{!"any pointer", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} +!7 = !{!"int", !5, i64 0} +!8 = !{!3, !4, i64 32} +!9 = !{!3, !4, i64 0} +!10 = !{!4, !4, i64 0} +!11 = !{!7, !7, i64 0} +!12 = distinct !{!12, !13} +!13 = !{!"tapir.loop.spawn.strategy", i32 1} +!14 = !{!15, !7, i64 64} +!15 = !{!"", !16, i64 0, !7, i64 64} +!16 = !{!"__cilkrts_hyperobject_base", !17, i64 0, !7, i64 40, !7, i64 44, !18, i64 48} +!17 = !{!"cilk_c_monoid", !4, i64 0, !4, i64 8, !4, i64 16, !4, i64 24, !4, i64 32} +!18 = !{!"long", !5, i64 0} +!19 = !{!3, !7, i64 44} +!20 = !{!21, !4, i64 64} +!21 = !{!"", !16, 
i64 0, !22, i64 64} +!22 = !{!"IntersectionEventList", !4, i64 0, !4, i64 8} +!23 = !{!24, !4, i64 24} +!24 = !{!"IntersectionEventNode", !4, i64 0, !4, i64 8, !5, i64 16, !4, i64 24} +!25 = !{!24, !4, i64 0} +!26 = !{!24, !4, i64 8} +!27 = !{!24, !5, i64 16} diff --git a/llvm/test/Analysis/TapirRaceDetect/tapir-rd-objects.ll b/llvm/test/Analysis/TapirRaceDetect/tapir-rd-objects.ll new file mode 100644 index 00000000000000..72b0bc758f3e02 --- /dev/null +++ b/llvm/test/Analysis/TapirRaceDetect/tapir-rd-objects.ll @@ -0,0 +1,6079 @@ +; RUN: opt < %s -passes='print' -aa-pipeline=default -evaluate-aa-metadata -disable-output 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.cilk_c_monoid = type { void (i8*, i8*, i8*)*, void (i8*, i8*)*, void (i8*, i8*)*, i8* (i8*, i64)*, void (i8*, i8*)* } +%class.Graph = type { i32, i32, i32*, i32* } +%class.Bag_reducer = type { %"class.cilk::reducer" } +%"class.cilk::reducer" = type { %"class.cilk::internal::reducer_content.base", i8 } +%"class.cilk::internal::reducer_content.base" = type <{ %"class.cilk::internal::reducer_base", [127 x i8] }> +%"class.cilk::internal::reducer_base" = type { %struct.__cilkrts_hyperobject_base, %"class.cilk::internal::storage_for_object", i8* } +%struct.__cilkrts_hyperobject_base = type { %struct.cilk_c_monoid, i64, i64, i64 } +%"class.cilk::internal::storage_for_object" = type { %"class.cilk::internal::aligned_storage" } +%"class.cilk::internal::aligned_storage" = type { [1 x i8] } +%class.Bag = type <{ i32, [4 x i8], %class.Pennant**, i32*, i32, [4 x i8] }> +%class.Pennant = type { i32*, %class.Pennant*, %class.Pennant* } + +$_ZNK5Graph13pbfs_walk_BagEP3BagIiEP11Bag_reducerIiEjPj = comdat any + +; Function Attrs: inlinehint uwtable +define linkonce_odr dso_local void @_ZNK5Graph13pbfs_walk_BagEP3BagIiEP11Bag_reducerIiEjPj(%class.Graph* %this, %class.Bag* %b, %class.Bag_reducer* %next, i32 %newdist, i32* 
%distances) local_unnamed_addr #10 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg !2219 { +entry: + %syncreg = tail call token @llvm.syncregion.start() + call void @llvm.dbg.value(metadata %class.Graph* %this, metadata !2221, metadata !DIExpression()), !dbg !2240 + call void @llvm.dbg.value(metadata %class.Bag* %b, metadata !2222, metadata !DIExpression()), !dbg !2241 + call void @llvm.dbg.value(metadata %class.Bag_reducer* %next, metadata !2223, metadata !DIExpression()), !dbg !2242 + call void @llvm.dbg.value(metadata i32 %newdist, metadata !2224, metadata !DIExpression()), !dbg !2243 + call void @llvm.dbg.value(metadata i32* %distances, metadata !2225, metadata !DIExpression()), !dbg !2244 + call void @llvm.dbg.value(metadata %class.Bag* %b, metadata !2245, metadata !DIExpression()), !dbg !2248 + %fill.i = getelementptr inbounds %class.Bag, %class.Bag* %b, i64 0, i32 0, !dbg !2250 + %0 = load i32, i32* %fill.i, align 8, !dbg !2250, !tbaa !1992 + %cmp = icmp eq i32 %0, 0, !dbg !2251 + br i1 %cmp, label %if.else, label %if.end.i, !dbg !2252 + +if.end.i: ; preds = %entry + call void @llvm.dbg.value(metadata %class.Pennant* null, metadata !2226, metadata !DIExpression()), !dbg !2253 + call void @llvm.dbg.value(metadata %class.Pennant** undef, metadata !2226, metadata !DIExpression(DW_OP_deref)), !dbg !2253 + call void @llvm.dbg.value(metadata %class.Bag* %b, metadata !2254, metadata !DIExpression()), !dbg !2258 + call void @llvm.dbg.value(metadata %class.Pennant** undef, metadata !2257, metadata !DIExpression()), !dbg !2260 + %dec.i = add i32 %0, -1, !dbg !2261 + store i32 %dec.i, i32* %fill.i, align 8, !dbg !2261, !tbaa !1992 + %bag.i = getelementptr inbounds %class.Bag, %class.Bag* %b, i64 0, i32 2, !dbg !2262 + %1 = load %class.Pennant**, %class.Pennant*** %bag.i, align 8, !dbg !2262, !tbaa !2188 + %idxprom.i = zext i32 %dec.i to i64, !dbg !2263 + %arrayidx.i = getelementptr inbounds %class.Pennant*, %class.Pennant** %1, i64 
%idxprom.i, !dbg !2263 + %2 = load %class.Pennant*, %class.Pennant** %arrayidx.i, align 8, !dbg !2263, !tbaa !1901 + store %class.Pennant* null, %class.Pennant** %arrayidx.i, align 8, !dbg !2264, !tbaa !1901 + %cmp921.i = icmp eq i32 %dec.i, 0, !dbg !2265 + br i1 %cmp921.i, label %_ZN3BagIiE5splitEPP7PennantIiE.exit, label %for.body.lr.ph.i, !dbg !2268 + +for.body.lr.ph.i: ; preds = %if.end.i + %3 = load %class.Pennant**, %class.Pennant*** %bag.i, align 8, !tbaa !2188 + br label %for.body.i, !dbg !2268 + +for.body.i: ; preds = %for.inc.i, %for.body.lr.ph.i + %indvars.iv.i = phi i64 [ %idxprom.i, %for.body.lr.ph.i ], [ %indvars.iv.next.i, %for.inc.i ] + %4 = trunc i64 %indvars.iv.i to i32, !dbg !2269 + %sub.i = add i32 %4, -1, !dbg !2269 + %idxprom12.i = zext i32 %sub.i to i64, !dbg !2272 + %arrayidx13.i = getelementptr inbounds %class.Pennant*, %class.Pennant** %3, i64 %idxprom12.i, !dbg !2272 + %5 = load %class.Pennant*, %class.Pennant** %arrayidx13.i, align 8, !dbg !2272, !tbaa !1901 + %cmp14.i = icmp eq %class.Pennant* %5, null, !dbg !2273 + br i1 %cmp14.i, label %for.inc.i, label %_ZN3BagIiE5splitEPP7PennantIiE.exit, !dbg !2274 + +for.inc.i: ; preds = %for.body.i + store i32 %sub.i, i32* %fill.i, align 8, !dbg !2275, !tbaa !1992 + %cmp9.i = icmp eq i32 %sub.i, 0, !dbg !2265 + %indvars.iv.next.i = add nsw i64 %indvars.iv.i, -1, !dbg !2269 + br i1 %cmp9.i, label %_ZN3BagIiE5splitEPP7PennantIiE.exit, label %for.body.i, !dbg !2268, !llvm.loop !2276 + +_ZN3BagIiE5splitEPP7PennantIiE.exit: ; preds = %for.body.i, %for.inc.i, %if.end.i + detach within %syncreg, label %det.achd, label %det.cont unwind label %lpad3, !dbg !2279 + +det.achd: ; preds = %_ZN3BagIiE5splitEPP7PennantIiE.exit + invoke void @_ZNK5Graph13pbfs_walk_BagEP3BagIiEP11Bag_reducerIiEjPj(%class.Graph* %this, %class.Bag* nonnull %b, %class.Bag_reducer* %next, i32 %newdist, i32* %distances) + to label %invoke.cont unwind label %lpad, !dbg !2279 + +invoke.cont: ; preds = %det.achd + reattach within 
%syncreg, label %det.cont, !dbg !2279 + +det.cont: ; preds = %_ZN3BagIiE5splitEPP7PennantIiE.exit, %invoke.cont + call void @llvm.dbg.value(metadata %class.Pennant* %2, metadata !2226, metadata !DIExpression()), !dbg !2253 + invoke void @_ZNK5Graph17pbfs_walk_PennantEP7PennantIiEP11Bag_reducerIiEjPj(%class.Graph* %this, %class.Pennant* %2, %class.Bag_reducer* %next, i32 %newdist, i32* %distances) + to label %invoke.cont7 unwind label %lpad3, !dbg !2280 + +invoke.cont7: ; preds = %det.cont + sync within %syncreg, label %if.end, !dbg !2281 + +lpad: ; preds = %det.achd + %6 = landingpad { i8*, i32 } + catch i8* null, !dbg !2282 + invoke void @llvm.detached.rethrow.sl_p0i8i32s(token %syncreg, { i8*, i32 } %6) + to label %det.rethrow.unreachable unwind label %lpad3, !dbg !2279 + +det.rethrow.unreachable: ; preds = %lpad + unreachable, !dbg !2279 + +lpad3: ; preds = %det.cont, %_ZN3BagIiE5splitEPP7PennantIiE.exit, %lpad + %7 = landingpad { i8*, i32 } + cleanup, !dbg !2282 + %8 = extractvalue { i8*, i32 } %7, 0, !dbg !2282 + %9 = extractvalue { i8*, i32 } %7, 1, !dbg !2282 + sync within %syncreg, label %eh.resume, !dbg !2283 + +if.else: ; preds = %entry + call void @llvm.dbg.value(metadata %class.Bag* %b, metadata !2284, metadata !DIExpression()), !dbg !2287 + %size.i = getelementptr inbounds %class.Bag, %class.Bag* %b, i64 0, i32 4, !dbg !2289 + %10 = load i32, i32* %size.i, align 8, !dbg !2289, !tbaa !1998 + call void @llvm.dbg.value(metadata i32 %10, metadata !2229, metadata !DIExpression()), !dbg !2290 + call void @llvm.dbg.value(metadata %class.Bag* %b, metadata !2291, metadata !DIExpression()), !dbg !2294 + %filling.i182 = getelementptr inbounds %class.Bag, %class.Bag* %b, i64 0, i32 3, !dbg !2296 + %11 = load i32*, i32** %filling.i182, align 8, !dbg !2296, !tbaa !2155 + call void @llvm.dbg.value(metadata i32* %11, metadata !2231, metadata !DIExpression()), !dbg !2297 + %rem = srem i32 %10, 256, !dbg !2298 + call void @llvm.dbg.value(metadata i32 %rem, metadata 
!2232, metadata !DIExpression()), !dbg !2299 + %nodes = getelementptr inbounds %class.Graph, %class.Graph* %this, i64 0, i32 2, !dbg !2300 + %12 = load i32*, i32** %nodes, align 8, !dbg !2300, !tbaa !1682 + %edges = getelementptr inbounds %class.Graph, %class.Graph* %this, i64 0, i32 3, !dbg !2301 + %13 = load i32*, i32** %edges, align 8, !dbg !2301, !tbaa !1687 + detach within %syncreg, label %det.achd13, label %det.cont18 unwind label %lpad19, !dbg !2302 + +det.achd13: ; preds = %if.else + %syncreg.i = tail call token @llvm.syncregion.start() + %idx.ext = sext i32 %10 to i64, !dbg !2303 + %add.ptr = getelementptr inbounds i32, i32* %11, i64 %idx.ext, !dbg !2303 + %narrow = sub nsw i32 0, %rem, !dbg !2304 + %idx.neg = sext i32 %narrow to i64, !dbg !2304 + %add.ptr12 = getelementptr inbounds i32, i32* %add.ptr, i64 %idx.neg, !dbg !2304 + call void @llvm.dbg.value(metadata i32* %add.ptr12, metadata !2305, metadata !DIExpression()), !dbg !2341 + call void @llvm.dbg.value(metadata i32 %rem, metadata !2311, metadata !DIExpression()), !dbg !2343 + call void @llvm.dbg.value(metadata %class.Bag_reducer* %next, metadata !2312, metadata !DIExpression()), !dbg !2344 + call void @llvm.dbg.value(metadata i32 %newdist, metadata !2313, metadata !DIExpression()), !dbg !2345 + call void @llvm.dbg.value(metadata i32* %distances, metadata !2314, metadata !DIExpression()), !dbg !2346 + call void @llvm.dbg.value(metadata i32* %12, metadata !2315, metadata !DIExpression()), !dbg !2347 + call void @llvm.dbg.value(metadata i32* %13, metadata !2316, metadata !DIExpression()), !dbg !2348 + call void @llvm.dbg.value(metadata %class.Bag_reducer* %next, metadata !2028, metadata !DIExpression()), !dbg !2349 + call void @llvm.dbg.value(metadata %class.Bag_reducer* %next, metadata !2009, metadata !DIExpression()), !dbg !2351 + call void @llvm.dbg.value(metadata %class.Bag_reducer* %next, metadata !1978, metadata !DIExpression()), !dbg !2353 + %m_base.i.i.i.i = getelementptr inbounds 
%class.Bag_reducer, %class.Bag_reducer* %next, i64 0, i32 0, i32 0, i32 0, i32 0, !dbg !2355 + %call.i.i.i.i108 = invoke i8* @__cilkrts_hyper_lookup(%struct.__cilkrts_hyperobject_base* %m_base.i.i.i.i) + to label %call.i.i.i.i.noexc unwind label %lpad14.loopexit.split-lp, !dbg !2356 + +call.i.i.i.i.noexc: ; preds = %det.achd13 + call void @llvm.dbg.value(metadata i8* %call.i.i.i.i108, metadata !2317, metadata !DIExpression()), !dbg !2357 + call void @llvm.dbg.value(metadata i32 0, metadata !2318, metadata !DIExpression()), !dbg !2358 + %cmp105.i = icmp sgt i32 %rem, 0, !dbg !2359 + br i1 %cmp105.i, label %for.body.preheader.i, label %invoke.cont17, !dbg !2360 + +for.body.preheader.i: ; preds = %call.i.i.i.i.noexc + %14 = sext i32 %rem to i64, !dbg !2361 + %filling.i183 = getelementptr inbounds i8, i8* %call.i.i.i.i108, i64 16 + %15 = bitcast i8* %filling.i183 to i32** + %size.i184 = getelementptr inbounds i8, i8* %call.i.i.i.i108, i64 24 + %16 = bitcast i8* %size.i184 to i32* + %17 = bitcast i8* %filling.i183 to i8** + %fill.i191 = bitcast i8* %call.i.i.i.i108 to i32* + %bag.i192 = getelementptr inbounds i8, i8* %call.i.i.i.i108, i64 8 + %18 = bitcast i8* %bag.i192 to %class.Pennant*** + br label %for.body.i102, !dbg !2361 + +for.body.i102: ; preds = %if.end44.i, %for.body.preheader.i + %indvars.iv111.i = phi i64 [ 0, %for.body.preheader.i ], [ %indvars.iv.next112.i, %if.end44.i ] + call void @llvm.dbg.value(metadata i64 %indvars.iv111.i, metadata !2318, metadata !DIExpression()), !dbg !2358 + %arrayidx.i100 = getelementptr inbounds i32, i32* %add.ptr12, i64 %indvars.iv111.i, !dbg !2361 + %19 = load i32, i32* %arrayidx.i100, align 4, !dbg !2361, !tbaa !1701 + %idxprom1.i = sext i32 %19 to i64, !dbg !2362 + %arrayidx2.i = getelementptr inbounds i32, i32* %12, i64 %idxprom1.i, !dbg !2362 + %20 = load i32, i32* %arrayidx2.i, align 4, !dbg !2362, !tbaa !1701 + call void @llvm.dbg.value(metadata i32 %20, metadata !2320, metadata !DIExpression()), !dbg !2363 + %add.i = 
add nsw i32 %19, 1, !dbg !2364 + %idxprom5.i = sext i32 %add.i to i64, !dbg !2365 + %arrayidx6.i = getelementptr inbounds i32, i32* %12, i64 %idxprom5.i, !dbg !2365 + %21 = load i32, i32* %arrayidx6.i, align 4, !dbg !2365, !tbaa !1701 + call void @llvm.dbg.value(metadata i32 %21, metadata !2323, metadata !DIExpression()), !dbg !2366 + %sub.i101 = sub i32 %21, %20, !dbg !2367 + %cmp7.i = icmp slt i32 %sub.i101, 128, !dbg !2368 + %cmp9103.i = icmp sgt i32 %21, %20, !dbg !2369 + br i1 %cmp7.i, label %for.cond8.preheader.i, label %if.else.i, !dbg !2370 + +for.cond8.preheader.i: ; preds = %for.body.i102 + call void @llvm.dbg.value(metadata i32 %20, metadata !2324, metadata !DIExpression()), !dbg !2371 + br i1 %cmp9103.i, label %for.body11.preheader.i, label %if.end44.i, !dbg !2372 + +for.body11.preheader.i: ; preds = %for.cond8.preheader.i + %22 = sext i32 %20 to i64, !dbg !2373 + br label %for.body11.i, !dbg !2373 + +for.body11.i: ; preds = %if.end.i104, %for.body11.preheader.i + %indvars.iv108.i = phi i64 [ %22, %for.body11.preheader.i ], [ %indvars.iv.next109.i, %if.end.i104 ] + call void @llvm.dbg.value(metadata i64 %indvars.iv108.i, metadata !2324, metadata !DIExpression()), !dbg !2371 + %arrayidx13.i103 = getelementptr inbounds i32, i32* %13, i64 %indvars.iv108.i, !dbg !2373 + %23 = load i32, i32* %arrayidx13.i103, align 4, !dbg !2373, !tbaa !1701 + call void @llvm.dbg.value(metadata i32 %23, metadata !2328, metadata !DIExpression()), !dbg !2374 + %idxprom14.i = sext i32 %23 to i64, !dbg !2375 + %arrayidx15.i = getelementptr inbounds i32, i32* %distances, i64 %idxprom14.i, !dbg !2375 + %24 = load i32, i32* %arrayidx15.i, align 4, !dbg !2375, !tbaa !1701 + %cmp16.i = icmp ugt i32 %24, %newdist, !dbg !2377 + br i1 %cmp16.i, label %if.then17.i, label %if.end.i104, !dbg !2378 + +if.then17.i: ; preds = %for.body11.i + call void @llvm.dbg.value(metadata i8* %call.i.i.i.i108, metadata !2143, metadata !DIExpression()), !dbg !2379 + call void @llvm.dbg.value(metadata i32 
%23, metadata !2140, metadata !DIExpression()), !dbg !2382 + %25 = load i32*, i32** %15, align 8, !dbg !2383, !tbaa !2155 + %26 = load i32, i32* %16, align 8, !dbg !2384, !tbaa !1998 + %inc.i185 = add i32 %26, 1, !dbg !2384 + store i32 %inc.i185, i32* %16, align 8, !dbg !2384, !tbaa !1998 + %idxprom.i186 = zext i32 %26 to i64, !dbg !2385 + %arrayidx.i187 = getelementptr inbounds i32, i32* %25, i64 %idxprom.i186, !dbg !2385 + store i32 %23, i32* %arrayidx.i187, align 4, !dbg !2386, !tbaa !1701 + %27 = load i32, i32* %16, align 8, !dbg !2387, !tbaa !1998 + %cmp.i188 = icmp ult i32 %27, 2048, !dbg !2388 + br i1 %cmp.i188, label %.noexc, label %if.end.i193, !dbg !2389 + +if.end.i193: ; preds = %if.then17.i + %call.i221 = invoke i8* @_Znwm(i64 24) #19 + to label %call.i.noexc220 unwind label %lpad14.loopexit, !dbg !2390 + +call.i.noexc220: ; preds = %if.end.i193 + call void @llvm.dbg.value(metadata i32* %25, metadata !2169, metadata !DIExpression()) #2, !dbg !2391 + %els.i.i189 = bitcast i8* %call.i221 to i32**, !dbg !2393 + store i32* %25, i32** %els.i.i189, align 8, !dbg !2394, !tbaa !2176 + %l.i.i190 = getelementptr inbounds i8, i8* %call.i221, i64 8, !dbg !2395 + tail call void @llvm.memset.p0i8.i64(i8* nonnull align 8 %l.i.i190, i8 0, i64 16, i1 false) #2, !dbg !2396 + %call4.i223 = invoke i8* @_Znam(i64 8192) #19 + to label %call4.i.noexc222 unwind label %lpad14.loopexit, !dbg !2397 + +call4.i.noexc222: ; preds = %call.i.noexc220 + %28 = bitcast i8* %call.i221 to %class.Pennant*, !dbg !2390 + call void @llvm.dbg.value(metadata %class.Pennant* %28, metadata !2144, metadata !DIExpression()), !dbg !2398 + call void @llvm.dbg.value(metadata %class.Pennant* %28, metadata !2166, metadata !DIExpression()) #2, !dbg !2399 + store i8* %call4.i223, i8** %17, align 8, !dbg !2400, !tbaa !2155 + store i32 0, i32* %16, align 8, !dbg !2401, !tbaa !1998 + call void @llvm.dbg.value(metadata i32 0, metadata !2145, metadata !DIExpression()), !dbg !2402 + %29 = load i32, i32* 
%fill.i191, align 8, !tbaa !1992 + %30 = zext i32 %29 to i64, !dbg !2403 + br label %do.body.i197, !dbg !2403 + +do.body.i197: ; preds = %if.then11.i208.1, %call4.i.noexc222 + %indvars.iv254 = phi i64 [ 0, %call4.i.noexc222 ], [ %indvars.iv.next255.1, %if.then11.i208.1 ], !dbg !2379 + %c.0.i195 = phi %class.Pennant* [ %28, %call4.i.noexc222 ], [ %107, %if.then11.i208.1 ], !dbg !2379 + call void @llvm.dbg.value(metadata %class.Pennant* %c.0.i195, metadata !2144, metadata !DIExpression()), !dbg !2398 + call void @llvm.dbg.value(metadata i64 %indvars.iv254, metadata !2145, metadata !DIExpression()), !dbg !2402 + %cmp7.i196 = icmp ult i64 %indvars.iv254, %30, !dbg !2404 + %31 = load %class.Pennant**, %class.Pennant*** %18, align 8, !dbg !2405, !tbaa !2188 + br i1 %cmp7.i196, label %land.lhs.true.i203, label %if.else.i217, !dbg !2406 + +land.lhs.true.i203: ; preds = %do.body.i197 + %arrayidx9.i201 = getelementptr inbounds %class.Pennant*, %class.Pennant** %31, i64 %indvars.iv254, !dbg !2407 + %32 = load %class.Pennant*, %class.Pennant** %arrayidx9.i201, align 8, !dbg !2407, !tbaa !1901 + %cmp10.i202 = icmp eq %class.Pennant* %32, null, !dbg !2408 + br i1 %cmp10.i202, label %38, label %if.then11.i208, !dbg !2409 + +if.then11.i208: ; preds = %land.lhs.true.i203 + call void @llvm.dbg.value(metadata %class.Pennant* %32, metadata !2193, metadata !DIExpression()), !dbg !2410 + call void @llvm.dbg.value(metadata %class.Pennant* %c.0.i195, metadata !2196, metadata !DIExpression()), !dbg !2412 + %l.i48.i204 = getelementptr inbounds %class.Pennant, %class.Pennant* %32, i64 0, i32 1, !dbg !2413 + %33 = bitcast %class.Pennant** %l.i48.i204 to i64*, !dbg !2413 + %34 = load i64, i64* %33, align 8, !dbg !2413, !tbaa !2202 + %r.i.i205 = getelementptr inbounds %class.Pennant, %class.Pennant* %c.0.i195, i64 0, i32 2, !dbg !2414 + %35 = bitcast %class.Pennant** %r.i.i205 to i64*, !dbg !2415 + store i64 %34, i64* %35, align 8, !dbg !2415, !tbaa !2205 + store %class.Pennant* %c.0.i195, 
%class.Pennant** %l.i48.i204, align 8, !dbg !2416, !tbaa !2202 + call void @llvm.dbg.value(metadata %class.Pennant* %32, metadata !2144, metadata !DIExpression()), !dbg !2398 + store %class.Pennant* null, %class.Pennant** %arrayidx9.i201, align 8, !dbg !2417, !tbaa !1901 + %indvars.iv.next255 = or i64 %indvars.iv254, 1, !dbg !2418 + call void @llvm.dbg.value(metadata i32 undef, metadata !2145, metadata !DIExpression(DW_OP_plus_uconst, 1, DW_OP_stack_value)), !dbg !2402 + call void @llvm.dbg.value(metadata %class.Pennant* %32, metadata !2144, metadata !DIExpression()), !dbg !2398 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next255, metadata !2145, metadata !DIExpression()), !dbg !2402 + %cmp7.i196.1 = icmp ult i64 %indvars.iv.next255, %30, !dbg !2404 + %36 = load %class.Pennant**, %class.Pennant*** %18, align 8, !dbg !2405, !tbaa !2188 + br i1 %cmp7.i196.1, label %land.lhs.true.i203.1, label %if.else.i217, !dbg !2406 + +if.else.i217: ; preds = %if.then11.i208, %do.body.i197 + %indvars.iv254.lcssa = phi i64 [ %indvars.iv254, %do.body.i197 ], [ %indvars.iv.next255, %if.then11.i208 ], !dbg !2379 + %c.0.i195.lcssa = phi %class.Pennant* [ %c.0.i195, %do.body.i197 ], [ %32, %if.then11.i208 ], !dbg !2379 + %.lcssa337 = phi %class.Pennant** [ %31, %do.body.i197 ], [ %36, %if.then11.i208 ], !dbg !2405 + call void @llvm.dbg.value(metadata i64 %indvars.iv254.lcssa, metadata !2145, metadata !DIExpression()), !dbg !2402 + call void @llvm.dbg.value(metadata %class.Pennant* %c.0.i195.lcssa, metadata !2144, metadata !DIExpression()), !dbg !2398 + call void @llvm.dbg.value(metadata i64 %indvars.iv254.lcssa, metadata !2145, metadata !DIExpression()), !dbg !2402 + call void @llvm.dbg.value(metadata %class.Pennant* %c.0.i195.lcssa, metadata !2144, metadata !DIExpression()), !dbg !2398 + call void @llvm.dbg.value(metadata i64 %indvars.iv254.lcssa, metadata !2145, metadata !DIExpression()), !dbg !2402 + call void @llvm.dbg.value(metadata %class.Pennant* %c.0.i195.lcssa, metadata 
!2144, metadata !DIExpression()), !dbg !2398 + %37 = trunc i64 %indvars.iv254.lcssa to i32, !dbg !2406 + call void @llvm.dbg.value(metadata i64 %indvars.iv254.lcssa, metadata !2145, metadata !DIExpression()), !dbg !2402 + call void @llvm.dbg.value(metadata %class.Pennant* %c.0.i195.lcssa, metadata !2144, metadata !DIExpression()), !dbg !2398 + call void @llvm.dbg.value(metadata i32 %37, metadata !2145, metadata !DIExpression()), !dbg !2402 + call void @llvm.dbg.value(metadata i32 %37, metadata !2145, metadata !DIExpression()), !dbg !2402 + call void @llvm.dbg.value(metadata %class.Pennant* %c.0.i195.lcssa, metadata !2144, metadata !DIExpression()), !dbg !2398 + call void @llvm.dbg.value(metadata %class.Pennant* %c.0.i195.lcssa, metadata !2144, metadata !DIExpression()), !dbg !2398 + call void @llvm.dbg.value(metadata i32 %37, metadata !2145, metadata !DIExpression()), !dbg !2402 + call void @llvm.dbg.value(metadata i32 %37, metadata !2145, metadata !DIExpression()), !dbg !2402 + call void @llvm.dbg.value(metadata %class.Pennant* %c.0.i195.lcssa, metadata !2144, metadata !DIExpression()), !dbg !2398 + call void @llvm.dbg.value(metadata %class.Pennant* %c.0.i195.lcssa, metadata !2144, metadata !DIExpression()), !dbg !2398 + %idxprom20.pre-phi.i210 = and i64 %indvars.iv254.lcssa, 4294967295, !dbg !2419 + call void @llvm.dbg.value(metadata i32 %37, metadata !2145, metadata !DIExpression()), !dbg !2402 + call void @llvm.dbg.value(metadata i32 %37, metadata !2145, metadata !DIExpression()), !dbg !2402 + call void @llvm.dbg.value(metadata %class.Pennant* %c.0.i195.lcssa, metadata !2144, metadata !DIExpression()), !dbg !2398 + call void @llvm.dbg.value(metadata %class.Pennant* %c.0.i195.lcssa, metadata !2144, metadata !DIExpression()), !dbg !2398 + call void @llvm.dbg.value(metadata i32 %37, metadata !2145, metadata !DIExpression()), !dbg !2402 + call void @llvm.dbg.value(metadata i32 %37, metadata !2145, metadata !DIExpression()), !dbg !2402 + call void 
@llvm.dbg.value(metadata %class.Pennant* %c.0.i195.lcssa, metadata !2144, metadata !DIExpression()), !dbg !2398 + call void @llvm.dbg.value(metadata %class.Pennant* %c.0.i195.lcssa, metadata !2144, metadata !DIExpression()), !dbg !2398 + %arrayidx21.i211 = getelementptr inbounds %class.Pennant*, %class.Pennant** %.lcssa337, i64 %idxprom20.pre-phi.i210, !dbg !2419 + store %class.Pennant* %c.0.i195.lcssa, %class.Pennant** %arrayidx21.i211, align 8, !dbg !2420, !tbaa !1901 + call void @llvm.dbg.value(metadata i32 %29, metadata !2146, metadata !DIExpression()), !dbg !2421 + %add.i212 = add nuw i32 %37, 1, !dbg !2421 + call void @llvm.dbg.value(metadata i32 %add.i212, metadata !2151, metadata !DIExpression()), !dbg !2421 + %xor.i213 = xor i32 %add.i212, %29, !dbg !2421 + br label %39, !dbg !2421 + +;