diff --git a/.github/workflows/libclang-abi-tests.yml b/.github/workflows/libclang-abi-tests.yml index 41896d4392885..155a1fcda7b3a 100644 --- a/.github/workflows/libclang-abi-tests.yml +++ b/.github/workflows/libclang-abi-tests.yml @@ -146,10 +146,12 @@ jobs: uses: actions/download-artifact@v3 with: name: build-baseline + path: build-baseline - name: Download latest uses: actions/download-artifact@v3 with: name: build-latest + path: build-latest - name: Install abi-compliance-checker run: sudo apt-get install abi-compliance-checker diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 424b10c62b256..fe8c134b8554e 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -1844,8 +1844,9 @@ void RewriteInstance::adjustCommandLineOptions() { exit(1); } - if (opts::ReorderFunctions != ReorderFunctions::RT_NONE && - !opts::HotText.getNumOccurrences()) { + if (opts::Instrument || + (opts::ReorderFunctions != ReorderFunctions::RT_NONE && + !opts::HotText.getNumOccurrences())) { opts::HotText = true; } else if (opts::HotText && !BC->HasRelocations) { errs() << "BOLT-WARNING: hot text is disabled in non-relocation mode\n"; diff --git a/bolt/runtime/common.h b/bolt/runtime/common.h index 2ec6f45fc1595..9e6f1756c5707 100644 --- a/bolt/runtime/common.h +++ b/bolt/runtime/common.h @@ -165,6 +165,20 @@ int memcmp(const void *s1, const void *s2, size_t n) { // Anonymous namespace covering everything but our library entry point namespace { +// Get the difference between runtime address of .text section and +// static address in section header table. Can be extracted from arbitrary +// pc value recorded at runtime to get the corresponding static address, which +// in turn can be used to search for indirect call description. Needed because +// indirect call descriptions are read-only non-relocatable data. 
+uint64_t getTextBaseAddress() { + uint64_t DynAddr; + uint64_t StaticAddr; + __asm__ volatile("leaq __hot_end(%%rip), %0\n\t" + "movabsq $__hot_end, %1\n\t" + : "=r"(DynAddr), "=r"(StaticAddr)); + return DynAddr - StaticAddr; +} + constexpr uint32_t BufSize = 10240; #define _STRINGIFY(x) #x @@ -458,6 +472,16 @@ uint64_t __lseek(uint64_t fd, uint64_t pos, uint64_t whence) { return ret; } +int __ftruncate(uint64_t fd, uint64_t length) { + int ret; + __asm__ __volatile__("movq $77, %%rax\n" + "syscall\n" + : "=a"(ret) + : "D"(fd), "S"(length) + : "cc", "rcx", "r11", "memory"); + return ret; +} + int __close(uint64_t fd) { uint64_t ret; __asm__ __volatile__("movq $3, %%rax\n" diff --git a/bolt/runtime/instr.cpp b/bolt/runtime/instr.cpp index ef55ded40431c..96a43f685befa 100644 --- a/bolt/runtime/instr.cpp +++ b/bolt/runtime/instr.cpp @@ -215,6 +215,12 @@ class BumpPtrAllocator { /// __bolt_instr_setup, our initialization routine. BumpPtrAllocator *GlobalAlloc; +// Base address which we subtract from recorded PC values when searching for +// indirect call description entries. Needed because indCall descriptions are +// mapped read-only and contain static addresses. Initialized in +// __bolt_instr_setup. +uint64_t TextBaseAddress = 0; + // Storage for GlobalAlloc which can be shared if not using // instrumentation-file-append-pid. void *GlobalMetadataStorage; @@ -1389,7 +1395,7 @@ void visitIndCallCounter(IndirectCallHashTable::MapEntry &Entry, const IndCallDescription *CallsiteDesc = &Ctx->IndCallDescriptions[CallsiteID]; const IndCallTargetDescription *TargetDesc = - Ctx->lookupIndCallTarget(Entry.Key); + Ctx->lookupIndCallTarget(Entry.Key - TextBaseAddress); if (!TargetDesc) { DEBUG(report("Failed to lookup indirect call target\n")); char LineBuf[BufSize]; @@ -1515,11 +1521,15 @@ extern "C" void __bolt_instr_clear_counters() { /// on demand. 
/// extern "C" void __attribute((force_align_arg_pointer)) -__bolt_instr_data_dump() { +__bolt_instr_data_dump(int FD) { // Already dumping if (!GlobalWriteProfileMutex->acquire()) return; + int ret = __lseek(FD, 0, SEEK_SET); + assert(ret == 0, "Failed to lseek!"); + ret = __ftruncate(FD, 0); + assert(ret == 0, "Failed to ftruncate!"); BumpPtrAllocator HashAlloc; HashAlloc.setMaxSize(0x6400000); ProfileWriterContext Ctx = readDescriptions(); @@ -1527,8 +1537,6 @@ __bolt_instr_data_dump() { DEBUG(printStats(Ctx)); - int FD = openProfile(); - BumpPtrAllocator Alloc; Alloc.setMaxSize(0x6400000); const uint8_t *FuncDesc = Ctx.FuncDescriptions; @@ -1544,7 +1552,6 @@ __bolt_instr_data_dump() { Ctx.CallFlowTable->forEachElement(visitCallFlowEntry, FD, &Ctx); __fsync(FD); - __close(FD); __munmap(Ctx.MMapPtr, Ctx.MMapSize); __close(Ctx.FileDesc); HashAlloc.destroy(); @@ -1557,6 +1564,7 @@ __bolt_instr_data_dump() { void watchProcess() { timespec ts, rem; uint64_t Ellapsed = 0ull; + int FD = openProfile(); uint64_t ppid; if (__bolt_instr_wait_forks) { // Store parent pgid @@ -1568,7 +1576,7 @@ void watchProcess() { ppid = __getppid(); if (ppid == 1) { // Parent already dead - __bolt_instr_data_dump(); + __bolt_instr_data_dump(FD); goto out; } } @@ -1581,7 +1589,7 @@ void watchProcess() { // so no need for us to keep dumping. 
if (__kill(ppid, 0) < 0) { if (__bolt_instr_no_counters_clear) - __bolt_instr_data_dump(); + __bolt_instr_data_dump(FD); break; } @@ -1589,13 +1597,14 @@ void watchProcess() { continue; Ellapsed = 0; - __bolt_instr_data_dump(); + __bolt_instr_data_dump(FD); if (__bolt_instr_no_counters_clear == false) __bolt_instr_clear_counters(); } out:; DEBUG(report("My parent process is dead, bye!\n")); + __close(FD); __exit(0); } @@ -1606,6 +1615,7 @@ extern "C" void __bolt_instr_indirect_tailcall(); extern "C" void __attribute((force_align_arg_pointer)) __bolt_instr_setup() { __bolt_ind_call_counter_func_pointer = __bolt_instr_indirect_call; __bolt_ind_tailcall_counter_func_pointer = __bolt_instr_indirect_tailcall; + TextBaseAddress = getTextBaseAddress(); const uint64_t CountersStart = reinterpret_cast(&__bolt_instr_locations[0]); @@ -1691,8 +1701,11 @@ extern "C" __attribute((naked)) void __bolt_instr_start() /// This is hooking into ELF's DT_FINI extern "C" void __bolt_instr_fini() { __bolt_fini_trampoline(); - if (__bolt_instr_sleep_time == 0) - __bolt_instr_data_dump(); + if (__bolt_instr_sleep_time == 0) { + int FD = openProfile(); + __bolt_instr_data_dump(FD); + __close(FD); + } DEBUG(report("Finished.\n")); } diff --git a/bolt/test/lit.cfg.py b/bolt/test/lit.cfg.py index fe27af87f9106..3a6da210e01f0 100644 --- a/bolt/test/lit.cfg.py +++ b/bolt/test/lit.cfg.py @@ -72,6 +72,9 @@ if config.gnu_ld: config.available_features.add("gnu_ld") +if lit.util.which("fuser"): + config.available_features.add("fuser") + llvm_config.use_default_substitutions() llvm_config.config.environment["CLANG"] = config.bolt_clang diff --git a/bolt/test/runtime/instrumentation-indirect-2.c b/bolt/test/runtime/instrumentation-indirect-2.c new file mode 100644 index 0000000000000..7d19db14b77f0 --- /dev/null +++ b/bolt/test/runtime/instrumentation-indirect-2.c @@ -0,0 +1,168 @@ +// Check that indirect call hash tables properly register multiple calls, +// and that calls from different processes 
don't get mixed up when using +// --instrumentation-file-append-pid. + +#include +#include +#include + +__attribute__((noinline)) void funcA(int pid) { printf("funcA %d\n", pid); } +__attribute__((noinline)) void funcB(int pid) { printf("funcB %d\n", pid); } +__attribute__((noinline)) void funcC(int pid) { printf("funcC %d\n", pid); } +__attribute__((noinline)) void funcD(int pid) { printf("funcD %d\n", pid); } +__attribute__((noinline)) void funcE(int pid) { printf("funcE %d\n", pid); } +__attribute__((noinline)) void funcF(int pid) { printf("funcF %d\n", pid); } +__attribute__((noinline)) void funcG(int pid) { printf("funcG %d\n", pid); } +__attribute__((noinline)) void funcH(int pid) { printf("funcH %d\n", pid); } +__attribute__((noinline)) void funcI(int pid) { printf("funcI %d\n", pid); } +__attribute__((noinline)) void funcJ(int pid) { printf("funcJ %d\n", pid); } +__attribute__((noinline)) void funcK(int pid) { printf("funcK %d\n", pid); } +__attribute__((noinline)) void funcL(int pid) { printf("funcL %d\n", pid); } +__attribute__((noinline)) void funcM(int pid) { printf("funcM %d\n", pid); } +__attribute__((noinline)) void funcN(int pid) { printf("funcN %d\n", pid); } +__attribute__((noinline)) void funcO(int pid) { printf("funcO %d\n", pid); } +__attribute__((noinline)) void funcP(int pid) { printf("funcP %d\n", pid); } + +int main() { + + void (*funcs[])(int) = {funcA, funcB, funcC, funcD, funcE, funcF, + funcG, funcH, funcI, funcJ, funcK, funcL, + funcM, funcN, funcO, funcP}; + int i; + + switch (fork()) { + case -1: + printf("Failed to fork!\n"); + exit(-1); + break; + case 0: + i = 0; + break; + default: + i = 1; + break; + } + int pid = getpid(); + for (; i < sizeof(funcs) / sizeof(void *); i += 2) { + funcs[i](pid); + } + + return 0; +} +/* +REQUIRES: system-linux,shell,fuser + +RUN: %clang %cflags %s -o %t.exe -Wl,-q -pie -fpie + +RUN: llvm-bolt %t.exe --instrument --instrumentation-file=%t.fdata \ +RUN: --conservative-instrumentation -o 
%t.instrumented_conservative \ +RUN: --instrumentation-sleep-time=1 --instrumentation-no-counters-clear \ +RUN: --instrumentation-wait-forks + +# Instrumented program needs to finish returning zero +# Both output and profile must contain all 16 functions +RUN: %t.instrumented_conservative > %t.output +# Wait for profile and output to be fully written +RUN: bash %S/wait_file.sh %t.output +RUN: bash %S/wait_file.sh %t.fdata +RUN: cat %t.output | FileCheck %s --check-prefix=CHECK-OUTPUT +RUN: cat %t.fdata | FileCheck %s --check-prefix=CHECK-COMMON-PROF + +CHECK-OUTPUT-DAG: funcA +CHECK-OUTPUT-DAG: funcB +CHECK-OUTPUT-DAG: funcC +CHECK-OUTPUT-DAG: funcD +CHECK-OUTPUT-DAG: funcE +CHECK-OUTPUT-DAG: funcF +CHECK-OUTPUT-DAG: funcG +CHECK-OUTPUT-DAG: funcH +CHECK-OUTPUT-DAG: funcI +CHECK-OUTPUT-DAG: funcJ +CHECK-OUTPUT-DAG: funcK +CHECK-OUTPUT-DAG: funcL +CHECK-OUTPUT-DAG: funcM +CHECK-OUTPUT-DAG: funcN +CHECK-OUTPUT-DAG: funcO +CHECK-OUTPUT-DAG: funcP + +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcA 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcB 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcC 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcD 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcE 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcF 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcG 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcH 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcI 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcJ 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcK 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcL 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcM 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcN 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcO 0 0 1 +CHECK-COMMON-PROF-DAG: 1 main {{[0-9a-f]+}} 1 funcP 0 0 1 + +RUN: llvm-bolt %t.exe --instrument --instrumentation-file=%t \ +RUN: 
--instrumentation-file-append-pid \ +RUN: -o %t.instrumented + +RUN: %t.instrumented > %t.output +# Wait till output is fully written in case child outlives parent +RUN: bash %S/wait_file.sh %t.output +# Make sure all functions were called +RUN: cat %t.output | FileCheck %s --check-prefix=CHECK-OUTPUT + +RUN: child_pid=$(cat %t.output | grep funcA | awk '{print $2;}') +RUN: par_pid=$(cat %t.output | grep funcB | awk '{print $2;}') + +RUN: bash %S/wait_file.sh %t.$child_pid.fdata +RUN: bash %S/wait_file.sh %t.$par_pid.fdata + +RUN: mv %t.$child_pid.fdata %t.child.fdata +RUN: mv %t.$par_pid.fdata %t.parent.fdata + +# Instrumented binary must produce two profiles with only local calls +# recorded. Functions called only in child should not appear in parent's +# process and vice versa. +RUN: cat %t.child.fdata | FileCheck %s --check-prefix=CHECK-CHILD +RUN: cat %t.child.fdata | FileCheck %s --check-prefix=CHECK-NOCHILD +RUN: cat %t.parent.fdata | FileCheck %s --check-prefix=CHECK-PARENT +RUN: cat %t.parent.fdata | FileCheck %s --check-prefix=CHECK-NOPARENT + +CHECK-CHILD-DAG: 1 main {{[0-9a-f]+}} 1 funcA 0 0 1 +CHECK-CHILD-DAG: 1 main {{[0-9a-f]+}} 1 funcC 0 0 1 +CHECK-CHILD-DAG: 1 main {{[0-9a-f]+}} 1 funcE 0 0 1 +CHECK-CHILD-DAG: 1 main {{[0-9a-f]+}} 1 funcG 0 0 1 +CHECK-CHILD-DAG: 1 main {{[0-9a-f]+}} 1 funcI 0 0 1 +CHECK-CHILD-DAG: 1 main {{[0-9a-f]+}} 1 funcK 0 0 1 +CHECK-CHILD-DAG: 1 main {{[0-9a-f]+}} 1 funcM 0 0 1 +CHECK-CHILD-DAG: 1 main {{[0-9a-f]+}} 1 funcO 0 0 1 + +CHECK-NOCHILD-NOT: funcB +CHECK-NOCHILD-NOT: funcD +CHECK-NOCHILD-NOT: funcF +CHECK-NOCHILD-NOT: funcH +CHECK-NOCHILD-NOT: funcJ +CHECK-NOCHILD-NOT: funcL +CHECK-NOCHILD-NOT: funcN +CHECK-NOCHILD-NOT: funcP + +CHECK-PARENT-DAG: 1 main {{[0-9a-f]+}} 1 funcB 0 0 1 +CHECK-PARENT-DAG: 1 main {{[0-9a-f]+}} 1 funcD 0 0 1 +CHECK-PARENT-DAG: 1 main {{[0-9a-f]+}} 1 funcF 0 0 1 +CHECK-PARENT-DAG: 1 main {{[0-9a-f]+}} 1 funcH 0 0 1 +CHECK-PARENT-DAG: 1 main {{[0-9a-f]+}} 1 funcJ 0 0 1 +CHECK-PARENT-DAG: 1 
main {{[0-9a-f]+}} 1 funcL 0 0 1 +CHECK-PARENT-DAG: 1 main {{[0-9a-f]+}} 1 funcN 0 0 1 +CHECK-PARENT-DAG: 1 main {{[0-9a-f]+}} 1 funcP 0 0 1 + +CHECK-NOPARENT-NOT: funcA +CHECK-NOPARENT-NOT: funcC +CHECK-NOPARENT-NOT: funcE +CHECK-NOPARENT-NOT: funcG +CHECK-NOPARENT-NOT: funcI +CHECK-NOPARENT-NOT: funcK +CHECK-NOPARENT-NOT: funcM +CHECK-NOPARENT-NOT: funcO + + */ diff --git a/bolt/test/runtime/wait_file.sh b/bolt/test/runtime/wait_file.sh new file mode 100644 index 0000000000000..42d4c5b29e795 --- /dev/null +++ b/bolt/test/runtime/wait_file.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +check_file() { + local file="$1" + if [ -z "$file" ]; then + echo "No file passed!" + exit 1 + fi + if [ ! -f "$file" ]; then + return 1 + fi + + fuser -s "$file" + local ret=$? + if [ $ret -eq 1 ]; then # no one has file open + return 0 + fi + if [ $ret -eq 0 ]; then # file open by some processes + return 1 + fi + if [ $ret -eq 127 ]; then + echo "fuser command not found!" + exit 1 + fi + + echo "Unexpected exit code $ret from fuser!" + exit 1 +} + +wait_file() { + local file="$1" + local max_sleep=10 + check_file "$file" + local ret=$? + while [ $ret -ne 0 ] && [ $max_sleep -ne 0 ]; do + sleep 1 + max_sleep=$((max_sleep - 1)) + check_file $file + ret=$? + done + if [ $max_sleep -eq 0 ]; then + echo "The file does not exist or the test hung!" 
+ exit 1 + fi + +} +file="$1" +wait_file "$file" diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp index d44d1e272b9b7..8b542d0b2dec2 100644 --- a/clang-tools-extra/clangd/ClangdServer.cpp +++ b/clang-tools-extra/clangd/ClangdServer.cpp @@ -34,6 +34,7 @@ #include "support/MemoryTree.h" #include "support/ThreadsafeFS.h" #include "support/Trace.h" +#include "clang/Basic/Stack.h" #include "clang/Format/Format.h" #include "clang/Lex/Preprocessor.h" #include "clang/Tooling/CompilationDatabase.h" @@ -52,8 +53,8 @@ #include #include #include -#include #include +#include namespace clang { namespace clangd { @@ -112,6 +113,7 @@ struct UpdateIndexCallbacks : public ParsingCallbacks { FIndex(FIndex), // shared_ptr extends lifetime Stdlib(Stdlib)]() mutable { + clang::noteBottomOfStack(); IndexFileIn IF; IF.Symbols = indexStandardLibrary(std::move(CI), Loc, *TFS); if (Stdlib->isBest(LO)) diff --git a/clang-tools-extra/clangd/IncludeCleaner.cpp b/clang-tools-extra/clangd/IncludeCleaner.cpp index 9708c67ca2883..b2c04ac4d5463 100644 --- a/clang-tools-extra/clangd/IncludeCleaner.cpp +++ b/clang-tools-extra/clangd/IncludeCleaner.cpp @@ -70,6 +70,8 @@ bool isIgnored(llvm::StringRef HeaderPath, HeaderFilter IgnoreHeaders) { bool mayConsiderUnused( const Inclusion &Inc, ParsedAST &AST, const include_cleaner::PragmaIncludes *PI) { + if (PI && PI->shouldKeep(Inc.HashLine + 1)) + return false; // FIXME(kirillbobyrev): We currently do not support the umbrella headers. // System headers are likely to be standard library headers. // Until we have good support for umbrella headers, don't warn about them. @@ -81,8 +83,6 @@ bool mayConsiderUnused( AST.getIncludeStructure().getRealPath(HID)); assert(FE); if (PI) { - if (PI->shouldKeep(Inc.HashLine + 1)) - return false; // Check if main file is the public interface for a private header. If so we // shouldn't diagnose it as unused. 
if (auto PHeader = PI->getPublic(*FE); !PHeader.empty()) { diff --git a/clang-tools-extra/clangd/TUScheduler.cpp b/clang-tools-extra/clangd/TUScheduler.cpp index dd2ce16147a5d..324ba1fc8cb89 100644 --- a/clang-tools-extra/clangd/TUScheduler.cpp +++ b/clang-tools-extra/clangd/TUScheduler.cpp @@ -63,6 +63,7 @@ #include "support/ThreadCrashReporter.h" #include "support/Threading.h" #include "support/Trace.h" +#include "clang/Basic/Stack.h" #include "clang/Frontend/CompilerInvocation.h" #include "clang/Tooling/CompilationDatabase.h" #include "llvm/ADT/FunctionExtras.h" @@ -464,6 +465,10 @@ class PreambleThread { } void run() { + // We mark the current thread as the stack bottom so that clang running on this + // thread can notice the stack usage and prevent stack overflow with best + // efforts. Same applies to other calls throughout clangd. + clang::noteBottomOfStack(); while (true) { std::optional Throttle; { @@ -1383,6 +1388,7 @@ void ASTWorker::startTask(llvm::StringRef Name, } void ASTWorker::run() { + clang::noteBottomOfStack(); while (true) { { std::unique_lock Lock(Mutex); @@ -1777,6 +1783,7 @@ void TUScheduler::runWithPreamble(llvm::StringRef Name, PathRef File, Ctx = Context::current().derive(FileBeingProcessed, std::string(File)), Action = std::move(Action), this]() mutable { + clang::noteBottomOfStack(); ThreadCrashReporter ScopedReporter([&Name, &Contents, &Command]() { llvm::errs() << "Signalled during preamble action: " << Name << "\n"; crashDumpCompileCommand(llvm::errs(), Command); diff --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp index c35de750435cc..7ef9511cf7c07 100644 --- a/clang-tools-extra/clangd/index/Background.cpp +++ b/clang-tools-extra/clangd/index/Background.cpp @@ -30,6 +30,7 @@ #include "support/Trace.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" +#include "clang/Basic/Stack.h" #include "clang/Frontend/FrontendAction.h" #include "llvm/ADT/ArrayRef.h" 
#include "llvm/ADT/DenseSet.h" @@ -108,6 +109,7 @@ BackgroundIndex::BackgroundIndex( for (unsigned I = 0; I < Opts.ThreadPoolSize; ++I) { ThreadPool.runAsync("background-worker-" + llvm::Twine(I + 1), [this, Ctx(Context::current().clone())]() mutable { + clang::noteBottomOfStack(); WithContext BGContext(std::move(Ctx)); Queue.work([&] { Rebuilder.idle(); }); }); diff --git a/clang-tools-extra/clangd/test/infinite-instantiation.test b/clang-tools-extra/clangd/test/infinite-instantiation.test new file mode 100644 index 0000000000000..85a1b656f4908 --- /dev/null +++ b/clang-tools-extra/clangd/test/infinite-instantiation.test @@ -0,0 +1,13 @@ +// RUN: cp %s %t.cpp +// RUN: not clangd -check=%t.cpp 2>&1 | FileCheck -strict-whitespace %s + +// CHECK: [template_recursion_depth_exceeded] + +template +constexpr int f(T... args) { + return f(0, args...); +} + +int main() { + auto i = f(); +} diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index ca5cced197cd2..f656a8c587c65 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -29,6 +29,7 @@ #include "support/ThreadCrashReporter.h" #include "support/ThreadsafeFS.h" #include "support/Trace.h" +#include "clang/Basic/Stack.h" #include "clang/Format/Format.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" @@ -710,6 +711,9 @@ enum class ErrorResultCode : int { }; int clangdMain(int argc, char *argv[]) { + // Clang could run on the main thread. e.g., when the flag '-check' or '-sync' + // is enabled. 
+ clang::noteBottomOfStack(); llvm::InitializeAllTargetInfos(); llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); llvm::sys::AddSignalHandler( diff --git a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp index c55351fb1f91d..83a7c45df1695 100644 --- a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp +++ b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp @@ -76,6 +76,8 @@ TEST(IncludeCleaner, StdlibUnused) { auto TU = TestTU::withCode(R"cpp( #include #include + #include // IWYU pragma: keep + #include // IWYU pragma: export std::list x; )cpp"); // Layout of std library impl is not relevant. @@ -84,10 +86,13 @@ TEST(IncludeCleaner, StdlibUnused) { namespace std { template class list {}; template class queue {}; + template class vector {}; } )cpp"; TU.AdditionalFiles["list"] = "#include "; TU.AdditionalFiles["queue"] = "#include "; + TU.AdditionalFiles["vector"] = "#include "; + TU.AdditionalFiles["string"] = "#include "; TU.ExtraArgs = {"-isystem", testRoot()}; auto AST = TU.build(); IncludeCleanerFindings Findings = computeIncludeCleanerFindings(AST); diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 5add59680fd76..b161be3a07752 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -273,6 +273,10 @@ Non-comprehensive list of changes in this release types. This allows access to ``llvm.nearbyint`` for arbitrary floating-point and vector of floating-point types. - Clang AST matcher now matches concept declarations with `conceptDecl`. +- Clang now supports more GCC stdio builtins: ``__builtin_vprintf``, ``__builtin_vfprintf``, + ``__builtin_fscanf``, ``__builtin_scanf``, ``__builtin_sscanf``, ``__builtin_vfscanf``, + ``__builtin_vscanf``, ``__builtin_vsscanf``. + New Compiler Flags ------------------ @@ -293,7 +297,11 @@ New Compiler Flags - ``-print-multi-flags-experimental`` prints the flags used for multilib selection. 
See `the multilib docs `_ for more details. - +- ``-maix32`` and ``-maix64`` are new GCC compatibility flags that select the + bitmode to target on AIX. +- ``-p`` is a new GCC compatibility flag for AIX and Linux which works + similarly to ``-pg`` by writing profile information, but targets the ``prof`` + tool as opposed to the ``gprof`` tool. Deprecated Compiler Flags ------------------------- @@ -328,7 +336,9 @@ Attribute Changes in Clang the flag ``-Wunsafe-buffer-usage`` is enabled. - ``__declspec`` attributes can now be used together with the using keyword. Before the attributes on ``__declspec`` was ignored, while now it will be forwarded to the - point where the alias is used. + point where the alias is used. Note, some incorrect uses of ``__declspec`` on a + ``using`` declaration were being silently ignored and will now be appropriately + diagnosed as ignoring the attribute. - Introduced a new ``USR`` (unified symbol resolution) clause inside of the existing ``__attribute__((external_source_symbol))`` attribute. Clang's indexer uses the optional USR value when indexing Clang's AST. This value is expected @@ -467,9 +477,8 @@ Improvements to Clang's diagnostics - ``-Wformat`` will no longer suggest a no-op fix-it for fixing scoped enum format warnings. Instead, it will suggest casting the enum object to the type specified in the format string. -- Clang now emits ``-Wconstant-logical-operand`` warning even when constant logical - operand is on left side. - (`#37919 `_) +- Clang constexpr evaluator now displays notes as well as an error when a constructor + of a base class is not called in the constructor of its derived class. Bug Fixes in This Version ------------------------- @@ -702,6 +711,21 @@ Bug Fixes in This Version - Fix a hang on valid C code passing a function type as an argument to ``typeof`` to form a function declaration. 
(`#64713 _`) +- Fixed an issue where accesses to the local variables of a coroutine during + ``await_suspend`` could be misoptimized, including accesses to the awaiter + object itself. + (`#56301 `_) + The current solution may bring performance regressions if the awaiters have + non-static data members. See + `#64945 `_ for details. +- Clang now correctly diagnoses ``function_needs_feature`` when always_inline + callee has incompatible target features with caller. +- Removed the linking of libraries when ``-r`` is passed to the driver on AIX. +- Fixed an Itanium ABI bug where we force exactly two-byte alignment on member + functions to reserve a bit in function pointers for identifying pointers to + virtual member functions even if the target required a greater function + alignment and/or did not have function pointers which point to function entry + points (i.e., uses function descriptor objects instead). Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -822,6 +846,13 @@ Bug Fixes to C++ Support - Fix constraint checking of non-generic lambdas. (`#63181 `_) +- Update ``FunctionDeclBitfields.NumFunctionDeclBits``. This fixes: + (`#64171 `_). + +- Fix a crash caused by substitution failure in expression requirements. + (`#64172 `_) and + (`#64723 `_). + Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -977,10 +1008,19 @@ CUDA Support AIX Support ^^^^^^^^^^^ -- Add an AIX-only link-time option, `-mxcoff-build-id=0xHEXSTRING`, to allow users - to embed a hex id in their binary such that it's readable by the program itself. - This option is an alternative to the `--build-id=0xHEXSTRING` GNU linker option - which is currently not supported by the AIX linker. +- Enabled ThinLTO support. Minimum OS requirement is AIX 7.2 TL5 SP6 or + the upcoming AIX 7.3 TL2. + +- Enabled integrated assembler (``-f[no-]integrated-as``) for LTO. LTO now + defaults to the integrated assembler. 
+ +- Enabled Clang-based instrumented profiling + (``-fprofile-instr-[generate|use]``). + +- Added an AIX-only link-time option, ``-mxcoff-build-id=0xHEXSTRING``, to allow + users to embed a hex id in their binary such that it's readable by the program + itself. This option is an alternative to the ``--build-id=0xHEXSTRING`` GNU + linker option, which is currently not supported by the AIX linker. - Introduced the ``-mxcoff-roptr`` option to place constant objects with relocatable address values in the read-only data section. This option should @@ -989,6 +1029,14 @@ AIX Support read-only data sections with relocatable address values that resolve to imported symbols are made writable. +- Implemented the ``-frecord-command-line`` option on AIX, which saves the + command-line options used from compiling a source file to the corresponding + object file or binary file. + +- Added a new linker option, ``-K``, that is used to align the header, text, + data, and loader sections of the output file so that each section begins on + a page boundary. + WebAssembly Support ^^^^^^^^^^^^^^^^^^^ - Shared library support (and PIC code generation) for WebAssembly is no longer @@ -1005,6 +1053,11 @@ AVR Support of ``USHRT_MAX`` is now ``unsigned int`` instead of ``int``, as required by the C standard. +PowerPC Support +^^^^^^^^^^^^^^^ +- Clang now emits errors when it detects incompatible target features for + PowerPC builtins. + DWARF Support in Clang ---------------------- diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h index 1b99709ca90d9..12137387b676a 100644 --- a/clang/include/clang/AST/DeclBase.h +++ b/clang/include/clang/AST/DeclBase.h @@ -1702,7 +1702,7 @@ class DeclContext { }; /// Number of non-inherited bits in FunctionDeclBitfields. - enum { NumFunctionDeclBits = 30 }; + enum { NumFunctionDeclBits = 31 }; /// Stores the bits used by CXXConstructorDecl. 
If modified /// NumCXXConstructorDeclBits and the accessor @@ -1714,12 +1714,12 @@ class DeclContext { /// For the bits in FunctionDeclBitfields. uint64_t : NumFunctionDeclBits; - /// 21 bits to fit in the remaining available space. + /// 20 bits to fit in the remaining available space. /// Note that this makes CXXConstructorDeclBitfields take /// exactly 64 bits and thus the width of NumCtorInitializers /// will need to be shrunk if some bit is added to NumDeclContextBitfields, /// NumFunctionDeclBitfields or CXXConstructorDeclBitfields. - uint64_t NumCtorInitializers : 18; + uint64_t NumCtorInitializers : 17; uint64_t IsInheritingConstructor : 1; /// Whether this constructor has a trail-allocated explicit specifier. diff --git a/clang/include/clang/AST/ExprConcepts.h b/clang/include/clang/AST/ExprConcepts.h index d900e980852b4..13d4568119eb2 100644 --- a/clang/include/clang/AST/ExprConcepts.h +++ b/clang/include/clang/AST/ExprConcepts.h @@ -14,20 +14,21 @@ #ifndef LLVM_CLANG_AST_EXPRCONCEPTS_H #define LLVM_CLANG_AST_EXPRCONCEPTS_H -#include "clang/AST/ASTContext.h" #include "clang/AST/ASTConcept.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" -#include "clang/AST/DeclarationName.h" #include "clang/AST/DeclTemplate.h" +#include "clang/AST/DeclarationName.h" #include "clang/AST/Expr.h" #include "clang/AST/NestedNameSpecifier.h" #include "clang/AST/TemplateBase.h" #include "clang/AST/Type.h" #include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TrailingObjects.h" -#include #include +#include namespace clang { class ASTStmtReader; @@ -467,6 +468,13 @@ class NestedRequirement : public Requirement { } }; +using EntityPrinter = llvm::function_ref; + +/// \brief create a Requirement::SubstitutionDiagnostic with only a +/// SubstitutedEntity and DiagLoc using Sema's allocator. 
+Requirement::SubstitutionDiagnostic * +createSubstDiagAt(Sema &S, SourceLocation Location, EntityPrinter Printer); + } // namespace concepts /// C++2a [expr.prim.req]: diff --git a/clang/include/clang/Basic/DiagnosticASTKinds.td b/clang/include/clang/Basic/DiagnosticASTKinds.td index 566cdc3406058..0794ed7ba6837 100644 --- a/clang/include/clang/Basic/DiagnosticASTKinds.td +++ b/clang/include/clang/Basic/DiagnosticASTKinds.td @@ -70,6 +70,8 @@ def note_consteval_address_accessible : Note< "is not a constant expression">; def note_constexpr_uninitialized : Note< "subobject %0 is not initialized">; +def note_constexpr_uninitialized_base : Note< + "constructor of base class %0 is not called">; def note_constexpr_static_local : Note< "control flows through the definition of a %select{static|thread_local}0 variable">; def note_constexpr_subobject_declared_here : Note< diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 7b4d415bf0649..26bc88a980e4f 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -20,6 +20,7 @@ def DeprecatedStaticAnalyzerFlag : DiagGroup<"deprecated-static-analyzer-flag">; // Empty DiagGroups are recognized by clang but ignored. def ODR : DiagGroup<"odr">; def : DiagGroup<"abi">; +def : DiagGroup<"gnu-empty-initializer">; // Now a C extension, not GNU. 
def AbsoluteValue : DiagGroup<"absolute-value">; def MisspelledAssumption : DiagGroup<"misspelled-assumption">; def UnknownAssumption : DiagGroup<"unknown-assumption">; diff --git a/clang/include/clang/Basic/Sanitizers.h b/clang/include/clang/Basic/Sanitizers.h index db53010645ae3..4659e45c78834 100644 --- a/clang/include/clang/Basic/Sanitizers.h +++ b/clang/include/clang/Basic/Sanitizers.h @@ -23,7 +23,11 @@ namespace llvm { class hash_code; +class Triple; +namespace opt { +class ArgList; } +} // namespace llvm namespace clang { diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 41ef47eb565b1..61be52149341f 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1414,7 +1414,9 @@ class TargetInfo : public TransferrableTargetInfo, /// Identify whether this target supports IFuncs. bool supportsIFunc() const { - return getTriple().isOSBinFormatELF() && !getTriple().isOSFuchsia(); + return getTriple().isOSBinFormatELF() && + ((getTriple().isOSLinux() && !getTriple().isMusl()) || + getTriple().isOSFreeBSD()); } // Validate the contents of the __builtin_cpu_supports(const char*) diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 7acacd7bf4f50..76000156fece7 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -9612,9 +9612,8 @@ bool ASTContext::areLaxCompatibleRVVTypes(QualType FirstType, const LangOptions::LaxVectorConversionKind LVCKind = getLangOpts().getLaxVectorConversions(); - // If __riscv_v_fixed_vlen != N do not allow GNU vector lax conversion. - if (VecTy->getVectorKind() == VectorType::GenericVector && - getTypeSize(SecondType) != getRVVTypeSize(*this, BT)) + // If __riscv_v_fixed_vlen != N do not allow vector lax conversion. 
+ if (getTypeSize(SecondType) != getRVVTypeSize(*this, BT)) return false; // If -flax-vector-conversions=all is specified, the types are diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 2f2f4eef852fd..f1bad0c7f7f22 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -2418,9 +2418,16 @@ static bool CheckEvaluationResult(CheckEvaluationResultKind CERK, if (const CXXRecordDecl *CD = dyn_cast(RD)) { unsigned BaseIndex = 0; for (const CXXBaseSpecifier &BS : CD->bases()) { - if (!CheckEvaluationResult(CERK, Info, DiagLoc, BS.getType(), - Value.getStructBase(BaseIndex), Kind, - /*SubobjectDecl=*/nullptr, CheckedTemps)) + const APValue &BaseValue = Value.getStructBase(BaseIndex); + if (!BaseValue.hasValue()) { + SourceLocation TypeBeginLoc = BS.getBaseTypeLoc(); + Info.FFDiag(TypeBeginLoc, diag::note_constexpr_uninitialized_base) + << BS.getType() << SourceRange(TypeBeginLoc, BS.getEndLoc()); + return false; + } + if (!CheckEvaluationResult(CERK, Info, DiagLoc, BS.getType(), BaseValue, + Kind, /*SubobjectDecl=*/nullptr, + CheckedTemps)) return false; ++BaseIndex; } diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index fcc1620f7a043..0d1e9ad439b7d 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5239,30 +5239,50 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, dyn_cast(ArgInfo.getCoerceToType()); if (STy && ArgInfo.isDirect() && ArgInfo.getCanBeFlattened()) { llvm::Type *SrcTy = Src.getElementType(); - uint64_t SrcSize = CGM.getDataLayout().getTypeAllocSize(SrcTy); - uint64_t DstSize = CGM.getDataLayout().getTypeAllocSize(STy); - - // If the source type is smaller than the destination type of the - // coerce-to logic, copy the source value into a temp alloca the size - // of the destination type to allow loading all of it. The bits past - // the source value are left undef. 
- if (SrcSize < DstSize) { - Address TempAlloca - = CreateTempAlloca(STy, Src.getAlignment(), - Src.getName() + ".coerce"); - Builder.CreateMemCpy(TempAlloca, Src, SrcSize); - Src = TempAlloca; + llvm::TypeSize SrcTypeSize = + CGM.getDataLayout().getTypeAllocSize(SrcTy); + llvm::TypeSize DstTypeSize = CGM.getDataLayout().getTypeAllocSize(STy); + if (SrcTypeSize.isScalable()) { + assert(STy->containsHomogeneousScalableVectorTypes() && + "ABI only supports structure with homogeneous scalable vector " + "type"); + assert(SrcTypeSize == DstTypeSize && + "Only allow non-fractional movement of structure with " + "homogeneous scalable vector type"); + assert(NumIRArgs == STy->getNumElements()); + + llvm::Value *StoredStructValue = + Builder.CreateLoad(Src, Src.getName() + ".tuple"); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + llvm::Value *Extract = Builder.CreateExtractValue( + StoredStructValue, i, Src.getName() + ".extract" + Twine(i)); + IRCallArgs[FirstIRArg + i] = Extract; + } } else { - Src = Src.withElementType(STy); - } + uint64_t SrcSize = SrcTypeSize.getFixedValue(); + uint64_t DstSize = DstTypeSize.getFixedValue(); + + // If the source type is smaller than the destination type of the + // coerce-to logic, copy the source value into a temp alloca the size + // of the destination type to allow loading all of it. The bits past + // the source value are left undef. 
+ if (SrcSize < DstSize) { + Address TempAlloca = CreateTempAlloca(STy, Src.getAlignment(), + Src.getName() + ".coerce"); + Builder.CreateMemCpy(TempAlloca, Src, SrcSize); + Src = TempAlloca; + } else { + Src = Src.withElementType(STy); + } - assert(NumIRArgs == STy->getNumElements()); - for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { - Address EltPtr = Builder.CreateStructGEP(Src, i); - llvm::Value *LI = Builder.CreateLoad(EltPtr); - if (ArgHasMaybeUndefAttr) - LI = Builder.CreateFreeze(LI); - IRCallArgs[FirstIRArg + i] = LI; + assert(NumIRArgs == STy->getNumElements()); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + Address EltPtr = Builder.CreateStructGEP(Src, i); + llvm::Value *LI = Builder.CreateLoad(EltPtr); + if (ArgHasMaybeUndefAttr) + LI = Builder.CreateFreeze(LI); + IRCallArgs[FirstIRArg + i] = LI; + } } } else { // In the simple case, just pass the coerced loaded value. @@ -5467,6 +5487,30 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline); } + // The await_suspend call performed by co_await is essentially asynchronous + // to the execution of the coroutine. Inlining it normally into an unsplit + // coroutine can cause miscompilation because the coroutine CFG misrepresents + // the true control flow of the program: things that happen in the + // await_suspend are not guaranteed to happen prior to the resumption of the + // coroutine, and things that happen after the resumption of the coroutine + // (including its exit and the potential deallocation of the coroutine frame) + // are not guaranteed to happen only after the end of await_suspend. + // + // The short-term solution to this problem is to mark the call as uninlinable. + // But we don't want to do this if the call is known to be trivial, which is + // very common. 
+ // + // The long-term solution may introduce patterns like: + // + // call @llvm.coro.await_suspend(ptr %awaiter, ptr %handle, + // ptr @awaitSuspendFn) + // + // Then it is much easier to perform the safety analysis in the middle end. + // If it is safe to inline the call to awaitSuspend, we can replace it in the + // CoroEarly pass. Otherwise we could replace it in the CoroSplit pass. + if (inSuspendBlock() && mayCoroHandleEscape()) + Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoInline); + // Disable inlining inside SEH __try blocks. if (isSEHTryScope()) { Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::NoInline); diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp index 8437cda79beb2..810ae7d51ec10 100644 --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -139,6 +139,36 @@ static bool memberCallExpressionCanThrow(const Expr *E) { return true; } +/// Return true when the coroutine handle may escape from the await-suspend +/// (`awaiter.await_suspend(std::coroutine_handle)` expression). +/// Return false only when the coroutine handle wouldn't escape in the +/// await-suspend for sure. +/// +/// While it is always safe to return true, returning false can bring better +/// performance. +/// +/// See https://github.com/llvm/llvm-project/issues/56301 and +/// https://reviews.llvm.org/D157070 for the example and the full discussion. +/// +/// FIXME: It will be much better to perform such analysis in the middle end. +/// See the comments in `CodeGenFunction::EmitCall` for example. +static bool MayCoroHandleEscape(CoroutineSuspendExpr const &S) { + CXXRecordDecl *Awaiter = + S.getCommonExpr()->getType().getNonReferenceType()->getAsCXXRecordDecl(); + + // Return true conservatively if the awaiter type is not a record type. + if (!Awaiter) + return true; + + // In case the awaiter type is empty, the suspend wouldn't leak the coroutine + // handle.
+ // + // TODO: We can improve this by looking into the implementation of + // await-suspend and see if the coroutine handle is passed to foreign + // functions. + return !Awaiter->field_empty(); +} + // Emit suspend expression which roughly looks like: // // auto && x = CommonExpr(); @@ -199,8 +229,11 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co auto *SaveCall = Builder.CreateCall(CoroSave, {NullPtr}); CGF.CurCoro.InSuspendBlock = true; + CGF.CurCoro.MayCoroHandleEscape = MayCoroHandleEscape(S); auto *SuspendRet = CGF.EmitScalarExpr(S.getSuspendExpr()); CGF.CurCoro.InSuspendBlock = false; + CGF.CurCoro.MayCoroHandleEscape = false; + if (SuspendRet != nullptr && SuspendRet->getType()->isIntegerTy(1)) { // Veto suspension if requested by bool returning await_suspend. BasicBlock *RealSuspendBlock = diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 8722fd4550e4a..28ec2b9700721 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -334,6 +334,7 @@ class CodeGenFunction : public CodeGenTypeCache { struct CGCoroInfo { std::unique_ptr Data; bool InSuspendBlock = false; + bool MayCoroHandleEscape = false; CGCoroInfo(); ~CGCoroInfo(); }; @@ -347,6 +348,10 @@ class CodeGenFunction : public CodeGenTypeCache { return isCoroutine() && CurCoro.InSuspendBlock; } + bool mayCoroHandleEscape() const { + return isCoroutine() && CurCoro.MayCoroHandleEscape; + } + /// CurGD - The GlobalDecl for the current function being compiled. GlobalDecl CurGD; diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index a3506df7d4e5a..f09d1129b128a 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2386,7 +2386,7 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D, // functions. 
If the current target's C++ ABI requires this and this is a // member function, set its alignment accordingly. if (getTarget().getCXXABI().areMemberFunctionsAligned()) { - if (F->getPointerAlignment(getDataLayout()) < 2 && isa(D)) + if (isa(D) && F->getPointerAlignment(getDataLayout()) < 2) F->setAlignment(std::max(llvm::Align(2), F->getAlign().valueOrOne())); } diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index f6ea4d0b43667..bdbdad9362e19 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4936,6 +4936,12 @@ void Driver::BuildJobs(Compilation &C) const { (void)C.getArgs().hasArg(options::OPT_driver_mode); (void)C.getArgs().hasArg(options::OPT_rsp_quoting); + bool HasAssembleJob = llvm::any_of(C.getJobs(), [](auto &J) { + // Match ClangAs and other derived assemblers of Tool. ClangAs uses a + // longer ShortName "clang integrated assembler" while other assemblers just + // use "assembler". + return strstr(J.getCreator().getShortName(), "assembler"); + }); for (Arg *A : C.getArgs()) { // FIXME: It would be nice to be able to send the argument to the // DiagnosticsEngine, so that extra values, position, and so on could be @@ -4965,7 +4971,7 @@ void Driver::BuildJobs(Compilation &C) const { // already been warned about. 
if (!IsCLMode() || !A->getOption().matches(options::OPT_UNKNOWN)) { if (A->getOption().hasFlag(options::TargetSpecific) && - !A->isIgnoredTargetSpecific()) { + !A->isIgnoredTargetSpecific() && !HasAssembleJob) { Diag(diag::err_drv_unsupported_opt_for_target) << A->getSpelling() << getTargetTriple(); } else { diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index c3ce13f93464d..12fe55be9113e 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -37,6 +37,8 @@ static const SanitizerMask NeedsUbsanCxxRt = SanitizerKind::Vptr | SanitizerKind::CFI; static const SanitizerMask NotAllowedWithTrap = SanitizerKind::Vptr; static const SanitizerMask NotAllowedWithMinimalRuntime = SanitizerKind::Vptr; +static const SanitizerMask NotAllowedWithExecuteOnly = + SanitizerKind::Function | SanitizerKind::KCFI; static const SanitizerMask RequiresPIE = SanitizerKind::DataFlow | SanitizerKind::Scudo; static const SanitizerMask NeedsUnwindTables = @@ -141,6 +143,16 @@ static std::string describeSanitizeArg(const llvm::opt::Arg *A, /// Sanitizers set. static std::string toString(const clang::SanitizerSet &Sanitizers); +/// Return true if an execute-only target disallows data access to code +/// sections. +static bool isExecuteOnlyTarget(const llvm::Triple &Triple, + const llvm::opt::ArgList &Args) { + if (Triple.isPS5()) + return true; + return Args.hasFlagNoClaim(options::OPT_mexecute_only, + options::OPT_mno_execute_only, false); +} + static void validateSpecialCaseListFormat(const Driver &D, std::vector &SCLFiles, unsigned MalformedSCLErrorDiagID, @@ -395,6 +407,22 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, DiagnosedKinds |= SanitizerKind::Function; } } + // -fsanitize=function and -fsanitize=kcfi instrument indirect function + // calls to load a type hash before the function label. Therefore, an + // execute-only target doesn't support the function and kcfi sanitizers. 
+ const llvm::Triple &Triple = TC.getTriple(); + if (isExecuteOnlyTarget(Triple, Args)) { + if (SanitizerMask KindsToDiagnose = + Add & NotAllowedWithExecuteOnly & ~DiagnosedKinds) { + if (DiagnoseErrors) { + std::string Desc = describeSanitizeArg(Arg, KindsToDiagnose); + D.Diag(diag::err_drv_argument_not_allowed_with) + << Desc << Triple.str(); + } + DiagnosedKinds |= KindsToDiagnose; + } + Add &= ~NotAllowedWithExecuteOnly; + } // FIXME: Make CFI on member function calls compatible with cross-DSO CFI. // There are currently two problems: @@ -457,6 +485,10 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, if (MinimalRuntime) { Add &= ~NotAllowedWithMinimalRuntime; } + // NotAllowedWithExecuteOnly is silently discarded on an execute-only + // target if implicitly enabled through group expansion. + if (isExecuteOnlyTarget(Triple, Args)) + Add &= ~NotAllowedWithExecuteOnly; if (CfiCrossDso) Add &= ~SanitizerKind::CFIMFCall; Add &= Supported; diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp index 97217eba9ca01..bfc86d9f34718 100644 --- a/clang/lib/Driver/ToolChains/AIX.cpp +++ b/clang/lib/Driver/ToolChains/AIX.cpp @@ -30,6 +30,7 @@ void aix::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { + const Driver &D = getToolChain().getDriver(); ArgStringList CmdArgs; const bool IsArch32Bit = getToolChain().getTriple().isArch32Bit(); @@ -38,6 +39,11 @@ void aix::Assembler::ConstructJob(Compilation &C, const JobAction &JA, if (!IsArch32Bit && !IsArch64Bit) llvm_unreachable("Unsupported bit width value."); + if (Arg *A = C.getArgs().getLastArg(options::OPT_G)) { + D.Diag(diag::err_drv_unsupported_opt_for_target) + << A->getSpelling() << D.getTargetTriple(); + } + // Specify the mode in which the as(1) command operates. 
if (IsArch32Bit) { CmdArgs.push_back("-a32"); diff --git a/clang/lib/Driver/ToolChains/Solaris.cpp b/clang/lib/Driver/ToolChains/Solaris.cpp index 86c789f5fcef5..de5a69e4ca3fd 100644 --- a/clang/lib/Driver/ToolChains/Solaris.cpp +++ b/clang/lib/Driver/ToolChains/Solaris.cpp @@ -47,11 +47,24 @@ void solaris::Assembler::ConstructJob(Compilation &C, const JobAction &JA, Exec, CmdArgs, Inputs, Output)); } +static bool getPIE(const ArgList &Args, const ToolChain &TC) { + if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_static) || + Args.hasArg(options::OPT_r)) + return false; + + Arg *A = Args.getLastArg(options::OPT_pie, options::OPT_no_pie, + options::OPT_nopie); + if (!A) + return TC.isPIEDefault(Args); + return A->getOption().matches(options::OPT_pie); +} + void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, const ArgList &Args, const char *LinkingOutput) const { + const bool IsPIE = getPIE(Args, getToolChain()); ArgStringList CmdArgs; // Demangle C++ names in errors @@ -62,6 +75,11 @@ void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("_start"); } + if (IsPIE) { + CmdArgs.push_back("-z"); + CmdArgs.push_back("type=pie"); + } + if (Args.hasArg(options::OPT_static)) { CmdArgs.push_back("-Bstatic"); CmdArgs.push_back("-dn"); @@ -113,8 +131,13 @@ void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA, values_xpg = "values-xpg4.o"; CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath(values_xpg))); - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crtbegin.o"))); + + const char *crtbegin = nullptr; + if (Args.hasArg(options::OPT_shared) || IsPIE) + crtbegin = "crtbeginS.o"; + else + crtbegin = "crtbegin.o"; + CmdArgs.push_back(Args.MakeArgString(getToolChain().GetFilePath(crtbegin))); // Add crtfastmath.o if available and fast math is enabled. 
getToolChain().addFastMathRuntimeIfAvailable(Args, CmdArgs); } @@ -171,8 +194,12 @@ void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles, options::OPT_r)) { - CmdArgs.push_back( - Args.MakeArgString(getToolChain().GetFilePath("crtend.o"))); + if (Args.hasArg(options::OPT_shared) || IsPIE) + CmdArgs.push_back( + Args.MakeArgString(getToolChain().GetFilePath("crtendS.o"))); + else + CmdArgs.push_back( + Args.MakeArgString(getToolChain().GetFilePath("crtend.o"))); CmdArgs.push_back( Args.MakeArgString(getToolChain().GetFilePath("crtn.o"))); } diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 32619bc56f7a3..852437b9390fc 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -581,7 +581,8 @@ void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { ProbablyBracedList = ProbablyBracedList || (NextTok->is(tok::l_brace) && LBraceStack.back().PrevTok && - LBraceStack.back().PrevTok->is(tok::identifier)); + LBraceStack.back().PrevTok->isOneOf(tok::identifier, + tok::greater)); ProbablyBracedList = ProbablyBracedList || @@ -2464,7 +2465,7 @@ bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) { const auto *PrevPrev = Prev ? 
Prev->getPreviousNonComment() : nullptr; const bool Blacklisted = PrevPrev && - (PrevPrev->is(tok::kw___attribute) || + (PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) || (SeenEqual && (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) || PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if)))); diff --git a/clang/lib/Frontend/FrontendAction.cpp b/clang/lib/Frontend/FrontendAction.cpp index c6f958a6077bf..0bd4b01ff79db 100644 --- a/clang/lib/Frontend/FrontendAction.cpp +++ b/clang/lib/Frontend/FrontendAction.cpp @@ -15,6 +15,7 @@ #include "clang/Basic/FileEntry.h" #include "clang/Basic/LangStandard.h" #include "clang/Basic/Sarif.h" +#include "clang/Basic/Stack.h" #include "clang/Frontend/ASTUnit.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendDiagnostic.h" @@ -1150,6 +1151,10 @@ void ASTFrontendAction::ExecuteAction() { CompilerInstance &CI = getCompilerInstance(); if (!CI.hasPreprocessor()) return; + // This is a fallback: If the client forgets to invoke this, we mark the + // current stack as the bottom. Though not optimal, this could help prevent + // stack overflow during deep recursion. + clang::noteBottomOfStack(); // FIXME: Move the truncation aspect of this into Sema, we delayed this till // here so the source manager would be initialized. diff --git a/clang/lib/Interpreter/IncrementalExecutor.cpp b/clang/lib/Interpreter/IncrementalExecutor.cpp index 3f8d60630de41..2c4dfc9a611e0 100644 --- a/clang/lib/Interpreter/IncrementalExecutor.cpp +++ b/clang/lib/Interpreter/IncrementalExecutor.cpp @@ -92,12 +92,19 @@ llvm::Error IncrementalExecutor::runCtors() const { llvm::Expected IncrementalExecutor::getSymbolAddress(llvm::StringRef Name, SymbolNameKind NameKind) const { - auto Sym = (NameKind == LinkerName) ? 
Jit->lookupLinkerMangled(Name) - : Jit->lookup(Name); - - if (!Sym) - return Sym.takeError(); - return Sym; + using namespace llvm::orc; + auto SO = makeJITDylibSearchOrder({&Jit->getMainJITDylib(), + Jit->getPlatformJITDylib().get(), + Jit->getProcessSymbolsJITDylib().get()}); + + ExecutionSession &ES = Jit->getExecutionSession(); + + auto SymOrErr = + ES.lookup(SO, (NameKind == LinkerName) ? ES.intern(Name) + : Jit->mangleAndIntern(Name)); + if (auto Err = SymOrErr.takeError()) + return std::move(Err); + return SymOrErr->getAddress(); } } // end namespace clang diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 423d5372a6f65..1cff4a75790ec 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -19,6 +19,7 @@ #include "clang/AST/CharUnits.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/ExprCXX.h" +#include "clang/AST/ExprConcepts.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/AST/Type.h" @@ -9072,16 +9073,24 @@ Sema::BuildExprRequirement( MultiLevelTemplateArgumentList MLTAL(Param, TAL.asArray(), /*Final=*/false); MLTAL.addOuterRetainedLevels(TPL->getDepth()); - Expr *IDC = Param->getTypeConstraint()->getImmediatelyDeclaredConstraint(); + const TypeConstraint *TC = Param->getTypeConstraint(); + assert(TC && "Type Constraint cannot be null here"); + auto *IDC = TC->getImmediatelyDeclaredConstraint(); + assert(IDC && "ImmediatelyDeclaredConstraint can't be null here."); ExprResult Constraint = SubstExpr(IDC, MLTAL); if (Constraint.isInvalid()) { - Status = concepts::ExprRequirement::SS_ExprSubstitutionFailure; - } else { - SubstitutedConstraintExpr = - cast(Constraint.get()); - if (!SubstitutedConstraintExpr->isSatisfied()) - Status = concepts::ExprRequirement::SS_ConstraintsNotSatisfied; - } + return new (Context) concepts::ExprRequirement( + concepts::createSubstDiagAt(*this, IDC->getExprLoc(), + [&](llvm::raw_ostream &OS) { + IDC->printPretty(OS, 
/*Helper=*/nullptr, + getPrintingPolicy()); + }), + IsSimple, NoexceptLoc, ReturnTypeRequirement); + } + SubstitutedConstraintExpr = + cast(Constraint.get()); + if (!SubstitutedConstraintExpr->isSatisfied()) + Status = concepts::ExprRequirement::SS_ConstraintsNotSatisfied; } return new (Context) concepts::ExprRequirement(E, IsSimple, NoexceptLoc, ReturnTypeRequirement, Status, diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 8702e2ca3a1b3..394006a57747d 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -2276,9 +2276,9 @@ QualType TemplateInstantiator::TransformSubstTemplateTypeParmPackType( getPackIndex(Pack), Arg, TL.getNameLoc()); } -template static concepts::Requirement::SubstitutionDiagnostic * -createSubstDiag(Sema &S, TemplateDeductionInfo &Info, EntityPrinter Printer) { +createSubstDiag(Sema &S, TemplateDeductionInfo &Info, + concepts::EntityPrinter Printer) { SmallString<128> Message; SourceLocation ErrorLoc; if (Info.hasSFINAEDiagnostic()) { @@ -2302,6 +2302,19 @@ createSubstDiag(Sema &S, TemplateDeductionInfo &Info, EntityPrinter Printer) { StringRef(MessageBuf, Message.size())}; } +concepts::Requirement::SubstitutionDiagnostic * +concepts::createSubstDiagAt(Sema &S, SourceLocation Location, + EntityPrinter Printer) { + SmallString<128> Entity; + llvm::raw_svector_ostream OS(Entity); + Printer(OS); + char *EntityBuf = new (S.Context) char[Entity.size()]; + llvm::copy(Entity, EntityBuf); + return new (S.Context) concepts::Requirement::SubstitutionDiagnostic{ + /*SubstitutedEntity=*/StringRef(EntityBuf, Entity.size()), + /*DiagLoc=*/Location, /*DiagMessage=*/StringRef()}; +} + ExprResult TemplateInstantiator::TransformRequiresTypeParams( SourceLocation KWLoc, SourceLocation RBraceLoc, const RequiresExpr *RE, RequiresExprBodyDecl *Body, ArrayRef Params, diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 
10b3587885e39..097e81ea7d45a 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -7478,6 +7478,10 @@ StmtResult TreeTransform::TransformCompoundStmt(CompoundStmt *S, bool IsStmtExpr) { Sema::CompoundScopeRAII CompoundScope(getSema()); + Sema::FPFeaturesStateRAII FPSave(getSema()); + if (S->hasStoredFPFeatures()) + getSema().resetFPOptions( + S->getStoredFPFeatures().applyOverrides(getSema().getLangOpts())); const Stmt *ExprResult = S->getStmtExprResult(); bool SubStmtInvalid = false; diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index 59dbc36d24e8c..8dd78152bd687 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -580,7 +580,7 @@ void ASTDeclWriter::VisitDeclaratorDecl(DeclaratorDecl *D) { } void ASTDeclWriter::VisitFunctionDecl(FunctionDecl *D) { - static_assert(DeclContext::NumFunctionDeclBits == 30, + static_assert(DeclContext::NumFunctionDeclBits == 31, "You need to update the serializer after you change the " "FunctionDeclBits"); @@ -1495,7 +1495,7 @@ void ASTDeclWriter::VisitCXXMethodDecl(CXXMethodDecl *D) { } void ASTDeclWriter::VisitCXXConstructorDecl(CXXConstructorDecl *D) { - static_assert(DeclContext::NumCXXConstructorDeclBits == 21, + static_assert(DeclContext::NumCXXConstructorDeclBits == 20, "You need to update the serializer after you change the " "CXXConstructorDeclBits"); diff --git a/clang/lib/StaticAnalyzer/Frontend/CMakeLists.txt b/clang/lib/StaticAnalyzer/Frontend/CMakeLists.txt index 5293f5e0a522d..0326798e3a174 100644 --- a/clang/lib/StaticAnalyzer/Frontend/CMakeLists.txt +++ b/clang/lib/StaticAnalyzer/Frontend/CMakeLists.txt @@ -26,4 +26,5 @@ add_clang_library(clangStaticAnalyzerFrontend DEPENDS omp_gen + ClangDriverOptions ) diff --git a/clang/lib/Tooling/Inclusions/Stdlib/StdSymbolMap.inc b/clang/lib/Tooling/Inclusions/Stdlib/StdSymbolMap.inc index a08ec11e77a4a..b46bd2e4d7a4b 100644 --- 
a/clang/lib/Tooling/Inclusions/Stdlib/StdSymbolMap.inc +++ b/clang/lib/Tooling/Inclusions/Stdlib/StdSymbolMap.inc @@ -3773,6 +3773,33 @@ SYMBOL(viewable_range, std::ranges::, ) SYMBOL(wistream_view, std::ranges::, ) SYMBOL(zip_transform_view, std::ranges::, ) SYMBOL(zip_view, std::ranges::, ) +SYMBOL(all, std::ranges::views::, ) +SYMBOL(all_t, std::ranges::views::, ) +SYMBOL(as_const, std::ranges::views::, ) +SYMBOL(as_rvalue, std::ranges::views::, ) +SYMBOL(common, std::ranges::views::, ) +SYMBOL(counted, std::ranges::views::, ) +SYMBOL(drop, std::ranges::views::, ) +SYMBOL(drop_while, std::ranges::views::, ) +SYMBOL(elements, std::ranges::views::, ) +SYMBOL(empty, std::ranges::views::, ) +SYMBOL(filter, std::ranges::views::, ) +SYMBOL(iota, std::ranges::views::, ) +SYMBOL(istream, std::ranges::views::, ) +SYMBOL(istream, std::ranges::views::, ) +SYMBOL(join, std::ranges::views::, ) +SYMBOL(join_with, std::ranges::views::, ) +SYMBOL(keys, std::ranges::views::, ) +SYMBOL(lazy_split, std::ranges::views::, ) +SYMBOL(reverse, std::ranges::views::, ) +SYMBOL(single, std::ranges::views::, ) +SYMBOL(split, std::ranges::views::, ) +SYMBOL(take, std::ranges::views::, ) +SYMBOL(take_while, std::ranges::views::, ) +SYMBOL(transform, std::ranges::views::, ) +SYMBOL(values, std::ranges::views::, ) +SYMBOL(zip, std::ranges::views::, ) +SYMBOL(zip_transform, std::ranges::views::, ) SYMBOL(ECMAScript, std::regex_constants::, ) SYMBOL(awk, std::regex_constants::, ) SYMBOL(basic, std::regex_constants::, ) @@ -3817,3 +3844,30 @@ SYMBOL(get_id, std::this_thread::, ) SYMBOL(sleep_for, std::this_thread::, ) SYMBOL(sleep_until, std::this_thread::, ) SYMBOL(yield, std::this_thread::, ) +SYMBOL(all, std::views::, ) +SYMBOL(all_t, std::views::, ) +SYMBOL(as_const, std::views::, ) +SYMBOL(as_rvalue, std::views::, ) +SYMBOL(common, std::views::, ) +SYMBOL(counted, std::views::, ) +SYMBOL(drop, std::views::, ) +SYMBOL(drop_while, std::views::, ) +SYMBOL(elements, std::views::, ) 
+SYMBOL(empty, std::views::, ) +SYMBOL(filter, std::views::, ) +SYMBOL(iota, std::views::, ) +SYMBOL(istream, std::views::, ) +SYMBOL(istream, std::views::, ) +SYMBOL(join, std::views::, ) +SYMBOL(join_with, std::views::, ) +SYMBOL(keys, std::views::, ) +SYMBOL(lazy_split, std::views::, ) +SYMBOL(reverse, std::views::, ) +SYMBOL(single, std::views::, ) +SYMBOL(split, std::views::, ) +SYMBOL(take, std::views::, ) +SYMBOL(take_while, std::views::, ) +SYMBOL(transform, std::views::, ) +SYMBOL(values, std::views::, ) +SYMBOL(zip, std::views::, ) +SYMBOL(zip_transform, std::views::, ) diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type.c index f4235795a8622..f8d755992eeac 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-tuple-type.c @@ -90,3 +90,36 @@ void baz(__rvv_int32m1x2_t v_tuple) { __rvv_int32m1x2_t qux(__rvv_int32m1x2_t v_tuple) { return v_tuple; } + +// O0-LABEL: define dso_local { , } @quux +// O0-SAME: ( [[V_TUPLE_COERCE0:%.*]], [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] { +// O0-NEXT: entry: +// O0-NEXT: [[V_TUPLE:%.*]] = alloca { , }, align 4 +// O0-NEXT: [[V_TUPLE_ADDR:%.*]] = alloca { , }, align 4 +// O0-NEXT: [[COERCE:%.*]] = alloca { , }, align 4 +// O0-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[V_TUPLE_COERCE0]], 0 +// O0-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[V_TUPLE_COERCE1]], 1 +// O0-NEXT: store { , } [[TMP1]], ptr [[V_TUPLE]], align 4 +// O0-NEXT: [[V_TUPLE1:%.*]] = load { , }, ptr [[V_TUPLE]], align 4 +// O0-NEXT: store { , } [[V_TUPLE1]], ptr [[V_TUPLE_ADDR]], align 4 +// O0-NEXT: [[TMP2:%.*]] = load { , }, ptr [[V_TUPLE_ADDR]], align 4 +// O0-NEXT: store { , } [[TMP2]], ptr [[COERCE]], align 4 +// O0-NEXT: [[COERCE_TUPLE:%.*]] = load { , }, ptr [[COERCE]], align 4 +// O0-NEXT: [[COERCE_EXTRACT0:%.*]] = extractvalue { , } [[COERCE_TUPLE]], 0 
+// O0-NEXT: [[COERCE_EXTRACT1:%.*]] = extractvalue { , } [[COERCE_TUPLE]], 1 +// O0-NEXT: [[CALL:%.*]] = call { , } @qux( [[COERCE_EXTRACT0]], [[COERCE_EXTRACT1]]) +// O0-NEXT: ret { , } [[CALL]] +// +// AFTER_MEM2REG-LABEL: define dso_local { , } @quux +// AFTER_MEM2REG-SAME: ( [[V_TUPLE_COERCE0:%.*]], [[V_TUPLE_COERCE1:%.*]]) #[[ATTR0]] { +// AFTER_MEM2REG-NEXT: entry: +// AFTER_MEM2REG-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[V_TUPLE_COERCE0]], 0 +// AFTER_MEM2REG-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[V_TUPLE_COERCE1]], 1 +// AFTER_MEM2REG-NEXT: [[COERCE_EXTRACT0:%.*]] = extractvalue { , } [[TMP1]], 0 +// AFTER_MEM2REG-NEXT: [[COERCE_EXTRACT1:%.*]] = extractvalue { , } [[TMP1]], 1 +// AFTER_MEM2REG-NEXT: [[CALL:%.*]] = call { , } @qux( [[COERCE_EXTRACT0]], [[COERCE_EXTRACT1]]) +// AFTER_MEM2REG-NEXT: ret { , } [[CALL]] +// +__rvv_int32m1x2_t quux(__rvv_int32m1x2_t v_tuple) { + return qux(v_tuple); +} diff --git a/clang/test/CodeGen/attr-target-mv-va-args.c b/clang/test/CodeGen/attr-target-mv-va-args.c index e75796d7ee038..96821c610235b 100644 --- a/clang/test/CodeGen/attr-target-mv-va-args.c +++ b/clang/test/CodeGen/attr-target-mv-va-args.c @@ -1,6 +1,8 @@ -// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=IFUNC-ELF +// RUN: %clang_cc1 -triple x86_64-pc-freebsd -emit-llvm %s -o - | FileCheck %s --check-prefix=IFUNC-ELF // RUN: %clang_cc1 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefixes=NO-IFUNC,WINDOWS -// RUN: %clang_cc1 -triple x86_64-fuchsia -emit-llvm %s -o - | FileCheck %s --check-prefixes=NO-IFUNC,FUCHSIA +// RUN: %clang_cc1 -triple x86_64-linux-musl -emit-llvm %s -o - | FileCheck %s --check-prefixes=NO-IFUNC,NO-IFUNC-ELF +// RUN: %clang_cc1 -triple x86_64-fuchsia -emit-llvm %s -o - | FileCheck %s --check-prefixes=NO-IFUNC,NO-IFUNC-ELF int 
__attribute__((target("sse4.2"))) foo(int i, ...) { return 0; } int __attribute__((target("arch=sandybridge"))) foo(int i, ...); int __attribute__((target("arch=ivybridge"))) foo(int i, ...) {return 1;} @@ -10,23 +12,23 @@ int bar(void) { return foo(1, 'a', 1.1) + foo(2, 2.2, "asdf"); } -// LINUX: @foo.ifunc = weak_odr ifunc i32 (i32, ...), ptr @foo.resolver -// LINUX: define{{.*}} i32 @foo.sse4.2(i32 noundef %i, ...) -// LINUX: ret i32 0 -// LINUX: define{{.*}} i32 @foo.arch_ivybridge(i32 noundef %i, ...) -// LINUX: ret i32 1 -// LINUX: define{{.*}} i32 @foo(i32 noundef %i, ...) -// LINUX: ret i32 2 -// LINUX: define{{.*}} i32 @bar() -// LINUX: call i32 (i32, ...) @foo.ifunc(i32 noundef 1, i32 noundef 97, double -// LINUX: call i32 (i32, ...) @foo.ifunc(i32 noundef 2, double noundef 2.2{{[0-9Ee+]+}}, ptr noundef +// IFUNC-ELF: @foo.ifunc = weak_odr ifunc i32 (i32, ...), ptr @foo.resolver +// IFUNC-ELF: define{{.*}} i32 @foo.sse4.2(i32 noundef %i, ...) +// IFUNC-ELF: ret i32 0 +// IFUNC-ELF: define{{.*}} i32 @foo.arch_ivybridge(i32 noundef %i, ...) +// IFUNC-ELF: ret i32 1 +// IFUNC-ELF: define{{.*}} i32 @foo(i32 noundef %i, ...) +// IFUNC-ELF: ret i32 2 +// IFUNC-ELF: define{{.*}} i32 @bar() +// IFUNC-ELF: call i32 (i32, ...) @foo.ifunc(i32 noundef 1, i32 noundef 97, double +// IFUNC-ELF: call i32 (i32, ...) @foo.ifunc(i32 noundef 2, double noundef 2.2{{[0-9Ee+]+}}, ptr noundef -// LINUX: define weak_odr ptr @foo.resolver() comdat -// LINUX: ret ptr @foo.arch_sandybridge -// LINUX: ret ptr @foo.arch_ivybridge -// LINUX: ret ptr @foo.sse4.2 -// LINUX: ret ptr @foo -// LINUX: declare i32 @foo.arch_sandybridge(i32 noundef, ...) +// IFUNC-ELF: define weak_odr ptr @foo.resolver() comdat +// IFUNC-ELF: ret ptr @foo.arch_sandybridge +// IFUNC-ELF: ret ptr @foo.arch_ivybridge +// IFUNC-ELF: ret ptr @foo.sse4.2 +// IFUNC-ELF: ret ptr @foo +// IFUNC-ELF: declare i32 @foo.arch_sandybridge(i32 noundef, ...) // NO-IFUNC: define dso_local i32 @foo.sse4.2(i32 noundef %i, ...) 
// NO-IFUNC: ret i32 0 @@ -39,10 +41,10 @@ int bar(void) { // NO-IFUNC: call i32 (i32, ...) @foo.resolver(i32 noundef 2, double noundef 2.2{{[0-9Ee+]+}}, ptr noundef // WINDOWS: define weak_odr dso_local i32 @foo.resolver(i32 %0, ...) comdat -// FUCHSIA: define weak_odr i32 @foo.resolver(i32 %0, ...) comdat +// NO-IFUNC-ELF: define weak_odr i32 @foo.resolver(i32 %0, ...) comdat // NO-IFUNC: musttail call i32 (i32, ...) @foo.arch_sandybridge // NO-IFUNC: musttail call i32 (i32, ...) @foo.arch_ivybridge // NO-IFUNC: musttail call i32 (i32, ...) @foo.sse4.2 // NO-IFUNC: musttail call i32 (i32, ...) @foo // WINDOWS: declare dso_local i32 @foo.arch_sandybridge(i32 noundef, ...) -// FUCHSIA: declare i32 @foo.arch_sandybridge(i32 noundef, ...) +// NO-IFUNC-ELF: declare i32 @foo.arch_sandybridge(i32 noundef, ...) diff --git a/clang/test/CodeGen/unique-internal-linkage-names.cpp b/clang/test/CodeGen/unique-internal-linkage-names.cpp index 731c2c143d7b7..65cf9db80b917 100644 --- a/clang/test/CodeGen/unique-internal-linkage-names.cpp +++ b/clang/test/CodeGen/unique-internal-linkage-names.cpp @@ -1,7 +1,7 @@ // This test checks if internal linkage symbols get unique names with // -funique-internal-linkage-names option. 
-// RUN: %clang_cc1 -triple x86_64 -x c++ -S -emit-llvm -o - < %s | FileCheck %s --check-prefix=PLAIN -// RUN: %clang_cc1 -triple x86_64 -x c++ -S -emit-llvm -funique-internal-linkage-names -o - < %s | FileCheck %s --check-prefix=UNIQUE +// RUN: %clang_cc1 -triple x86_64-linux-gnu -x c++ -S -emit-llvm -o - < %s | FileCheck %s --check-prefix=PLAIN +// RUN: %clang_cc1 -triple x86_64-linux-gnu -x c++ -S -emit-llvm -funique-internal-linkage-names -o - < %s | FileCheck %s --check-prefix=UNIQUE static int glob; static int foo() { diff --git a/clang/test/CodeGenCoroutines/coro-awaiter-noinline-suspend.cpp b/clang/test/CodeGenCoroutines/coro-awaiter-noinline-suspend.cpp new file mode 100644 index 0000000000000..f935e256d9db9 --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-awaiter-noinline-suspend.cpp @@ -0,0 +1,207 @@ +// Tests that we can mark await-suspend as noinline correctly. +// +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s \ +// RUN: -disable-llvm-passes | FileCheck %s + +#include "Inputs/coroutine.h" + +struct Task { + struct promise_type { + struct FinalAwaiter { + bool await_ready() const noexcept { return false; } + template + std::coroutine_handle<> await_suspend(std::coroutine_handle h) noexcept { + return h.promise().continuation; + } + void await_resume() noexcept {} + }; + + Task get_return_object() noexcept { + return std::coroutine_handle::from_promise(*this); + } + + std::suspend_always initial_suspend() noexcept { return {}; } + FinalAwaiter final_suspend() noexcept { return {}; } + void unhandled_exception() noexcept {} + void return_void() noexcept {} + + std::coroutine_handle<> continuation; + }; + + Task(std::coroutine_handle handle); + ~Task(); + +private: + std::coroutine_handle handle; +}; + +struct StatefulAwaiter { + int value; + bool await_ready() const noexcept { return false; } + template + void await_suspend(std::coroutine_handle h) noexcept {} + void await_resume() noexcept {} +}; + +typedef 
std::suspend_always NoStateAwaiter; +using AnotherStatefulAwaiter = StatefulAwaiter; + +template +struct TemplatedAwaiter { + T value; + bool await_ready() const noexcept { return false; } + template + void await_suspend(std::coroutine_handle h) noexcept {} + void await_resume() noexcept {} +}; + + +class Awaitable {}; +StatefulAwaiter operator co_await(Awaitable) { + return StatefulAwaiter{}; +} + +StatefulAwaiter GlobalAwaiter; +class Awaitable2 {}; +StatefulAwaiter& operator co_await(Awaitable2) { + return GlobalAwaiter; +} + +Task testing() { + co_await std::suspend_always{}; + co_await StatefulAwaiter{}; + co_await AnotherStatefulAwaiter{}; + + // Test lvalue case. + StatefulAwaiter awaiter; + co_await awaiter; + + // The explicit call to await_suspend is not considered suspended. + awaiter.await_suspend(std::coroutine_handle::from_address(nullptr)); + + co_await TemplatedAwaiter{}; + TemplatedAwaiter TemplatedAwaiterInstace; + co_await TemplatedAwaiterInstace; + + co_await Awaitable{}; + co_await Awaitable2{}; +} + +// CHECK-LABEL: @_Z7testingv + +// Check `co_await __promise__.initial_suspend();` Since it returns std::suspend_always, +// which is an empty class, we shouldn't generate optimization blocker for it. +// CHECK: call token @llvm.coro.save +// CHECK: call void @_ZNSt14suspend_always13await_suspendESt16coroutine_handleIvE{{.*}}#[[NORMAL_ATTR:[0-9]+]] + +// Check the `co_await std::suspend_always{};` expression. We shouldn't emit the optimization +// blocker for it since it is an empty class. +// CHECK: call token @llvm.coro.save +// CHECK: call void @_ZNSt14suspend_always13await_suspendESt16coroutine_handleIvE{{.*}}#[[NORMAL_ATTR]] + +// Check `co_await StatefulAwaiter{};`. We need to emit the optimization blocker since +// the awaiter is not empty. 
+// CHECK: call token @llvm.coro.save +// CHECK: call void @_ZN15StatefulAwaiter13await_suspendIN4Task12promise_typeEEEvSt16coroutine_handleIT_E{{.*}}#[[NOINLINE_ATTR:[0-9]+]] + +// Check `co_await AnotherStatefulAwaiter{};` to make sure that we can handle TypedefTypes. +// CHECK: call token @llvm.coro.save +// CHECK: call void @_ZN15StatefulAwaiter13await_suspendIN4Task12promise_typeEEEvSt16coroutine_handleIT_E{{.*}}#[[NOINLINE_ATTR]] + +// Check `co_await awaiter;` to make sure we can handle lvalue cases. +// CHECK: call token @llvm.coro.save +// CHECK: call void @_ZN15StatefulAwaiter13await_suspendIN4Task12promise_typeEEEvSt16coroutine_handleIT_E{{.*}}#[[NOINLINE_ATTR]] + +// Check `awaiter.await_suspend(...)` to make sure the explicit call the await_suspend won't be marked as noinline +// CHECK: call void @_ZN15StatefulAwaiter13await_suspendIvEEvSt16coroutine_handleIT_E{{.*}}#[[NORMAL_ATTR]] + +// Check `co_await TemplatedAwaiter{};` to make sure we can handle specialized template +// type. +// CHECK: call token @llvm.coro.save +// CHECK: call void @_ZN16TemplatedAwaiterIiE13await_suspendIN4Task12promise_typeEEEvSt16coroutine_handleIT_E{{.*}}#[[NOINLINE_ATTR]] + +// Check `co_await TemplatedAwaiterInstace;` to make sure we can handle the lvalue from +// specialized template type. 
+// CHECK: call token @llvm.coro.save +// CHECK: call void @_ZN16TemplatedAwaiterIiE13await_suspendIN4Task12promise_typeEEEvSt16coroutine_handleIT_E{{.*}}#[[NOINLINE_ATTR]] + +// Check `co_await Awaitable{};` to make sure we can handle awaiter returned by +// `operator co_await`; +// CHECK: call token @llvm.coro.save +// CHECK: call void @_ZN15StatefulAwaiter13await_suspendIN4Task12promise_typeEEEvSt16coroutine_handleIT_E{{.*}}#[[NOINLINE_ATTR]] + +// Check `co_await Awaitable2{};` to make sure we can handle awaiter returned by +// `operator co_await` which returns a reference; +// CHECK: call token @llvm.coro.save +// CHECK: call void @_ZN15StatefulAwaiter13await_suspendIN4Task12promise_typeEEEvSt16coroutine_handleIT_E{{.*}}#[[NOINLINE_ATTR]] + +// Check `co_await __promise__.final_suspend();`. We don't emit an blocker here since it is +// empty. +// CHECK: call token @llvm.coro.save +// CHECK: call ptr @_ZN4Task12promise_type12FinalAwaiter13await_suspendIS0_EESt16coroutine_handleIvES3_IT_E{{.*}}#[[NORMAL_ATTR]] + +struct AwaitTransformTask { + struct promise_type { + struct FinalAwaiter { + bool await_ready() const noexcept { return false; } + template + std::coroutine_handle<> await_suspend(std::coroutine_handle h) noexcept { + return h.promise().continuation; + } + void await_resume() noexcept {} + }; + + AwaitTransformTask get_return_object() noexcept { + return std::coroutine_handle::from_promise(*this); + } + + std::suspend_always initial_suspend() noexcept { return {}; } + FinalAwaiter final_suspend() noexcept { return {}; } + void unhandled_exception() noexcept {} + void return_void() noexcept {} + + template + auto await_transform(Awaitable &&awaitable) { + return awaitable; + } + + std::coroutine_handle<> continuation; + }; + + AwaitTransformTask(std::coroutine_handle handle); + ~AwaitTransformTask(); + +private: + std::coroutine_handle handle; +}; + +struct awaitableWithGetAwaiter { + bool await_ready() const noexcept { return false; } + template + void 
await_suspend(std::coroutine_handle h) noexcept {} + void await_resume() noexcept {} +}; + +AwaitTransformTask testingWithAwaitTransform() { + co_await awaitableWithGetAwaiter{}; +} + +// CHECK-LABEL: @_Z25testingWithAwaitTransformv + +// Init suspend +// CHECK: call token @llvm.coro.save +// CHECK-NOT: call void @llvm.coro.opt.blocker( +// CHECK: call void @_ZNSt14suspend_always13await_suspendESt16coroutine_handleIvE{{.*}}#[[NORMAL_ATTR]] + +// Check `co_await awaitableWithGetAwaiter{};`. +// CHECK: call token @llvm.coro.save +// CHECK-NOT: call void @llvm.coro.opt.blocker( +// Check call void @_ZN23awaitableWithGetAwaiter13await_suspendIN18AwaitTransformTask12promise_typeEEEvSt16coroutine_handleIT_E{{.*}}#[[NORMAL_ATTR]] + +// Final suspend +// CHECK: call token @llvm.coro.save +// CHECK-NOT: call void @llvm.coro.opt.blocker( +// CHECK: call ptr @_ZN18AwaitTransformTask12promise_type12FinalAwaiter13await_suspendIS0_EESt16coroutine_handleIvES3_IT_E{{.*}}#[[NORMAL_ATTR]] + +// CHECK-NOT: attributes #[[NORMAL_ATTR]] = noinline +// CHECK: attributes #[[NOINLINE_ATTR]] = {{.*}}noinline diff --git a/clang/test/CodeGenCoroutines/coro-halo.cpp b/clang/test/CodeGenCoroutines/coro-halo.cpp index 6244f130b7be2..e75bedaf81fa2 100644 --- a/clang/test/CodeGenCoroutines/coro-halo.cpp +++ b/clang/test/CodeGenCoroutines/coro-halo.cpp @@ -1,5 +1,7 @@ // This tests that the coroutine heap allocation elision optimization could happen succesfully. 
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -O2 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -O2 -emit-llvm %s \ +// RUN: -fcxx-exceptions -fexceptions -o - | FileCheck %s #include "Inputs/coroutine.h" #include "Inputs/numeric.h" diff --git a/clang/test/CodeGenCoroutines/pr56301.cpp b/clang/test/CodeGenCoroutines/pr56301.cpp new file mode 100644 index 0000000000000..cd851c0b815db --- /dev/null +++ b/clang/test/CodeGenCoroutines/pr56301.cpp @@ -0,0 +1,85 @@ +// An end-to-end test to make sure things get processed correctly. +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s -O3 | \ +// RUN: FileCheck %s + +#include "Inputs/coroutine.h" + +struct SomeAwaitable { + // Resume the supplied handle once the awaitable becomes ready, + // returning a handle that should be resumed now for the sake of symmetric transfer. + // If the awaitable is already ready, return an empty handle without doing anything. + // + // Defined in another translation unit. Note that this may contain + // code that synchronizes with another thread. + std::coroutine_handle<> Register(std::coroutine_handle<>); +}; + +// Defined in another translation unit. +void DidntSuspend(); + +struct Awaiter { + SomeAwaitable&& awaitable; + bool suspended; + + bool await_ready() { return false; } + + std::coroutine_handle<> await_suspend(const std::coroutine_handle<> h) { + // Assume we will suspend unless proven otherwise below. We must do + // this *before* calling Register, since we may be destroyed by another + // thread asynchronously as soon as we have registered. + suspended = true; + + // Attempt to hand off responsibility for resuming/destroying the coroutine. + const auto to_resume = awaitable.Register(h); + + if (!to_resume) { + // The awaitable is already ready. In this case we know that Register didn't + // hand off responsibility for the coroutine. 
So record the fact that we didn't + // actually suspend, and tell the compiler to resume us inline. + suspended = false; + return h; + } + + // Resume whatever Register wants us to resume. + return to_resume; + } + + void await_resume() { + // If we didn't suspend, make note of that fact. + if (!suspended) { + DidntSuspend(); + } + } +}; + +struct MyTask{ + struct promise_type { + MyTask get_return_object() { return {}; } + std::suspend_never initial_suspend() { return {}; } + std::suspend_always final_suspend() noexcept { return {}; } + void unhandled_exception(); + + Awaiter await_transform(SomeAwaitable&& awaitable) { + return Awaiter{static_cast(awaitable)}; + } + }; +}; + +MyTask FooBar() { + co_await SomeAwaitable(); +} + +// CHECK-LABEL: @_Z6FooBarv +// CHECK: %[[to_resume:.*]] = {{.*}}call ptr @_ZN13SomeAwaitable8RegisterESt16coroutine_handleIvE +// CHECK-NEXT: %[[to_bool:.*]] = icmp eq ptr %[[to_resume]], null +// CHECK-NEXT: br i1 %[[to_bool]], label %[[then:.*]], label %[[else:.*]] + +// CHECK: [[then]]: +// We only access the coroutine frame conditionally as the sources did. +// CHECK: store i8 0, +// CHECK-NEXT: br label %[[else]] + +// CHECK: [[else]]: +// No more access to the coroutine frame until suspended. +// CHECK-NOT: store +// CHECK: } diff --git a/clang/test/CodeGenCoroutines/pr59723.cpp b/clang/test/CodeGenCoroutines/pr59723.cpp new file mode 100644 index 0000000000000..7fc9995f417ac --- /dev/null +++ b/clang/test/CodeGenCoroutines/pr59723.cpp @@ -0,0 +1,237 @@ +// This is reduced test case from https://github.com/llvm/llvm-project/issues/59723. +// This is not a minimal reproducer intentionally to check the compiler's ability. 
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fcxx-exceptions\ +// RUN: -fexceptions -O2 -emit-llvm %s -o - | FileCheck %s + +#include "Inputs/coroutine.h" + +// executor and operation base + +class bug_any_executor; + +struct bug_async_op_base +{ + void invoke(); + +protected: + + ~bug_async_op_base() = default; +}; + +class bug_any_executor +{ + using op_type = bug_async_op_base; + +public: + + virtual ~bug_any_executor() = default; + + // removing noexcept enables clang to find that the pointer has escaped + virtual void post(op_type& op) noexcept = 0; + + virtual void wait() noexcept = 0; +}; + +class bug_thread_executor : public bug_any_executor +{ + +public: + + void start() + { + + } + + ~bug_thread_executor() + { + } + + // although this implementation is not realy noexcept due to allocation but I have a real one that is and required to be noexcept + virtual void post(bug_async_op_base& op) noexcept override; + + virtual void wait() noexcept override + { + + } +}; + +// task and promise + +struct bug_final_suspend_notification +{ + virtual std::coroutine_handle<> get_waiter() = 0; +}; + +class bug_task; + +class bug_task_promise +{ + friend bug_task; +public: + + bug_task get_return_object() noexcept; + + constexpr std::suspend_always initial_suspend() noexcept { return {}; } + + std::suspend_always final_suspend() noexcept + { + return {}; + } + + void unhandled_exception() noexcept; + + constexpr void return_void() const noexcept {} + + void get_result() const + { + + } +}; + +template +T exchange(T &&t, U &&u) { + T ret = t; + t = u; + return ret; +} + +class bug_task +{ + friend bug_task_promise; + using handle = std::coroutine_handle<>; + using promise_t = bug_task_promise; + + bug_task(handle coro, promise_t* p) noexcept : this_coro{ coro }, this_promise{ p } + { + + } + +public: + using promise_type = bug_task_promise; + + bug_task(bug_task&& other) noexcept + : this_coro{ exchange(other.this_coro, nullptr) }, this_promise{ 
exchange(other.this_promise, nullptr) } { + + } + + ~bug_task() + { + if (this_coro) + this_coro.destroy(); + } + + constexpr bool await_ready() const noexcept + { + return false; + } + + handle await_suspend(handle waiter) noexcept + { + return this_coro; + } + + void await_resume() + { + return this_promise->get_result(); + } + + handle this_coro; + promise_t* this_promise; +}; + +bug_task bug_task_promise::get_return_object() noexcept +{ + return { std::coroutine_handle::from_promise(*this), this }; +} + +// spawn operation and spawner + +template +class bug_spawn_op final : public bug_async_op_base, bug_final_suspend_notification +{ + Handler handler; + bug_task task_; + +public: + + bug_spawn_op(Handler handler, bug_task&& t) + : handler { handler }, task_{ static_cast(t) } {} + + virtual std::coroutine_handle<> get_waiter() override + { + handler(); + return std::noop_coroutine(); + } +}; + +class bug_spawner; + +struct bug_spawner_awaiter +{ + bug_spawner& s; + std::coroutine_handle<> waiter; + + bug_spawner_awaiter(bug_spawner& s) : s{ s } {} + + bool await_ready() const noexcept; + + void await_suspend(std::coroutine_handle<> coro); + + void await_resume() {} +}; + +class bug_spawner +{ + friend bug_spawner_awaiter; + + struct final_handler_t + { + bug_spawner& s; + + void operator()() + { + s.awaiter_->waiter.resume(); + } + }; + +public: + + bug_spawner(bug_any_executor& ex) : ex_{ ex } {} + + void spawn(bug_task&& t) { + using op_t = bug_spawn_op; + // move task into ptr + op_t* ptr = new op_t(final_handler_t{ *this }, static_cast(t)); + ++count_; + ex_.post(*ptr); // ptr escapes here thus task escapes but clang can't deduce that unless post() is not noexcept + } + + bug_spawner_awaiter wait() noexcept { return { *this }; } + +private: + bug_any_executor& ex_; // if bug_thread_executor& is used instead enables clang to detect the escape of the promise + bug_spawner_awaiter* awaiter_ = nullptr; + unsigned count_ = 0; +}; + +// test case + +bug_task 
bug_spawned_task(int id, int inc) +{ + co_return; +} + +struct A { + A(); +}; + +void throwing_fn(bug_spawner& s) { + s.spawn(bug_spawned_task(1, 2)); + throw A{}; +} + +// Check that the coroutine frame of bug_spawned_task is allocated from operator new. +// CHECK: define{{.*}}@_Z11throwing_fnR11bug_spawner +// CHECK-NOT: alloc +// CHECK: %[[CALL:.+]] = {{.*}}@_Znwm(i64{{.*}} 24) +// CHECK: store ptr @_Z16bug_spawned_taskii.resume, ptr %[[CALL]] diff --git a/clang/test/CodeGenObjCXX/crash-function-type.mm b/clang/test/CodeGenObjCXX/crash-function-type.mm index 53acc58dfc44d..280497a3258a4 100644 --- a/clang/test/CodeGenObjCXX/crash-function-type.mm +++ b/clang/test/CodeGenObjCXX/crash-function-type.mm @@ -1,3 +1,6 @@ +// Mark test as unsupported on PS5 because PS5 doesn't support the function sanitizer. +// UNSUPPORTED: target=x86_64-sie-ps5 + // RUN: %clang_cc1 -fblocks -fsanitize=function -emit-llvm %s -o %t void g(void (^)()); diff --git a/clang/test/Driver/Inputs/solaris_sparc_tree/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/crtbeginS.o b/clang/test/Driver/Inputs/solaris_sparc_tree/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/crtbeginS.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/solaris_sparc_tree/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/sparcv9/crtbeginS.o b/clang/test/Driver/Inputs/solaris_sparc_tree/usr/gcc/4.8/lib/gcc/sparc-sun-solaris2.11/4.8.2/sparcv9/crtbeginS.o new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c index 182de9f486444..9442f6b91471f 100644 --- a/clang/test/Driver/fsanitize.c +++ b/clang/test/Driver/fsanitize.c @@ -971,3 +971,17 @@ // RUN: %clang --target=x86_64-linux-gnu -fsanitize=undefined,function -mcmodel=large %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION-CODE-MODEL // CHECK-UBSAN-FUNCTION-CODE-MODEL: error: invalid argument '-fsanitize=function' only allowed with 
'-mcmodel=small' + +// RUN: %clang --target=x86_64-sie-ps5 -fsanitize=function %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION +// RUN: %clang --target=x86_64-sie-ps5 -fsanitize=undefined -fsanitize=function %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION +// RUN: %clang --target=x86_64-sie-ps5 -fsanitize=kcfi %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-KCFI +// RUN: %clang --target=x86_64-sie-ps5 -fsanitize=function -fsanitize=kcfi %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-KCFI --check-prefix=CHECK-UBSAN-FUNCTION +// RUN: %clang --target=x86_64-sie-ps5 -fsanitize=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-UNDEFINED + +// RUN: %clang --target=armv6t2-eabi -mexecute-only -fsanitize=function %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-FUNCTION +// RUN: %clang --target=armv6t2-eabi -mexecute-only -fsanitize=kcfi %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-KCFI +// RUN: %clang --target=armv6t2-eabi -mexecute-only -fsanitize=undefined %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-UBSAN-UNDEFINED + +// CHECK-UBSAN-KCFI-DAG: error: invalid argument '-fsanitize=kcfi' not allowed with {{('x86_64-sie-ps5'|'armv6t2-unknown-unknown-eabi')}} +// CHECK-UBSAN-FUNCTION-DAG: error: invalid argument '-fsanitize=function' not allowed with {{('x86_64-sie-ps5'|'armv6t2-unknown-unknown-eabi')}} +// CHECK-UBSAN-UNDEFINED: "-fsanitize={{((alignment|array-bounds|bool|builtin|enum|float-cast-overflow|integer-divide-by-zero|nonnull-attribute|null|pointer-overflow|return|returns-nonnull-attribute|shift-base|shift-exponent|signed-integer-overflow|unreachable|vla-bound),?){17}"}} diff --git a/clang/test/Driver/solaris-ld.c b/clang/test/Driver/solaris-ld.c index 2127ad5ded074..8d97a5a3695bd 100644 --- a/clang/test/Driver/solaris-ld.c +++ b/clang/test/Driver/solaris-ld.c @@ -106,6 +106,33 @@ // CHECK-SPARC32-SHARED-NOT: "-lgcc" // CHECK-SPARC32-SHARED-NOT: "-lm" +// Check the right ld flags are 
present with -pie. +// RUN: %clang --target=sparc-sun-solaris2.11 -### %s -pie \ +// RUN: --gcc-toolchain="" \ +// RUN: --sysroot=%S/Inputs/solaris_sparc_tree 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-PIE %s +// RUN: %clang --target=sparc-sun-solaris2.11 -### %s -nopie \ +// RUN: --gcc-toolchain="" \ +// RUN: --sysroot=%S/Inputs/solaris_sparc_tree 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NOPIE %s + +// Check that -shared/-r/-static disable PIE. +// RUN: %clang --target=sparc-sun-solaris2.11 -### %s -shared -pie \ +// RUN: --gcc-toolchain="" \ +// RUN: --sysroot=%S/Inputs/solaris_sparc_tree 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NOPIE %s +// RUN: %clang --target=sparc-sun-solaris2.11 -### %s -r -pie \ +// RUN: --gcc-toolchain="" \ +// RUN: --sysroot=%S/Inputs/solaris_sparc_tree 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NOPIE %s +// RUN: %clang --target=sparc-sun-solaris2.11 -### %s -static -pie \ +// RUN: --gcc-toolchain="" \ +// RUN: --sysroot=%S/Inputs/solaris_sparc_tree 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NOPIE %s + +// CHECK-PIE: "-z" "type=pie" +// CHECK-NOPIE-NOT: "-z" "type=pie" + // -r suppresses default -l and crt*.o, values-*.o like -nostdlib. // RUN: %clang -### %s --target=sparc-sun-solaris2.11 -r 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-RELOCATABLE @@ -115,6 +142,28 @@ // CHECK-RELOCATABLE-NOT: /crt{{[^.]+}}.o // CHECK-RELOCATABLE-NOT: /values-{{[^.]+}}.o +// Check that crt{begin,end}S.o is linked with -shared/-pie. 
+// RUN: %clang --target=sparc-sun-solaris2.11 -### %s \ +// RUN: --gcc-toolchain="" \ +// RUN: --sysroot=%S/Inputs/solaris_sparc_tree 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NOCRTS %s +// RUN: %clang --target=sparc-sun-solaris2.11 -### %s -shared \ +// RUN: --gcc-toolchain="" \ +// RUN: --sysroot=%S/Inputs/solaris_sparc_tree 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-CRTS %s +// RUN: %clang --target=sparc-sun-solaris2.11 -### %s -nopie \ +// RUN: --gcc-toolchain="" \ +// RUN: --sysroot=%S/Inputs/solaris_sparc_tree 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NOCRTS %s +// RUN: %clang --target=sparc-sun-solaris2.11 -### %s -pie \ +// RUN: --gcc-toolchain="" \ +// RUN: --sysroot=%S/Inputs/solaris_sparc_tree 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-CRTS %s +// CHECK-CRTS: crtbeginS.o +// CHECK-CRTS: crtendS.o +// CHECK-NOCRTS-NOT: crtbeginS.o +// CHECK-NOCRTS-NOT: crtendS.o + // Check that crtfastmath.o is linked with -ffast-math. // Check sparc-sun-solaris2.11, 32bit diff --git a/clang/test/Driver/target-specific.s b/clang/test/Driver/target-specific.s new file mode 100644 index 0000000000000..aa4fc73812099 --- /dev/null +++ b/clang/test/Driver/target-specific.s @@ -0,0 +1,12 @@ +/// Check that we report a warning instead of an error for target-specific compilation only options. +// RUN: %clang -### --target=aarch64 -faddrsig -mbranch-protection=standard -c %s 2>&1 | FileCheck %s +// RUN: %clang -### --target=aarch64 -faddrsig -mbranch-protection=standard -c -fno-integrated-as %s 2>&1 | FileCheck %s + +/// Report a warning if we perform the link phase. +// RUN: %clang -### --target=aarch64 -faddrsig -mbranch-protection=standard %s 2>&1 | FileCheck %s + +// CHECK: warning: argument unused during compilation: '-faddrsig' +// CHECK: warning: argument unused during compilation: '-mbranch-protection=standard' + +/// assembler-with-cpp claims compile only options. Ideally we should emit a warning. 
+// RUN: %clang -### -Werror --target=aarch64 -c -faddrsig -mbranch-protection=standard -x assembler-with-cpp %s diff --git a/clang/test/Driver/x86-mfpmath.c b/clang/test/Driver/x86-mfpmath.c new file mode 100644 index 0000000000000..8f85cced953ab --- /dev/null +++ b/clang/test/Driver/x86-mfpmath.c @@ -0,0 +1,5 @@ +// RUN: %clang -### -c --target=x86_64 -mfpmath=sse %s 2>&1 | FileCheck %s +// CHECK: "-mfpmath" "sse" + +// RUN: %clang -### -c --target=x86_64 -mfpmath=sse -x assembler %s 2>&1 | FileCheck %s --check-prefix=WARN +// WARN: warning: argument unused during compilation: '-mfpmath=sse' diff --git a/clang/test/Driver/x86-no-gather-no-scatter.cpp b/clang/test/Driver/x86-no-gather-no-scatter.cpp index 7efcc55787c42..63611227bd583 100644 --- a/clang/test/Driver/x86-no-gather-no-scatter.cpp +++ b/clang/test/Driver/x86-no-gather-no-scatter.cpp @@ -1,8 +1,8 @@ /// Tests -mno-gather and -mno-scatter -// RUN: %clang -c -mno-gather -### %s 2>&1 | FileCheck --check-prefix=NOGATHER %s -// RUN: %clang_cl -c /Qgather- -### %s 2>&1 | FileCheck --check-prefix=NOGATHER %s +// RUN: %clang -target x86_64-unknown-linux-gnu -c -mno-gather -### %s 2>&1 | FileCheck --check-prefix=NOGATHER %s +// RUN: %clang_cl --target=x86_64-windows -c /Qgather- -### -- %s 2>&1 | FileCheck --check-prefix=NOGATHER %s // NOGATHER: "-target-feature" "+prefer-no-gather" -// RUN: %clang -c -mno-scatter -### %s 2>&1 | FileCheck --check-prefix=NOSCATTER %s -// RUN: %clang_cl -c /Qscatter- -### %s 2>&1 | FileCheck --check-prefix=NOSCATTER %s +// RUN: %clang -target x86_64-unknown-linux-gnu -c -mno-scatter -### %s 2>&1 | FileCheck --check-prefix=NOSCATTER %s +// RUN: %clang_cl --target=x86_64-windows -c /Qscatter- -### -- %s 2>&1 | FileCheck --check-prefix=NOSCATTER %s // NOSCATTER: "-target-feature" "+prefer-no-scatter" diff --git a/clang/test/Misc/constexpr-subobj-init-source-ranges.cpp b/clang/test/Misc/constexpr-subobj-init-source-ranges.cpp new file mode 100644 index 0000000000000..342da2d886668 --- 
/dev/null +++ b/clang/test/Misc/constexpr-subobj-init-source-ranges.cpp @@ -0,0 +1,11 @@ +// RUN: not %clang_cc1 -fsyntax-only -fdiagnostics-print-source-range-info %s 2>&1 | FileCheck %s --strict-whitespace + +struct DelBase { + constexpr DelBase() = delete; +}; + +// CHECK: :{[[@LINE+1]]:21-[[@LINE+1]]:28} +struct Foo : public DelBase { + constexpr Foo() {}; +}; +constexpr Foo f; diff --git a/clang/test/Sema/empty-init.c b/clang/test/Sema/empty-init.c new file mode 100644 index 0000000000000..8cb4a77710c2b --- /dev/null +++ b/clang/test/Sema/empty-init.c @@ -0,0 +1,30 @@ +// RUN: %clang_cc1 %s -std=c2x -Wall -pedantic -fsyntax-only -verify=good +// RUN: %clang_cc1 %s -std=c2x -Wpre-c2x-compat -fsyntax-only -verify=c2x +// RUN: %clang_cc1 %s -std=c2x -Wpre-c2x-compat -Wno-gnu-empty-initializer -fsyntax-only -verify=c2x +// RUN: %clang_cc1 %s -std=c2x -Wgnu-empty-initializer -fsyntax-only -verify=good +// RUN: %clang_cc1 %s -std=c17 -Wall -pedantic -fsyntax-only -verify=c2x-ext +// RUN: %clang_cc1 %s -std=c17 -Wgnu-empty-initializer -fsyntax-only -verify=good +// RUN: %clang_cc1 %s -std=c17 -Wc2x-extensions -fsyntax-only -verify=c2x-ext +// RUN: %clang_cc1 %s -std=c17 -Wpre-c2x-compat -fsyntax-only -verify=good + +// good-no-diagnostics + +// Empty brace initialization used to be a GNU extension, but the feature was +// added to C2x. We now treat empty initialization as a C extension rather than +// a GNU extension. Thus, -Wgnu-empty-initializer is always silently ignored. 
+ +struct S { + int a; +}; + +struct S s = {}; /* c2x-warning {{use of an empty initializer is incompatible with C standards before C2x}} + c2x-ext-warning {{use of an empty initializer is a C2x extension}} + */ + +void func(void) { + struct S s2 = {}; /* c2x-warning {{use of an empty initializer is incompatible with C standards before C2x}} + c2x-ext-warning {{use of an empty initializer is a C2x extension}} + */ + (void)s2; +} + diff --git a/clang/test/Sema/riscv-rvv-lax-vector-conversions.c b/clang/test/Sema/riscv-rvv-lax-vector-conversions.c index ff3e028aa314d..8ab01620b82aa 100644 --- a/clang/test/Sema/riscv-rvv-lax-vector-conversions.c +++ b/clang/test/Sema/riscv-rvv-lax-vector-conversions.c @@ -2,8 +2,6 @@ // RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 -flax-vector-conversions=integer -ffreestanding -fsyntax-only -verify=lax-vector-integer %s // RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 -flax-vector-conversions=all -ffreestanding -fsyntax-only -verify=lax-vector-all %s -// lax-vector-all-no-diagnostics - // REQUIRES: riscv-registered-target #define RVV_FIXED_ATTR __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen))) @@ -20,6 +18,8 @@ typedef __rvv_uint64m1_t vuint64m1_t; typedef __rvv_float32m1_t vfloat32m1_t; typedef __rvv_float64m1_t vfloat64m1_t; +typedef __rvv_int64m2_t vint64m2_t; + typedef vfloat32m1_t rvv_fixed_float32m1_t RVV_FIXED_ATTR; typedef vint32m1_t rvv_fixed_int32m1_t RVV_FIXED_ATTR; typedef float gnu_fixed_float32m1_t GNU_FIXED_ATTR; @@ -76,3 +76,17 @@ void gnu_allowed_with_all_lax_conversions() { // lax-vector-none-error@-1 {{assigning to 'vfloat64m1_t' (aka '__rvv_float64m1_t') from incompatible type}} // lax-vector-integer-error@-2 {{assigning to 'vfloat64m1_t' (aka '__rvv_float64m1_t') from incompatible type}} } + +void not_allowed() { + 
rvv_fixed_int32m1_t fi32m1; + vint64m2_t si64m2; + + fi32m1 = si64m2; + // lax-vector-none-error@-1 {{assigning to 'rvv_fixed_int32m1_t' (vector of 16 'int' values) from incompatible type}} + // lax-vector-integer-error@-2 {{assigning to 'rvv_fixed_int32m1_t' (vector of 16 'int' values) from incompatible type}} + // lax-vector-all-error@-3 {{assigning to 'rvv_fixed_int32m1_t' (vector of 16 'int' values) from incompatible type}} + si64m2 = fi32m1; + // lax-vector-none-error@-1 {{assigning to 'vint64m2_t' (aka '__rvv_int64m2_t') from incompatible type}} + // lax-vector-integer-error@-2 {{assigning to 'vint64m2_t' (aka '__rvv_int64m2_t') from incompatible type}} + // lax-vector-all-error@-3 {{assigning to 'vint64m2_t' (aka '__rvv_int64m2_t') from incompatible type}} +} diff --git a/clang/test/SemaCXX/concept-crash-on-diagnostic.cpp b/clang/test/SemaCXX/concept-crash-on-diagnostic.cpp new file mode 100644 index 0000000000000..00a39f9f03b79 --- /dev/null +++ b/clang/test/SemaCXX/concept-crash-on-diagnostic.cpp @@ -0,0 +1,37 @@ +// RUN: %clang_cc1 -fsyntax-only -std=c++20 -verify %s + +template class normal_iterator {}; + +template struct is_convertible {}; + +template +inline constexpr bool is_convertible_v = is_convertible::value; // expected-error {{no member named 'value' in 'is_convertible'}} + +template +concept convertible_to = is_convertible_v; // #1 + +template + requires requires(IteratorL lhs, IteratorR rhs) { // #2 + { lhs == rhs } -> convertible_to; // #3 + } +constexpr bool compare(normal_iterator lhs, normal_iterator rhs) { // #4 + return false; +} + +class Object; + +void function() { + normal_iterator begin, end; + compare(begin, end); // expected-error {{no matching function for call to 'compare'}} #5 +} + +// expected-note@#1 {{in instantiation of variable template specialization 'is_convertible_v' requested here}} +// expected-note@#1 {{substituting template arguments into constraint expression here}} +// expected-note@#3 {{checking the satisfaction 
of concept 'convertible_to'}} +// expected-note@#2 {{substituting template arguments into constraint expression here}} +// expected-note@#5 {{checking constraint satisfaction for template 'compare'}} +// expected-note@#5 {{in instantiation of function template specialization 'compare' requested here}} + +// expected-note@#4 {{candidate template ignored: constraints not satisfied [with IteratorL = Object *, IteratorR = Object *]}} +// We don't know exactly the substituted type for `lhs == rhs`, thus a placeholder 'expr-type' is emitted. +// expected-note@#3 {{because 'convertible_to' would be invalid}} diff --git a/clang/test/SemaCXX/concept-fatal-error.cpp b/clang/test/SemaCXX/concept-fatal-error.cpp index c299b39fdeb23..c606b9e21a364 100644 --- a/clang/test/SemaCXX/concept-fatal-error.cpp +++ b/clang/test/SemaCXX/concept-fatal-error.cpp @@ -1,4 +1,4 @@ -// RUN: not %clang_cc1 -fsyntax-only -std=c++20 -ferror-limit 1 -verify %s +// RUN: %clang_cc1 -fsyntax-only -std=c++20 -ferror-limit 1 -verify %s template concept f = requires { 42; }; @@ -6,5 +6,5 @@ struct h { // The missing semicolon will trigger an error and -ferror-limit=1 will make it fatal // We test that we do not crash in such cases (#55401) int i = requires { { i } f } // expected-error {{expected ';' at end of declaration list}} - // expected-error@* {{too many errros emitted}} + // expected-error@* {{too many errors emitted}} }; diff --git a/clang/test/SemaCXX/constexpr-subobj-initialization.cpp b/clang/test/SemaCXX/constexpr-subobj-initialization.cpp new file mode 100644 index 0000000000000..cd096a9270937 --- /dev/null +++ b/clang/test/SemaCXX/constexpr-subobj-initialization.cpp @@ -0,0 +1,58 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s + +namespace baseclass_uninit { +struct DelBase { + constexpr DelBase() = delete; // expected-note {{'DelBase' has been explicitly marked deleted here}} +}; + +struct Foo : DelBase { // expected-note 2{{constructor of base class 'DelBase' is not called}} + constexpr 
Foo() {}; // expected-error {{call to deleted constructor of 'DelBase'}} +}; +constexpr Foo f; // expected-error {{must be initialized by a constant expression}} +struct Bar : Foo { + constexpr Bar() {}; +}; +constexpr Bar bar; // expected-error {{must be initialized by a constant expression}} + +struct Base {}; +struct A : Base { // expected-note {{constructor of base class 'Base' is not called}} + constexpr A() : value() {} // expected-error {{member initializer 'value' does not name a non-static data member or base class}} +}; + +constexpr A a; // expected-error {{must be initialized by a constant expression}} + +struct B : Base { // expected-note {{constructor of base class 'Base' is not called}} + constexpr B() : {} // expected-error {{expected class member or base class name}} +}; + +constexpr B b; // expected-error {{must be initialized by a constant expression}} +} // namespace baseclass_uninit + + +struct Foo { + constexpr Foo(); // expected-note 2{{declared here}} +}; + +constexpr Foo ff; // expected-error {{must be initialized by a constant expression}} \ + // expected-note {{undefined constructor 'Foo' cannot be used in a constant expression}} + +struct Bar : protected Foo { + int i; + constexpr Bar() : i(12) {} // expected-note {{undefined constructor 'Foo' cannot be used in a constant expression}} +}; + +constexpr Bar bb; // expected-error {{must be initialized by a constant expression}} \ + // expected-note {{in call to 'Bar()'}} + +template +struct Baz { + constexpr Baz(); // expected-note {{declared here}} +}; + +struct Quux : Baz, private Bar { + int i; + constexpr Quux() : i(12) {} // expected-note {{undefined constructor 'Baz' cannot be used in a constant expression}} +}; + +constexpr Quux qx; // expected-error {{must be initialized by a constant expression}} \ + // expected-note {{in call to 'Quux()'}} diff --git a/clang/test/SemaCXX/template-64605.cpp b/clang/test/SemaCXX/template-64605.cpp new file mode 100644 index 
0000000000000..9d7f8d4100171 --- /dev/null +++ b/clang/test/SemaCXX/template-64605.cpp @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -ast-dump -ast-dump-filter=b_64605 %s | FileCheck %s + +// https://github.com/llvm/llvm-project/issues/64605 + +#pragma STDC FENV_ACCESS ON +template +int b_64605() { + int x; + if ((float)0xFFFFFFFF != (float)0x100000000) { + x = 1; + } + return x; +} +int f() { return b_64605(); } + +// CHECK: ImplicitCastExpr {{.*}} 'float' RoundingMath=1 AllowFEnvAccess=1 +// CHECK-NEXT: IntegerLiteral {{.*}} 4294967295 + +// CHECK: FunctionDecl {{.*}} b_64605 'int ()' +// CHECK-NEXT: TemplateArgument type 'void' + +// CHECK: ImplicitCastExpr {{.*}} 'float' RoundingMath=1 AllowFEnvAccess=1 +// CHECK-NEXT: IntegerLiteral {{.*}} 4294967295 diff --git a/clang/tools/clang-fuzzer/CMakeLists.txt b/clang/tools/clang-fuzzer/CMakeLists.txt index e68ed8bbcb069..2b9720ee627cb 100644 --- a/clang/tools/clang-fuzzer/CMakeLists.txt +++ b/clang/tools/clang-fuzzer/CMakeLists.txt @@ -115,6 +115,9 @@ add_clang_executable(clang-fuzzer EXCLUDE_FROM_ALL ${DUMMY_MAIN} ClangFuzzer.cpp + + DEPENDS + ClangDriverOptions ) target_link_libraries(clang-fuzzer @@ -127,6 +130,9 @@ add_clang_executable(clang-objc-fuzzer EXCLUDE_FROM_ALL ${DUMMY_MAIN} ClangObjectiveCFuzzer.cpp + + DEPENDS + ClangDriverOptions ) target_link_libraries(clang-objc-fuzzer diff --git a/clang/tools/include-mapping/gen_std.py b/clang/tools/include-mapping/gen_std.py index 2390ff1f2cced..57a5a6772ba89 100755 --- a/clang/tools/include-mapping/gen_std.py +++ b/clang/tools/include-mapping/gen_std.py @@ -242,6 +242,11 @@ def main(): (symbol_index_root, "filesystem.html", "std::filesystem::"), (symbol_index_root, "pmr.html", "std::pmr::"), (symbol_index_root, "ranges.html", "std::ranges::"), + + (symbol_index_root, "views.html", "std::ranges::views::"), + # std::ranges::views can be accessed as std::views. 
+ (symbol_index_root, "views.html", "std::views::"), + (symbol_index_root, "regex_constants.html", "std::regex_constants::"), (symbol_index_root, "this_thread.html", "std::this_thread::"), # Zombie symbols that were available from the Standard Library, but are diff --git a/clang/unittests/AST/ASTImporterTest.cpp b/clang/unittests/AST/ASTImporterTest.cpp index 3a1058f5e3fe9..9b81abda1d2e1 100644 --- a/clang/unittests/AST/ASTImporterTest.cpp +++ b/clang/unittests/AST/ASTImporterTest.cpp @@ -7711,6 +7711,47 @@ TEST_P(ASTImporterOptionSpecificTestBase, ImportConstructorUsingShadow) { CheckAST(ToTU, ToC); } +TEST_P(ASTImporterOptionSpecificTestBase, + ImportFunctionDeclBitShouldNotOverwriteCtorDeclBits) { + Decl *From, *To; + std::tie(From, To) = getImportedDecl( + R"s( + struct A { + A() : m() {} + int m; + }; + + A foo() { A a; return a; } + A bar() { return {}; } + )s", + Lang_CXX17, + R"s( + struct A { + A() : m() {} + int m; + }; + A baz() { return {}; } + )s", + Lang_CXX17, "A"); + + auto HasCtorInit = + hasAnyConstructorInitializer(cxxCtorInitializer(isMemberInitializer())); + auto ImpMoveCtor = + cxxConstructorDecl(isMoveConstructor(), isImplicit(), HasCtorInit); + + auto *FromImpMoveCtor = FirstDeclMatcher().match( + From, ImpMoveCtor); + auto *ToImpMoveCtor = FirstDeclMatcher().match( + To, ImpMoveCtor); + + EXPECT_TRUE(FromImpMoveCtor->getNumCtorInitializers() == 1); + EXPECT_FALSE(FromImpMoveCtor->FriendConstraintRefersToEnclosingTemplate()); + + EXPECT_TRUE(ToImpMoveCtor->getNumCtorInitializers() == 1); + EXPECT_FALSE(ToImpMoveCtor->FriendConstraintRefersToEnclosingTemplate()); + EXPECT_TRUE(*ToImpMoveCtor->init_begin()); +} + AST_MATCHER_P(UsingShadowDecl, hasIntroducerDecl, internal::Matcher, InnerMatcher) { return InnerMatcher.matches(*Node.getIntroducer(), Finder, Builder); diff --git a/clang/unittests/AST/DeclTest.cpp b/clang/unittests/AST/DeclTest.cpp index 2ed2ed750941c..d2977b0cb55b6 100644 --- a/clang/unittests/AST/DeclTest.cpp +++ 
b/clang/unittests/AST/DeclTest.cpp @@ -353,6 +353,32 @@ TEST(Decl, FriendFunctionWithinClassInHeaderUnit) { EXPECT_TRUE(getFooValue->isInlined()); } +TEST(Decl, FunctionDeclBitsShouldNotOverlapWithCXXConstructorDeclBits) { + llvm::Annotations Code(R"( + struct A { + A() : m() {} + int m; + }; + + A f() { return A(); } + )"); + + auto AST = tooling::buildASTFromCodeWithArgs(Code.code(), {"-std=c++14"}); + ASTContext &Ctx = AST->getASTContext(); + + auto HasCtorInit = + hasAnyConstructorInitializer(cxxCtorInitializer(isMemberInitializer())); + auto ImpMoveCtor = + cxxConstructorDecl(isMoveConstructor(), isImplicit(), HasCtorInit) + .bind("MoveCtor"); + + auto *ToImpMoveCtor = + selectFirst("MoveCtor", match(ImpMoveCtor, Ctx)); + + EXPECT_TRUE(ToImpMoveCtor->getNumCtorInitializers() == 1); + EXPECT_FALSE(ToImpMoveCtor->FriendConstraintRefersToEnclosingTemplate()); +} + TEST(Decl, NoProtoFunctionDeclAttributes) { llvm::Annotations Code(R"( void f(); diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index e512a861dc4e3..271778b5bb9e6 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -13458,6 +13458,8 @@ TEST_F(FormatTest, LayoutCxx11BraceInitializers) { verifyFormat( "class A {\n" " A() : a{} {}\n" + " A() : Base{} {}\n" + " A() : Base>{} {}\n" " A(int b) : b(b) {}\n" " A(int a, int b) : a(a), bs{{bs...}} { f(); }\n" " int a, b;\n" @@ -26266,6 +26268,7 @@ TEST_F(FormatTest, RemoveParentheses) { Style.RemoveParentheses = FormatStyle::RPS_MultipleParentheses; verifyFormat("int x __attribute__((aligned(16))) = 0;", Style); + verifyFormat("decltype((foo->bar)) baz;", Style); verifyFormat("class __declspec(dllimport) X {};", "class __declspec((dllimport)) X {};", Style); verifyFormat("int x = (({ 0; }));", "int x = ((({ 0; })));", Style); diff --git a/clang/unittests/Interpreter/InterpreterTest.cpp b/clang/unittests/Interpreter/InterpreterTest.cpp index 338003cd9851c..abb8e6377aabd 
100644 --- a/clang/unittests/Interpreter/InterpreterTest.cpp +++ b/clang/unittests/Interpreter/InterpreterTest.cpp @@ -232,10 +232,20 @@ TEST(IncrementalProcessing, FindMangledNameSymbol) { } std::string MangledName = MangleName(FD); - auto Addr = cantFail(Interp->getSymbolAddress(MangledName)); - EXPECT_NE(0U, Addr.getValue()); + auto Addr = Interp->getSymbolAddress(MangledName); + EXPECT_FALSE(!Addr); + EXPECT_NE(0U, Addr->getValue()); GlobalDecl GD(FD); - EXPECT_EQ(Addr, cantFail(Interp->getSymbolAddress(GD))); + EXPECT_EQ(*Addr, cantFail(Interp->getSymbolAddress(GD))); + cantFail( + Interp->ParseAndExecute("extern \"C\" int printf(const char*,...);")); + Addr = Interp->getSymbolAddress("printf"); + EXPECT_FALSE(!Addr); + + // FIXME: Re-enable when we investigate the way we handle dllimports on Win. +#ifndef _WIN32 + EXPECT_EQ((unsigned long long)&printf, Addr->getValue()); +#endif // _WIN32 } static void *AllocateObject(TypeDecl *TD, Interpreter &Interp) { diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp index b9b82564b3303..5158e99b75e5d 100644 --- a/compiler-rt/lib/asan/asan_interceptors.cpp +++ b/compiler-rt/lib/asan/asan_interceptors.cpp @@ -588,19 +588,34 @@ INTERCEPTOR(char*, strncpy, char *to, const char *from, uptr size) { return REAL(strncpy)(to, from, size); } -INTERCEPTOR(long, strtol, const char *nptr, char **endptr, int base) { - void *ctx; - ASAN_INTERCEPTOR_ENTER(ctx, strtol); - ENSURE_ASAN_INITED(); - if (!flags()->replace_str) { - return REAL(strtol)(nptr, endptr, base); - } +template +static ALWAYS_INLINE auto StrtolImpl(void *ctx, Fn real, const char *nptr, + char **endptr, int base) + -> decltype(real(nullptr, nullptr, 0)) { + if (!flags()->replace_str) + return real(nptr, endptr, base); char *real_endptr; - long result = REAL(strtol)(nptr, &real_endptr, base); + auto res = real(nptr, &real_endptr, base); StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base); - return result; + return 
res; } +# define INTERCEPTOR_STRTO_BASE(ret_type, func) \ + INTERCEPTOR(ret_type, func, const char *nptr, char **endptr, int base) { \ + void *ctx; \ + ASAN_INTERCEPTOR_ENTER(ctx, func); \ + ENSURE_ASAN_INITED(); \ + return StrtolImpl(ctx, REAL(func), nptr, endptr, base); \ + } + +INTERCEPTOR_STRTO_BASE(long, strtol) +INTERCEPTOR_STRTO_BASE(long long, strtoll) + +# if SANITIZER_GLIBC +INTERCEPTOR_STRTO_BASE(long, __isoc23_strtol) +INTERCEPTOR_STRTO_BASE(long long, __isoc23_strtoll) +# endif + INTERCEPTOR(int, atoi, const char *nptr) { void *ctx; ASAN_INTERCEPTOR_ENTER(ctx, atoi); @@ -639,20 +654,6 @@ INTERCEPTOR(long, atol, const char *nptr) { return result; } -#if ASAN_INTERCEPT_ATOLL_AND_STRTOLL -INTERCEPTOR(long long, strtoll, const char *nptr, char **endptr, int base) { - void *ctx; - ASAN_INTERCEPTOR_ENTER(ctx, strtoll); - ENSURE_ASAN_INITED(); - if (!flags()->replace_str) { - return REAL(strtoll)(nptr, endptr, base); - } - char *real_endptr; - long long result = REAL(strtoll)(nptr, &real_endptr, base); - StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base); - return result; -} - INTERCEPTOR(long long, atoll, const char *nptr) { void *ctx; ASAN_INTERCEPTOR_ENTER(ctx, atoll); @@ -666,7 +667,6 @@ INTERCEPTOR(long long, atoll, const char *nptr) { ASAN_READ_STRING(ctx, nptr, (real_endptr - nptr) + 1); return result; } -#endif // ASAN_INTERCEPT_ATOLL_AND_STRTOLL #if ASAN_INTERCEPT___CXA_ATEXIT || ASAN_INTERCEPT_ATEXIT static void AtCxaAtexit(void *unused) { @@ -751,11 +751,13 @@ void InitializeAsanInterceptors() { ASAN_INTERCEPT_FUNC(atoi); ASAN_INTERCEPT_FUNC(atol); - ASAN_INTERCEPT_FUNC(strtol); -#if ASAN_INTERCEPT_ATOLL_AND_STRTOLL ASAN_INTERCEPT_FUNC(atoll); + ASAN_INTERCEPT_FUNC(strtol); ASAN_INTERCEPT_FUNC(strtoll); -#endif +# if SANITIZER_GLIBC + ASAN_INTERCEPT_FUNC(__isoc23_strtol); + ASAN_INTERCEPT_FUNC(__isoc23_strtoll); +# endif // Intecept jump-related functions. 
ASAN_INTERCEPT_FUNC(longjmp); diff --git a/compiler-rt/lib/asan/asan_interceptors.h b/compiler-rt/lib/asan/asan_interceptors.h index 268096fea5e7e..d00d05587b368 100644 --- a/compiler-rt/lib/asan/asan_interceptors.h +++ b/compiler-rt/lib/asan/asan_interceptors.h @@ -42,12 +42,10 @@ void InitializePlatformInterceptors(); // Use macro to describe if specific function should be // intercepted on a given platform. #if !SANITIZER_WINDOWS -# define ASAN_INTERCEPT_ATOLL_AND_STRTOLL 1 # define ASAN_INTERCEPT__LONGJMP 1 # define ASAN_INTERCEPT_INDEX 1 # define ASAN_INTERCEPT_PTHREAD_CREATE 1 #else -# define ASAN_INTERCEPT_ATOLL_AND_STRTOLL 0 # define ASAN_INTERCEPT__LONGJMP 0 # define ASAN_INTERCEPT_INDEX 0 # define ASAN_INTERCEPT_PTHREAD_CREATE 0 diff --git a/compiler-rt/lib/asan/asan_win_dll_thunk.cpp b/compiler-rt/lib/asan/asan_win_dll_thunk.cpp index e3a90f18ed81a..0fa636bec0d00 100644 --- a/compiler-rt/lib/asan/asan_win_dll_thunk.cpp +++ b/compiler-rt/lib/asan/asan_win_dll_thunk.cpp @@ -65,6 +65,7 @@ INTERCEPT_WRAP_W_W(_expand_dbg) INTERCEPT_LIBRARY_FUNCTION(atoi); INTERCEPT_LIBRARY_FUNCTION(atol); +INTERCEPT_LIBRARY_FUNCTION(atoll); INTERCEPT_LIBRARY_FUNCTION(frexp); INTERCEPT_LIBRARY_FUNCTION(longjmp); #if SANITIZER_INTERCEPT_MEMCHR @@ -91,6 +92,7 @@ INTERCEPT_LIBRARY_FUNCTION(strspn); INTERCEPT_LIBRARY_FUNCTION(strstr); INTERCEPT_LIBRARY_FUNCTION(strtok); INTERCEPT_LIBRARY_FUNCTION(strtol); +INTERCEPT_LIBRARY_FUNCTION(strtoll); INTERCEPT_LIBRARY_FUNCTION(wcslen); INTERCEPT_LIBRARY_FUNCTION(wcsnlen); diff --git a/compiler-rt/lib/interception/interception.h b/compiler-rt/lib/interception/interception.h index 078d33b61be31..069f73d276f3c 100644 --- a/compiler-rt/lib/interception/interception.h +++ b/compiler-rt/lib/interception/interception.h @@ -181,7 +181,7 @@ const interpose_substitution substitution_##func_name[] \ // FreeBSD's dynamic linker (incompliantly) gives non-weak symbols higher // priority than weak ones so weak aliases won't work for indirect calls // in 
position-independent (-fPIC / -fPIE) mode. -# define __ASM_WEAK_WRAPPER(func) +# define __ASM_WEAK_WRAPPER(func) ".globl " #func "\n" # else # define __ASM_WEAK_WRAPPER(func) ".weak " #func "\n" # endif // SANITIZER_FREEBSD || SANITIZER_NETBSD diff --git a/compiler-rt/lib/msan/msan_interceptors.cpp b/compiler-rt/lib/msan/msan_interceptors.cpp index f5e0d3cb9a673..ba92bd14d319d 100644 --- a/compiler-rt/lib/msan/msan_interceptors.cpp +++ b/compiler-rt/lib/msan/msan_interceptors.cpp @@ -464,6 +464,25 @@ INTERCEPTORS_STRTO_BASE(long long, wcstoll, wchar_t) INTERCEPTORS_STRTO_BASE(unsigned long, wcstoul, wchar_t) INTERCEPTORS_STRTO_BASE(unsigned long long, wcstoull, wchar_t) +#if SANITIZER_GLIBC +INTERCEPTORS_STRTO(double, __isoc23_strtod, char) +INTERCEPTORS_STRTO(float, __isoc23_strtof, char) +INTERCEPTORS_STRTO(long double, __isoc23_strtold, char) +INTERCEPTORS_STRTO_BASE(long, __isoc23_strtol, char) +INTERCEPTORS_STRTO_BASE(long long, __isoc23_strtoll, char) +INTERCEPTORS_STRTO_BASE(unsigned long, __isoc23_strtoul, char) +INTERCEPTORS_STRTO_BASE(unsigned long long, __isoc23_strtoull, char) +INTERCEPTORS_STRTO_BASE(u64, __isoc23_strtouq, char) + +INTERCEPTORS_STRTO(double, __isoc23_wcstod, wchar_t) +INTERCEPTORS_STRTO(float, __isoc23_wcstof, wchar_t) +INTERCEPTORS_STRTO(long double, __isoc23_wcstold, wchar_t) +INTERCEPTORS_STRTO_BASE(long, __isoc23_wcstol, wchar_t) +INTERCEPTORS_STRTO_BASE(long long, __isoc23_wcstoll, wchar_t) +INTERCEPTORS_STRTO_BASE(unsigned long, __isoc23_wcstoul, wchar_t) +INTERCEPTORS_STRTO_BASE(unsigned long long, __isoc23_wcstoull, wchar_t) +#endif + #if SANITIZER_NETBSD #define INTERCEPT_STRTO(func) \ INTERCEPT_FUNCTION(func); \ @@ -1748,6 +1767,24 @@ void InitializeInterceptors() { INTERCEPT_STRTO(wcstoul); INTERCEPT_STRTO(wcstoll); INTERCEPT_STRTO(wcstoull); +#if SANITIZER_GLIBC + INTERCEPT_STRTO(__isoc23_strtod); + INTERCEPT_STRTO(__isoc23_strtof); + INTERCEPT_STRTO(__isoc23_strtold); + INTERCEPT_STRTO(__isoc23_strtol); + 
INTERCEPT_STRTO(__isoc23_strtoul); + INTERCEPT_STRTO(__isoc23_strtoll); + INTERCEPT_STRTO(__isoc23_strtoull); + INTERCEPT_STRTO(__isoc23_strtouq); + INTERCEPT_STRTO(__isoc23_wcstod); + INTERCEPT_STRTO(__isoc23_wcstof); + INTERCEPT_STRTO(__isoc23_wcstold); + INTERCEPT_STRTO(__isoc23_wcstol); + INTERCEPT_STRTO(__isoc23_wcstoul); + INTERCEPT_STRTO(__isoc23_wcstoll); + INTERCEPT_STRTO(__isoc23_wcstoull); +#endif + #ifdef SANITIZER_NLDBL_VERSION INTERCEPT_FUNCTION_VER(vswprintf, SANITIZER_NLDBL_VERSION); INTERCEPT_FUNCTION_VER(swprintf, SANITIZER_NLDBL_VERSION); diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index 299561b3ad3a1..0e563fa12022a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -1491,6 +1491,16 @@ VSCANF_INTERCEPTOR_IMPL(__isoc99_vsscanf, false, str, format, ap) INTERCEPTOR(int, __isoc99_vfscanf, void *stream, const char *format, va_list ap) VSCANF_INTERCEPTOR_IMPL(__isoc99_vfscanf, false, stream, format, ap) + +INTERCEPTOR(int, __isoc23_vscanf, const char *format, va_list ap) +VSCANF_INTERCEPTOR_IMPL(__isoc23_vscanf, false, format, ap) + +INTERCEPTOR(int, __isoc23_vsscanf, const char *str, const char *format, + va_list ap) +VSCANF_INTERCEPTOR_IMPL(__isoc23_vsscanf, false, str, format, ap) + +INTERCEPTOR(int, __isoc23_vfscanf, void *stream, const char *format, va_list ap) +VSCANF_INTERCEPTOR_IMPL(__isoc23_vfscanf, false, stream, format, ap) #endif // SANITIZER_INTERCEPT_ISOC99_SCANF INTERCEPTOR(int, scanf, const char *format, ...) @@ -1511,6 +1521,15 @@ FORMAT_INTERCEPTOR_IMPL(__isoc99_fscanf, __isoc99_vfscanf, stream, format) INTERCEPTOR(int, __isoc99_sscanf, const char *str, const char *format, ...) FORMAT_INTERCEPTOR_IMPL(__isoc99_sscanf, __isoc99_vsscanf, str, format) + +INTERCEPTOR(int, __isoc23_scanf, const char *format, ...) 
+FORMAT_INTERCEPTOR_IMPL(__isoc23_scanf, __isoc23_vscanf, format) + +INTERCEPTOR(int, __isoc23_fscanf, void *stream, const char *format, ...) +FORMAT_INTERCEPTOR_IMPL(__isoc23_fscanf, __isoc23_vfscanf, stream, format) + +INTERCEPTOR(int, __isoc23_sscanf, const char *str, const char *format, ...) +FORMAT_INTERCEPTOR_IMPL(__isoc23_sscanf, __isoc23_vsscanf, str, format) #endif #endif @@ -1534,7 +1553,13 @@ FORMAT_INTERCEPTOR_IMPL(__isoc99_sscanf, __isoc99_vsscanf, str, format) COMMON_INTERCEPT_FUNCTION(__isoc99_fscanf); \ COMMON_INTERCEPT_FUNCTION(__isoc99_vscanf); \ COMMON_INTERCEPT_FUNCTION(__isoc99_vsscanf); \ - COMMON_INTERCEPT_FUNCTION(__isoc99_vfscanf); + COMMON_INTERCEPT_FUNCTION(__isoc99_vfscanf); \ + COMMON_INTERCEPT_FUNCTION(__isoc23_scanf); \ + COMMON_INTERCEPT_FUNCTION(__isoc23_sscanf); \ + COMMON_INTERCEPT_FUNCTION(__isoc23_fscanf); \ + COMMON_INTERCEPT_FUNCTION(__isoc23_vscanf); \ + COMMON_INTERCEPT_FUNCTION(__isoc23_vsscanf); \ + COMMON_INTERCEPT_FUNCTION(__isoc23_vfscanf); #else #define INIT_ISOC99_SCANF #endif @@ -3539,30 +3564,26 @@ UNUSED static inline void StrtolFixAndCheck(void *ctx, const char *nptr, (real_endptr - nptr) + 1 : 0); } - #if SANITIZER_INTERCEPT_STRTOIMAX -INTERCEPTOR(INTMAX_T, strtoimax, const char *nptr, char **endptr, int base) { - void *ctx; - COMMON_INTERCEPTOR_ENTER(ctx, strtoimax, nptr, endptr, base); - // FIXME: under ASan the call below may write to freed memory and corrupt - // its metadata. See - // https://github.com/google/sanitizers/issues/321. 
+template +static ALWAYS_INLINE auto StrtoimaxImpl(void *ctx, Fn real, const char *nptr, + char **endptr, int base) + -> decltype(real(nullptr, nullptr, 0)) { char *real_endptr; - INTMAX_T res = REAL(strtoimax)(nptr, &real_endptr, base); + auto res = real(nptr, &real_endptr, base); StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base); return res; } +INTERCEPTOR(INTMAX_T, strtoimax, const char *nptr, char **endptr, int base) { + void *ctx; + COMMON_INTERCEPTOR_ENTER(ctx, strtoimax, nptr, endptr, base); + return StrtoimaxImpl(ctx, REAL(strtoimax), nptr, endptr, base); +} INTERCEPTOR(UINTMAX_T, strtoumax, const char *nptr, char **endptr, int base) { void *ctx; COMMON_INTERCEPTOR_ENTER(ctx, strtoumax, nptr, endptr, base); - // FIXME: under ASan the call below may write to freed memory and corrupt - // its metadata. See - // https://github.com/google/sanitizers/issues/321. - char *real_endptr; - UINTMAX_T res = REAL(strtoumax)(nptr, &real_endptr, base); - StrtolFixAndCheck(ctx, nptr, endptr, real_endptr, base); - return res; + return StrtoimaxImpl(ctx, REAL(strtoumax), nptr, endptr, base); } #define INIT_STRTOIMAX \ @@ -3572,6 +3593,25 @@ INTERCEPTOR(UINTMAX_T, strtoumax, const char *nptr, char **endptr, int base) { #define INIT_STRTOIMAX #endif +#if SANITIZER_INTERCEPT_STRTOIMAX && SANITIZER_GLIBC +INTERCEPTOR(INTMAX_T, __isoc23_strtoimax, const char *nptr, char **endptr, int base) { + void *ctx; + COMMON_INTERCEPTOR_ENTER(ctx, __isoc23_strtoimax, nptr, endptr, base); + return StrtoimaxImpl(ctx, REAL(__isoc23_strtoimax), nptr, endptr, base); +} +INTERCEPTOR(UINTMAX_T, __isoc23_strtoumax, const char *nptr, char **endptr, int base) { + void *ctx; + COMMON_INTERCEPTOR_ENTER(ctx, __isoc23_strtoumax, nptr, endptr, base); + return StrtoimaxImpl(ctx, REAL(__isoc23_strtoumax), nptr, endptr, base); +} + +# define INIT_STRTOIMAX_C23 \ + COMMON_INTERCEPT_FUNCTION(__isoc23_strtoimax); \ + COMMON_INTERCEPT_FUNCTION(__isoc23_strtoumax); +#else +# define INIT_STRTOIMAX_C23 +#endif 
+ #if SANITIZER_INTERCEPT_MBSTOWCS INTERCEPTOR(SIZE_T, mbstowcs, wchar_t *dest, const char *src, SIZE_T len) { void *ctx; @@ -10304,6 +10344,7 @@ static void InitializeCommonInterceptors() { INIT_GETCWD; INIT_GET_CURRENT_DIR_NAME; INIT_STRTOIMAX; + INIT_STRTOIMAX_C23; INIT_MBSTOWCS; INIT_MBSNRTOWCS; INIT_WCSTOMBS; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_format.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_format.inc index 220abb89c3beb..24485900644b3 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_format.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors_format.inc @@ -340,11 +340,19 @@ static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc, size = 0; } COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size); - // For %ms/%mc, write the allocated output buffer as well. + // For %mc/%mC/%ms/%m[/%mS, write the allocated output buffer as well. if (dir.allocate) { - char *buf = *(char **)argp; - if (buf) - COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, internal_strlen(buf) + 1); + if (char *buf = *(char **)argp) { + if (dir.convSpecifier == 'c') + size = 1; + else if (dir.convSpecifier == 'C') + size = sizeof(wchar_t); + else if (dir.convSpecifier == 'S') + size = (internal_wcslen((wchar_t *)buf) + 1) * sizeof(wchar_t); + else // 's' or '[' + size = internal_strlen(buf) + 1; + COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, size); + } } } } diff --git a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt index 509e3f19fe386..819cfca44b00b 100644 --- a/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt +++ b/compiler-rt/lib/sanitizer_common/symbolizer/scripts/global_symbols.txt @@ -34,6 +34,13 @@ __interceptor_pthread_setspecific w __interceptor_read w __interceptor_realpath w __isinf U +__isoc23_sscanf U +__isoc23_strtol U +__isoc23_strtoll U 
+__isoc23_strtoll_l U +__isoc23_strtoull U +__isoc23_strtoull_l U +__isoc23_vsscanf U __isoc99_sscanf U __isoc99_vsscanf U __moddi3 U diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_format_interceptor_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_format_interceptor_test.cpp index fa52ccc1994f6..de96e573ab844 100644 --- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_format_interceptor_test.cpp +++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_format_interceptor_test.cpp @@ -9,14 +9,16 @@ // Tests for *scanf interceptors implementation in sanitizer_common. // //===----------------------------------------------------------------------===// +#include + #include #include +#include "gtest/gtest.h" #include "interception/interception.h" -#include "sanitizer_test_utils.h" -#include "sanitizer_common/sanitizer_libc.h" #include "sanitizer_common/sanitizer_common.h" -#include "gtest/gtest.h" +#include "sanitizer_common/sanitizer_libc.h" +#include "sanitizer_test_utils.h" using namespace __sanitizer; @@ -206,21 +208,35 @@ TEST(SanitizerCommonInterceptors, Scanf) { TEST(SanitizerCommonInterceptors, ScanfAllocate) { const char *buf = "123456"; + const wchar_t *wbuf = L"123"; // Can not use testScanf() because this case needs a valid pointer to a string // in the scanf argument. 
+ { + std::vector scanf_sizes; + testScanf3((void *)&scanf_sizes, 2, /*allowGnuMalloc=*/false, "%mc", &buf); + verifyFormatResults("%mc", 2, scanf_sizes, {P, 1u}); + } + { + std::vector scanf_sizes; + testScanf3((void *)&scanf_sizes, 2, /*allowGnuMalloc=*/false, "%mC", &wbuf); + verifyFormatResults("%mC", 2, scanf_sizes, {P, (unsigned)sizeof(wchar_t)}); + } { std::vector scanf_sizes; testScanf3((void *)&scanf_sizes, 2, /*allowGnuMalloc=*/false, "%ms", &buf); - verifyFormatResults("%ms", 2, scanf_sizes, - {P, (unsigned)(strlen(buf) + 1)}); + verifyFormatResults("%ms", 2, scanf_sizes, {P, unsigned(strlen(buf) + 1)}); + scanf_sizes.clear(); + testScanf3((void *)&scanf_sizes, 2, /*allowGnuMalloc=*/false, "%m[0-9]", + &buf); + verifyFormatResults("%m[0-9]", 2, scanf_sizes, + {P, unsigned(strlen(buf) + 1)}); } - { std::vector scanf_sizes; - testScanf3((void *)&scanf_sizes, 2, /*allowGnuMalloc=*/false, "%mc", &buf); - verifyFormatResults("%mc", 2, scanf_sizes, - {P, (unsigned)(strlen(buf) + 1)}); + testScanf3((void *)&scanf_sizes, 2, /*allowGnuMalloc=*/false, "%mS", &wbuf); + verifyFormatResults("%mS", 2, scanf_sizes, + {P, unsigned((wcslen(wbuf) + 1) * sizeof(wchar_t))}); } } diff --git a/compiler-rt/test/asan/TestCases/atoll_strict.c b/compiler-rt/test/asan/TestCases/atoll_strict.c index 431ec6b4ba230..b204c97b17580 100644 --- a/compiler-rt/test/asan/TestCases/atoll_strict.c +++ b/compiler-rt/test/asan/TestCases/atoll_strict.c @@ -10,9 +10,6 @@ // RUN: %env_asan_opts=strict_string_checks=false %run %t test3 2>&1 // RUN: %env_asan_opts=strict_string_checks=true not %run %t test3 2>&1 | FileCheck %s --check-prefix=CHECK3 -// FIXME: Needs Windows interceptor. 
-// XFAIL: target={{.*windows-(msvc.*|gnu)}} - #include #include #include diff --git a/compiler-rt/test/asan/TestCases/strtoll_strict.c b/compiler-rt/test/asan/TestCases/strtoll_strict.c index 097412e3ab5c2..88e6651b6ed11 100644 --- a/compiler-rt/test/asan/TestCases/strtoll_strict.c +++ b/compiler-rt/test/asan/TestCases/strtoll_strict.c @@ -24,7 +24,7 @@ // FIXME: Enable strtoll interceptor. // REQUIRES: shadow-scale-3 -// XFAIL: target={{.*windows-(msvc.*|gnu)}} +// XFAIL: target={{.*windows-msvc.*}} #include #include diff --git a/compiler-rt/test/sanitizer_common/TestCases/scanf.c b/compiler-rt/test/sanitizer_common/TestCases/scanf.c new file mode 100644 index 0000000000000..a42d9f72a71d9 --- /dev/null +++ b/compiler-rt/test/sanitizer_common/TestCases/scanf.c @@ -0,0 +1,24 @@ +// RUN: %clang -std=c17 %s -o %t && %run %t +/// Test __isoc23_* for glibc 2.38+. +// RUN: %clang -std=c2x %s -o %t && %run %t + +#include +#include +#include + +int test_vsscanf(const char *buf, const char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + int ret = vsscanf(buf, fmt, ap); + va_end(ap); + return ret; +} + +int main(int argc, char **argv) { + int x, y; + assert(sscanf("42", "%d", &x) == 1); + assert(x == 42); + assert(test_vsscanf("42", "%d", &y) == 1); + assert(y == 42); + return 0; +} diff --git a/compiler-rt/test/sanitizer_common/TestCases/strtol.c b/compiler-rt/test/sanitizer_common/TestCases/strtol.c new file mode 100644 index 0000000000000..c3de9bcb7aa04 --- /dev/null +++ b/compiler-rt/test/sanitizer_common/TestCases/strtol.c @@ -0,0 +1,61 @@ +// RUN: %clang -std=c17 %s -o %t && %run %t +/// Test __isoc23_* for glibc 2.38+. 
+// RUN: %clang -std=c2x %s -o %t && %run %t + +#include +#include +#include +#include +#include + +#define TESTL(func) \ + { \ + char *end; \ + long l = (long)func("42", &end, 0); \ + assert(l == 42); \ + assert(*end == '\0'); \ + } + +#define TESTF(func) \ + { \ + char *end; \ + long l = (long)func("42", &end); \ + assert(l == 42); \ + assert(*end == '\0'); \ + } + +#define WTESTL(func) \ + { \ + wchar_t *end; \ + long l = (long)func(L"42", &end, 0); \ + assert(l == 42); \ + assert(*end == L'\0'); \ + } + +#define WTESTF(func) \ + { \ + wchar_t *end; \ + long l = (long)func(L"42", &end); \ + assert(l == 42); \ + assert(*end == '\0'); \ + } + +int main() { + TESTL(strtol); + TESTL(strtoll); + TESTL(strtoimax); + TESTL(strtoul); + TESTL(strtoull); + TESTL(strtoumax); + TESTF(strtof); + TESTF(strtod); + TESTF(strtold); + + WTESTL(wcstol); + WTESTL(wcstoll); + WTESTL(wcstoul); + WTESTL(wcstoull); + WTESTF(wcstof); + WTESTF(wcstod); + WTESTF(wcstold); +} diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index 61ba0f584ae6d..960fc62190f5f 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -128,9 +128,8 @@ static mlir::ParseResult parseAllocatableOp(FN wrapResultType, parser.emitError(parser.getNameLoc(), "invalid allocate type: ") << intype; return mlir::failure(); } - result.addAttribute( - "operand_segment_sizes", - builder.getDenseI32ArrayAttr({typeparamsSize, shapeSize})); + result.addAttribute("operandSegmentSizes", builder.getDenseI32ArrayAttr( + {typeparamsSize, shapeSize})); if (parser.parseOptionalAttrDict(result.attributes) || parser.addTypeToList(restype, result.types)) return mlir::failure(); @@ -149,7 +148,7 @@ static void printAllocatableOp(mlir::OpAsmPrinter &p, OP &op) { p << ", "; p.printOperand(sh); } - p.printOptionalAttrDict(op->getAttrs(), {"in_type", "operand_segment_sizes"}); + p.printOptionalAttrDict(op->getAttrs(), {"in_type", "operandSegmentSizes"}); } 
//===----------------------------------------------------------------------===// diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir index bf03c24fee75c..d67198d97699e 100644 --- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir +++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir @@ -28,7 +28,7 @@ func.func @_QPsb1(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.ref, pinned} : (i64) -> !llvm.ptr +// CHECK: %[[I_VAR:.*]] = llvm.alloca %[[ONE_3]] x i32 {adapt.valuebyref, in_type = i32, operandSegmentSizes = array, pinned} : (i64) -> !llvm.ptr // CHECK: %[[N:.*]] = llvm.load %[[N_REF]] : !llvm.ptr // CHECK: omp.wsloop nowait // CHECK-SAME: for (%[[I:.*]]) : i32 = (%[[ONE_2]]) to (%[[N]]) inclusive step (%[[ONE_2]]) { @@ -200,7 +200,7 @@ func.func @_QPsimd1(%arg0: !fir.ref {fir.bindc_name = "n"}, %arg1: !fir.ref // CHECK: %[[ONE_2:.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK: omp.parallel { // CHECK: %[[ONE_3:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[I_VAR:.*]] = llvm.alloca %[[ONE_3]] x i32 {adapt.valuebyref, in_type = i32, operand_segment_sizes = array, pinned} : (i64) -> !llvm.ptr +// CHECK: %[[I_VAR:.*]] = llvm.alloca %[[ONE_3]] x i32 {adapt.valuebyref, in_type = i32, operandSegmentSizes = array, pinned} : (i64) -> !llvm.ptr // CHECK: %[[N:.*]] = llvm.load %[[N_REF]] : !llvm.ptr // CHECK: omp.simdloop // CHECK-SAME: (%[[I:.*]]) : i32 = (%[[ONE_2]]) to (%[[N]]) step (%[[ONE_2]]) { @@ -231,13 +231,13 @@ func.func @_QPomp_target_data() { // CHECK-LABEL: llvm.func @_QPomp_target_data() { // CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[VAL_1:.*]] = llvm.alloca %[[VAL_0]] x !llvm.array<1024 x i32> {bindc_name = "a", in_type = !fir.array<1024xi32>, operand_segment_sizes = array, uniq_name = "_QFomp_target_dataEa"} : (i64) -> !llvm.ptr> +// CHECK: %[[VAL_1:.*]] = llvm.alloca %[[VAL_0]] x !llvm.array<1024 x i32> {bindc_name = "a", in_type = 
!fir.array<1024xi32>, operandSegmentSizes = array, uniq_name = "_QFomp_target_dataEa"} : (i64) -> !llvm.ptr> // CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[VAL_3:.*]] = llvm.alloca %[[VAL_2]] x !llvm.array<1024 x i32> {bindc_name = "b", in_type = !fir.array<1024xi32>, operand_segment_sizes = array, uniq_name = "_QFomp_target_dataEb"} : (i64) -> !llvm.ptr> +// CHECK: %[[VAL_3:.*]] = llvm.alloca %[[VAL_2]] x !llvm.array<1024 x i32> {bindc_name = "b", in_type = !fir.array<1024xi32>, operandSegmentSizes = array, uniq_name = "_QFomp_target_dataEb"} : (i64) -> !llvm.ptr> // CHECK: %[[VAL_4:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[VAL_5:.*]] = llvm.alloca %[[VAL_4]] x !llvm.array<1024 x i32> {bindc_name = "c", in_type = !fir.array<1024xi32>, operand_segment_sizes = array, uniq_name = "_QFomp_target_dataEc"} : (i64) -> !llvm.ptr> +// CHECK: %[[VAL_5:.*]] = llvm.alloca %[[VAL_4]] x !llvm.array<1024 x i32> {bindc_name = "c", in_type = !fir.array<1024xi32>, operandSegmentSizes = array, uniq_name = "_QFomp_target_dataEc"} : (i64) -> !llvm.ptr> // CHECK: %[[VAL_6:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[VAL_7:.*]] = llvm.alloca %[[VAL_6]] x !llvm.array<1024 x i32> {bindc_name = "d", in_type = !fir.array<1024xi32>, operand_segment_sizes = array, uniq_name = "_QFomp_target_dataEd"} : (i64) -> !llvm.ptr> +// CHECK: %[[VAL_7:.*]] = llvm.alloca %[[VAL_6]] x !llvm.array<1024 x i32> {bindc_name = "d", in_type = !fir.array<1024xi32>, operandSegmentSizes = array, uniq_name = "_QFomp_target_dataEd"} : (i64) -> !llvm.ptr> // CHECK: omp.target_enter_data map((to -> %[[VAL_1]] : !llvm.ptr>), (to -> %[[VAL_3]] : !llvm.ptr>), (always, alloc -> %[[VAL_5]] : !llvm.ptr>)) // CHECK: omp.target_exit_data map((from -> %[[VAL_1]] : !llvm.ptr>), (from -> %[[VAL_3]] : !llvm.ptr>), (release -> %[[VAL_5]] : !llvm.ptr>), (always, delete -> %[[VAL_7]] : !llvm.ptr>)) // CHECK: llvm.return @@ -278,9 +278,9 @@ func.func @_QPopenmp_target_data_region() { 
// CHECK-LABEL: llvm.func @_QPopenmp_target_data_region() { // CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[VAL_1:.*]] = llvm.alloca %[[VAL_0]] x !llvm.array<1024 x i32> {bindc_name = "a", in_type = !fir.array<1024xi32>, operand_segment_sizes = array, uniq_name = "_QFopenmp_target_data_regionEa"} : (i64) -> !llvm.ptr> +// CHECK: %[[VAL_1:.*]] = llvm.alloca %[[VAL_0]] x !llvm.array<1024 x i32> {bindc_name = "a", in_type = !fir.array<1024xi32>, operandSegmentSizes = array, uniq_name = "_QFopenmp_target_data_regionEa"} : (i64) -> !llvm.ptr> // CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[VAL_3:.*]] = llvm.alloca %[[VAL_2]] x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFopenmp_target_data_regionEi"} : (i64) -> !llvm.ptr +// CHECK: %[[VAL_3:.*]] = llvm.alloca %[[VAL_2]] x i32 {bindc_name = "i", in_type = i32, operandSegmentSizes = array, uniq_name = "_QFopenmp_target_data_regionEi"} : (i64) -> !llvm.ptr // CHECK: omp.target_data map((tofrom -> %[[VAL_1]] : !llvm.ptr>)) { // CHECK: %[[VAL_4:.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK: %[[VAL_5:.*]] = llvm.sext %[[VAL_4]] : i32 to i64 @@ -338,7 +338,7 @@ func.func @_QPomp_target() { // CHECK-LABEL: llvm.func @_QPomp_target() { // CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[VAL_1:.*]] = llvm.alloca %[[VAL_0]] x !llvm.array<512 x i32> {bindc_name = "a", in_type = !fir.array<512xi32>, operand_segment_sizes = array, uniq_name = "_QFomp_targetEa"} : (i64) -> !llvm.ptr> +// CHECK: %[[VAL_1:.*]] = llvm.alloca %[[VAL_0]] x !llvm.array<512 x i32> {bindc_name = "a", in_type = !fir.array<512xi32>, operandSegmentSizes = array, uniq_name = "_QFomp_targetEa"} : (i64) -> !llvm.ptr> // CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(64 : i32) : i32 // CHECK: omp.target thread_limit(%[[VAL_2]] : i32) map((tofrom -> %[[VAL_1]] : !llvm.ptr>)) { // CHECK: %[[VAL_3:.*]] = llvm.mlir.constant(10 : i32) : i32 @@ -544,7 +544,7 @@ 
func.func @_QPsb() { // CHECK: llvm.func @_QPsb() { // CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[LI_REF:.*]] = llvm.alloca %6 x i32 {bindc_name = "li", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFsbEli"} : (i64) -> !llvm.ptr +// CHECK: %[[LI_REF:.*]] = llvm.alloca %6 x i32 {bindc_name = "li", in_type = i32, operandSegmentSizes = array, uniq_name = "_QFsbEli"} : (i64) -> !llvm.ptr // CHECK: omp.sections { // CHECK: omp.section { // CHECK: llvm.br ^[[BB_ENTRY:.*]]({{.*}}) @@ -582,7 +582,7 @@ func.func @_QPsb() { // CHECK: } // CHECK-LABEL: @_QPsimple_reduction // CHECK-SAME: %[[ARRAY_REF:.*]]: !llvm.ptr> -// CHECK: %[[RED_ACCUMULATOR:.*]] = llvm.alloca %2 x i32 {bindc_name = "x", in_type = !fir.logical<4>, operand_segment_sizes = array, uniq_name = "_QFsimple_reductionEx"} : (i64) -> !llvm.ptr +// CHECK: %[[RED_ACCUMULATOR:.*]] = llvm.alloca %2 x i32 {bindc_name = "x", in_type = !fir.logical<4>, operandSegmentSizes = array, uniq_name = "_QFsimple_reductionEx"} : (i64) -> !llvm.ptr // CHECK: omp.parallel { // CHECK: omp.wsloop reduction(@[[EQV_REDUCTION]] -> %[[RED_ACCUMULATOR]] : !llvm.ptr) for // CHECK: %[[ARRAY_ELEM_REF:.*]] = llvm.getelementptr %[[ARRAY_REF]][0, %{{.*}}] : (!llvm.ptr>, i64) -> !llvm.ptr diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir index d0c154fb0376e..52716afe3198d 100644 --- a/flang/test/Fir/convert-to-llvm.fir +++ b/flang/test/Fir/convert-to-llvm.fir @@ -1748,7 +1748,7 @@ func.func @no_reassoc(%arg0: !fir.ref) { // CHECK-LABEL: llvm.func @no_reassoc( // CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr) { // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[ALLOC:.*]] = llvm.alloca %[[C1]] x i32 {in_type = i32, operand_segment_sizes = array} : (i64) -> !llvm.ptr +// CHECK: %[[ALLOC:.*]] = llvm.alloca %[[C1]] x i32 {in_type = i32, operandSegmentSizes = array} : (i64) -> !llvm.ptr // CHECK: 
%[[LOAD:.*]] = llvm.load %[[ARG0]] : !llvm.ptr // CHECK: llvm.store %[[LOAD]], %[[ALLOC]] : !llvm.ptr // CHECK: llvm.return @@ -1868,7 +1868,7 @@ func.func private @_QPxb(!fir.box>) // CHECK: %[[C1_0:.*]] = llvm.mlir.constant(1 : i64) : i64 // CHECK: %[[ARR_SIZE_TMP1:.*]] = llvm.mul %[[C1_0]], %[[N1]] : i64 // CHECK: %[[ARR_SIZE:.*]] = llvm.mul %[[ARR_SIZE_TMP1]], %[[N2]] : i64 -// CHECK: %[[ARR:.*]] = llvm.alloca %[[ARR_SIZE]] x f64 {bindc_name = "arr", in_type = !fir.array, operand_segment_sizes = array, uniq_name = "_QFsbEarr"} : (i64) -> !llvm.ptr +// CHECK: %[[ARR:.*]] = llvm.alloca %[[ARR_SIZE]] x f64 {bindc_name = "arr", in_type = !fir.array, operandSegmentSizes = array, uniq_name = "_QFsbEarr"} : (i64) -> !llvm.ptr // CHECK: %[[TYPE_CODE:.*]] = llvm.mlir.constant(28 : i32) : i32 // CHECK: %[[NULL:.*]] = llvm.mlir.null : !llvm.ptr // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1] @@ -1945,9 +1945,9 @@ func.func private @_QPtest_dt_callee(%arg0: !fir.box>) // CHECK: %[[C10:.*]] = llvm.mlir.constant(10 : i64) : i64 // CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : i64) : i64 // CHECK: %[[ALLOCA_SIZE_V:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[V:.*]] = llvm.alloca %[[ALLOCA_SIZE_V]] x i32 {bindc_name = "v", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFtest_dt_sliceEv"} : (i64) -> !llvm.ptr +// CHECK: %[[V:.*]] = llvm.alloca %[[ALLOCA_SIZE_V]] x i32 {bindc_name = "v", in_type = i32, operandSegmentSizes = array, uniq_name = "_QFtest_dt_sliceEv"} : (i64) -> !llvm.ptr // CHECK: %[[ALLOCA_SIZE_X:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[X:.*]] = llvm.alloca %[[ALLOCA_SIZE_X]] x !llvm.array<20 x struct<"_QFtest_dt_sliceTt", (i32, i32)>> {bindc_name = "x", in_type = !fir.array<20x!fir.type<_QFtest_dt_sliceTt{i:i32,j:i32}>>, operand_segment_sizes = array, uniq_name = "_QFtest_dt_sliceEx"} : (i64) -> !llvm.ptr>> +// CHECK: %[[X:.*]] = llvm.alloca %[[ALLOCA_SIZE_X]] x !llvm.array<20 x struct<"_QFtest_dt_sliceTt", (i32, i32)>> 
{bindc_name = "x", in_type = !fir.array<20x!fir.type<_QFtest_dt_sliceTt{i:i32,j:i32}>>, operandSegmentSizes = array, uniq_name = "_QFtest_dt_sliceEx"} : (i64) -> !llvm.ptr>> // CHECK: %[[TYPE_CODE:.*]] = llvm.mlir.constant(9 : i32) : i32 // CHECK: %[[NULL:.*]] = llvm.mlir.null : !llvm.ptr // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1] diff --git a/libcxx/docs/Contributing.rst b/libcxx/docs/Contributing.rst index cfae4d67ffdf4..3e3032ece99e4 100644 --- a/libcxx/docs/Contributing.rst +++ b/libcxx/docs/Contributing.rst @@ -49,7 +49,7 @@ sure you don't forget anything: - Did you add it to ``include/module.modulemap.in``? - Did you add it to ``include/CMakeLists.txt``? - - If it's a public header, did you update ``utils/libcxx/test/header_information.py``? + - If it's a public header, did you update ``utils/libcxx/header_information.py``? - Did you add the relevant feature test macro(s) for your feature? Did you update the ``generate_feature_test_macro_components.py`` script with it? - Did you run the ``libcxx-generate-files`` target and verify its output? 
diff --git a/libcxx/include/__format/format_functions.h b/libcxx/include/__format/format_functions.h index 27ec0a295f4f4..bb62c1ce10c15 100644 --- a/libcxx/include/__format/format_functions.h +++ b/libcxx/include/__format/format_functions.h @@ -245,6 +245,9 @@ __handle_replacement_field(_Iterator __begin, _Iterator __end, using _CharT = iter_value_t<_Iterator>; __format::__parse_number_result __r = __format::__parse_arg_id(__begin, __end, __parse_ctx); + if (__r.__last == __end) + std::__throw_format_error("The argument index should end with a ':' or a '}'"); + bool __parse = *__r.__last == _CharT(':'); switch (*__r.__last) { case _CharT(':'): diff --git a/libcxx/include/__locale_dir/locale_base_api/locale_guard.h b/libcxx/include/__locale_dir/locale_base_api/locale_guard.h index 0e2e91af7d190..5946ed698e0fd 100644 --- a/libcxx/include/__locale_dir/locale_base_api/locale_guard.h +++ b/libcxx/include/__locale_dir/locale_base_api/locale_guard.h @@ -10,6 +10,7 @@ #define _LIBCPP___LOCALE_LOCALE_BASE_API_LOCALE_GUARD_H #include <__config> +#include <__locale> // for locale_t #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__std_clang_module b/libcxx/include/__std_clang_module index 61a926eb6307e..4d02336d30b06 100644 --- a/libcxx/include/__std_clang_module +++ b/libcxx/include/__std_clang_module @@ -7,6 +7,10 @@ // //===----------------------------------------------------------------------===// +// WARNING, this entire header is generated by +// utils/generate_std_clang_module_header.py +// DO NOT MODIFY! + // This header should not be directly included, it's exclusively to import all // of the libc++ public clang modules for the `std` clang module to export. 
In // other words, it's to facilitate `@import std;` in Objective-C++ and `import std` @@ -17,7 +21,6 @@ # error "Do not include this header directly, include individual headers instead" #endif -#include <__availability> #include <__config> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -27,52 +30,187 @@ #include #include #include -#include +#if !defined(_LIBCPP_HAS_NO_ATOMIC_HEADER) +# include +#endif +#if !defined(_LIBCPP_HAS_NO_THREADS) +# include +#endif #include #include +#include +#include +#include +#include +#include +#include #include #include +#include +#include +#include +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif +#include +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif #include +#include #include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if !defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS) +# include +#endif +#if !defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS) +# include +#endif #include +#include #include #include #include +#include +#include +#include +#include +#include +#include +#include +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif #include +#if !defined(_LIBCPP_HAS_NO_THREADS) +# include +#endif #include +#include +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif #include +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif #include +#if !defined(_LIBCPP_HAS_NO_THREADS) +# include +#endif +#include #include #include +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif #include +#include 
#include #include #include +#include #include #include #include #include +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif +#include #include #include #include #include +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif #include +#if !defined(_LIBCPP_HAS_NO_THREADS) +# include +#endif #include +#include +#if !defined(_LIBCPP_HAS_NO_THREADS) +# include +#endif #include #include +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif #include +#if !defined(_LIBCPP_HAS_NO_ATOMIC_HEADER) +# include +#endif +#include +#include #include +#include +#include +#include +#if !defined(_LIBCPP_HAS_NO_THREADS) +# include +#endif +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif +#include #include #include +#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# include +#endif #include +#include +#if !defined(_LIBCPP_HAS_NO_THREADS) +# include +#endif #include #include #include #include +#include #include #include #include @@ -80,133 +218,9 @@ #include #include #include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#ifndef _LIBCPP_HAS_NO_LOCALIZATION -# include -# ifndef _LIBCPP_HAS_NO_FILESYSTEM -# include -# endif -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include - -# include - -# include -#endif - -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS -# include -# include - +#if !defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS) # include -# include -#endif - -#ifdef _LIBCPP_AVAILABILITY_TO_CHARS_FLOATING_POINT -# include -#endif - -#ifndef _LIBCPP_CXX03_LANG -# ifndef _LIBCPP_HAS_NO_THREADS -# include -# include -# include -# endif - -# include -# include -# 
include -# include -# include -# include -# include -# ifndef _LIBCPP_HAS_NO_LOCALIZATION -# include -# endif -# include -# include -# include -# include -# include -# include -# include -# include #endif - -#if _LIBCPP_STD_VER >= 14 -# ifndef _LIBCPP_HAS_NO_THREADS -# include -# endif -#endif - -#if _LIBCPP_STD_VER >= 17 -# ifndef _LIBCPP_HAS_NO_FILESYSTEM -# include -# endif -#endif - -#if _LIBCPP_STD_VER >= 20 -# include - -# ifndef _LIBCPP_HAS_NO_THREADS -# include -# include -# include -# include -# endif -#endif - -#if _LIBCPP_STD_VER >= 23 -# ifndef _LIBCPP_HAS_NO_THREADS -# include -# endif +#if !defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS) +# include #endif diff --git a/libcxx/include/__type_traits/is_nothrow_constructible.h b/libcxx/include/__type_traits/is_nothrow_constructible.h index d4686d89fd96e..4949062433b78 100644 --- a/libcxx/include/__type_traits/is_nothrow_constructible.h +++ b/libcxx/include/__type_traits/is_nothrow_constructible.h @@ -22,7 +22,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_builtin(__is_nothrow_constructible) +// GCC is disabled due to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106611 +#if __has_builtin(__is_nothrow_constructible) && !defined(_LIBCPP_COMPILER_GCC) template < class _Tp, class... 
_Args> struct _LIBCPP_TEMPLATE_VIS is_nothrow_constructible diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in index bbb7090fd4bea..37a9edcd7ece1 100644 --- a/libcxx/include/module.modulemap.in +++ b/libcxx/include/module.modulemap.in @@ -16,7 +16,6 @@ module std_atomic [system] { export * } module std_barrier [system] { - @requires_LIBCXX_ENABLE_THREADS@ header "barrier" export * } @@ -37,7 +36,6 @@ module std_chrono [system] { export * } module std_codecvt [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "codecvt" export * } @@ -78,7 +76,6 @@ module std_expected [system] { export * } module std_filesystem [system] { - @requires_LIBCXX_ENABLE_FILESYSTEM@ header "filesystem" export * } @@ -91,8 +88,6 @@ module std_forward_list [system] { export * } module std_fstream [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ - @requires_LIBCXX_ENABLE_FILESYSTEM@ header "fstream" export * } @@ -101,7 +96,6 @@ module std_functional [system] { export * } module std_future [system] { - @requires_LIBCXX_ENABLE_THREADS@ header "future" export * } @@ -110,12 +104,10 @@ module std_initializer_list [system] { export * } module std_iomanip [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "iomanip" export * } module std_ios [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "ios" export * } @@ -124,12 +116,10 @@ module std_iosfwd [system] { export * } module std_iostream [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "iostream" export * } module std_istream [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "istream" export * } @@ -138,7 +128,6 @@ module std_iterator [system] { export * } module std_latch [system] { - @requires_LIBCXX_ENABLE_THREADS@ header "latch" export * } @@ -151,7 +140,6 @@ module std_list [system] { export * } module std_locale [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "locale" export * } @@ -192,7 +180,6 @@ module std_optional [system] { export * } module std_ostream 
[system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "ostream" export * } @@ -217,7 +204,6 @@ module std_ratio [system] { export * } module std_regex [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "regex" export * } @@ -226,7 +212,6 @@ module std_scoped_allocator [system] { export * } module std_semaphore [system] { - @requires_LIBCXX_ENABLE_THREADS@ header "semaphore" export * } @@ -235,7 +220,6 @@ module std_set [system] { export * } module std_shared_mutex [system] { - @requires_LIBCXX_ENABLE_THREADS@ header "shared_mutex" export std_version } @@ -250,7 +234,6 @@ module std_span [system] { export std_private_span_span_fwd } module std_sstream [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "sstream" export * } @@ -263,12 +246,10 @@ module std_stdexcept [system] { export * } module std_stop_token { - @requires_LIBCXX_ENABLE_THREADS@ header "stop_token" export * } module std_streambuf [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "streambuf" export * } @@ -281,7 +262,6 @@ module std_string_view [system] { export * } module std_strstream [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "strstream" export * } @@ -290,7 +270,6 @@ module std_system_error [system] { export * } module std_thread [system] { - @requires_LIBCXX_ENABLE_THREADS@ header "thread" export * } @@ -377,7 +356,6 @@ module std_climits [system] { export * } module std_clocale [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "clocale" export * } @@ -435,12 +413,10 @@ module std_cuchar [system] { export * } module std_cwchar [system] { - @requires_LIBCXX_ENABLE_WIDE_CHARACTERS@ header "cwchar" export * } module std_cwctype [system] { - @requires_LIBCXX_ENABLE_WIDE_CHARACTERS@ header "cwctype" export * } @@ -477,7 +453,6 @@ module std_limits_h [system] { export * } module std_locale_h [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "locale.h" export * } @@ -493,8 +468,6 @@ module std_setjmp_h [system] { // FIXME: is missing. 
// provided by compiler. module std_stdatomic_h [system] { - @requires_LIBCXX_ENABLE_THREADS@ - requires cplusplus23 header "stdatomic.h" export * } @@ -536,21 +509,17 @@ module std_uchar_h [system] { } // provided by C library. module std_wchar_h [system] { - @requires_LIBCXX_ENABLE_WIDE_CHARACTERS@ // 's __need_* macros require textual inclusion. textual header "wchar.h" export * } module std_wctype_h [system] { - @requires_LIBCXX_ENABLE_WIDE_CHARACTERS@ header "wctype.h" export * } // Experimental C++ standard library interfaces module std_experimental [system] { - requires cplusplus11 - module deque { header "experimental/deque" export * @@ -657,7 +626,6 @@ module std_private_hash_table [system] { export * } module std_private_locale [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "__locale" export * } @@ -1196,7 +1164,6 @@ module std_private_chrono_duration [system] { } module std_private_chrono_file_clock [system] { header "__chrono/file_clock.h" } module std_private_chrono_formatter [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "__chrono/formatter.h" } module std_private_chrono_hh_mm_ss [system] { header "__chrono/hh_mm_ss.h" } @@ -1210,11 +1177,9 @@ module std_private_chrono_month [system] { header "__chrono/mon module std_private_chrono_month_weekday [system] { header "__chrono/month_weekday.h" } module std_private_chrono_monthday [system] { header "__chrono/monthday.h" } module std_private_chrono_ostream [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "__chrono/ostream.h" } module std_private_chrono_parser_std_format_spec [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "__chrono/parser_std_format_spec.h" } module std_private_chrono_statically_widen [system] { header "__chrono/statically_widen.h" } @@ -1699,7 +1664,6 @@ module std_private_ranges_filter_view [system] { module std_private_ranges_from_range [system] { header "__ranges/from_range.h" } module std_private_ranges_iota_view [system] { header 
"__ranges/iota_view.h" } module std_private_ranges_istream_view [system] { - @requires_LIBCXX_ENABLE_LOCALIZATION@ header "__ranges/istream_view.h" } module std_private_ranges_join_view [system] { diff --git a/libcxx/src/chrono.cpp b/libcxx/src/chrono.cpp index 0990d8dc181c2..f1596132024c9 100644 --- a/libcxx/src/chrono.cpp +++ b/libcxx/src/chrono.cpp @@ -31,7 +31,7 @@ # include // for gettimeofday and timeval #endif -#if defined(__APPLE__) || (defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0) +#if defined(__APPLE__) || defined (__gnu_hurd__) || (defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0) # define _LIBCPP_HAS_CLOCK_GETTIME #endif diff --git a/libcxx/src/filesystem/filesystem_clock.cpp b/libcxx/src/filesystem/filesystem_clock.cpp index d00cdc6df3437..fbb19ac68df55 100644 --- a/libcxx/src/filesystem/filesystem_clock.cpp +++ b/libcxx/src/filesystem/filesystem_clock.cpp @@ -29,7 +29,7 @@ # include // for gettimeofday and timeval #endif -#if defined(__APPLE__) || (defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0) +#if defined(__APPLE__) || defined (__gnu_hurd__) || (defined(_POSIX_TIMERS) && _POSIX_TIMERS > 0) # define _LIBCPP_HAS_CLOCK_GETTIME #endif diff --git a/libcxx/test/libcxx/assertions/headers_declare_verbose_abort.gen.py b/libcxx/test/libcxx/assertions/headers_declare_verbose_abort.gen.py index f72c257402936..a4e1c3c29c936 100644 --- a/libcxx/test/libcxx/assertions/headers_declare_verbose_abort.gen.py +++ b/libcxx/test/libcxx/assertions/headers_declare_verbose_abort.gen.py @@ -14,7 +14,7 @@ import sys sys.path.append(sys.argv[1]) -from libcxx.test.header_information import lit_header_restrictions, public_headers +from libcxx.header_information import lit_header_restrictions, public_headers for header in public_headers: # Skip C compatibility headers. 
diff --git a/libcxx/test/libcxx/clang_tidy.gen.py b/libcxx/test/libcxx/clang_tidy.gen.py index a7b8e7b3ec549..b2f1a171507d1 100644 --- a/libcxx/test/libcxx/clang_tidy.gen.py +++ b/libcxx/test/libcxx/clang_tidy.gen.py @@ -12,7 +12,7 @@ import sys sys.path.append(sys.argv[1]) -from libcxx.test.header_information import lit_header_restrictions, public_headers +from libcxx.header_information import lit_header_restrictions, public_headers for header in public_headers: BLOCKLIT = '' # block Lit from interpreting a RUN/XFAIL/etc inside the generation script diff --git a/libcxx/test/libcxx/double_include.gen.py b/libcxx/test/libcxx/double_include.gen.py index ad18121d53be0..85055dfc703de 100644 --- a/libcxx/test/libcxx/double_include.gen.py +++ b/libcxx/test/libcxx/double_include.gen.py @@ -12,7 +12,7 @@ import sys sys.path.append(sys.argv[1]) -from libcxx.test.header_information import lit_header_restrictions, public_headers +from libcxx.header_information import lit_header_restrictions, public_headers for header in public_headers: BLOCKLIT = '' # block Lit from interpreting a RUN/XFAIL/etc inside the generation script diff --git a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp index cdae8e8834e65..d75951fdf890e 100644 --- a/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp +++ b/libcxx/test/libcxx/gdb/gdb_pretty_printer_test.sh.cpp @@ -15,7 +15,7 @@ // UNSUPPORTED: clang-15, clang-16, clang-17 // TODO: Investigate this failure on GCC 12 (in Ubuntu Jammy) -// UNSUPPORTED: gcc-12 +// UNSUPPORTED: gcc-12, gcc-13 // RUN: %{cxx} %{flags} %s -o %t.exe %{compile_flags} -g %{link_flags} // Ensure locale-independence for unicode tests. 
diff --git a/libcxx/test/libcxx/header_inclusions.gen.py b/libcxx/test/libcxx/header_inclusions.gen.py index f41ac27b651b3..cdbc5b34b5152 100644 --- a/libcxx/test/libcxx/header_inclusions.gen.py +++ b/libcxx/test/libcxx/header_inclusions.gen.py @@ -13,7 +13,7 @@ import sys sys.path.append(sys.argv[1]) -from libcxx.test.header_information import lit_header_restrictions, public_headers, mandatory_inclusions +from libcxx.header_information import lit_header_restrictions, public_headers, mandatory_inclusions for header in public_headers: header_guard = lambda h: f"_LIBCPP_{h.upper().replace('.', '_').replace('/', '_')}" diff --git a/libcxx/test/libcxx/libcpp_version.gen.py b/libcxx/test/libcxx/libcpp_version.gen.py index 6a43d5dc3e4ae..47439b08fe51b 100644 --- a/libcxx/test/libcxx/libcpp_version.gen.py +++ b/libcxx/test/libcxx/libcpp_version.gen.py @@ -12,7 +12,7 @@ import sys sys.path.append(sys.argv[1]) -from libcxx.test.header_information import lit_header_restrictions, public_headers +from libcxx.header_information import lit_header_restrictions, public_headers for header in public_headers: print(f"""\ diff --git a/libcxx/test/libcxx/module_std.gen.py b/libcxx/test/libcxx/module_std.gen.py index 787317888d20d..db0678e221bef 100644 --- a/libcxx/test/libcxx/module_std.gen.py +++ b/libcxx/test/libcxx/module_std.gen.py @@ -21,7 +21,7 @@ import sys sys.path.append(sys.argv[1]) -from libcxx.test.header_information import toplevel_headers +from libcxx.header_information import toplevel_headers BLOCKLIT = ( "" # block Lit from interpreting a RUN/XFAIL/etc inside the generation script diff --git a/libcxx/test/libcxx/modules_include.gen.py b/libcxx/test/libcxx/modules_include.gen.py index b6bad1b8a104d..2e9fd73421ed2 100644 --- a/libcxx/test/libcxx/modules_include.gen.py +++ b/libcxx/test/libcxx/modules_include.gen.py @@ -14,10 +14,11 @@ import sys sys.path.append(sys.argv[1]) -from libcxx.test.header_information import lit_header_restrictions, public_headers +from 
libcxx.header_information import lit_header_restrictions, public_headers + +BLOCKLIT = '' # block Lit from interpreting a RUN/XFAIL/etc inside the generation script for header in public_headers: - BLOCKLIT = '' # block Lit from interpreting a RUN/XFAIL/etc inside the generation script print(f"""\ //--- {header}.compile.pass.cpp // RUN{BLOCKLIT}: %{{cxx}} %s %{{flags}} %{{compile_flags}} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only @@ -45,7 +46,7 @@ #include <{header}> """) -print(f""" +print(f"""\ //--- __std_clang_module.compile.pass.mm // RUN{BLOCKLIT}: %{{cxx}} %s %{{flags}} %{{compile_flags}} -fmodules -fcxx-modules -fmodules-cache-path=%t -fsyntax-only @@ -68,10 +69,6 @@ // TODO: Investigate this failure // UNSUPPORTED{BLOCKLIT}: LIBCXX-FREEBSD-FIXME -// Lit seems to compile this twice: once with the default flags and once with with -// the flags specified in the RUN directive. Guard the first compile from failing. -#if __has_feature(modules) @import std; -#endif """) diff --git a/libcxx/test/libcxx/nasty_macros.gen.py b/libcxx/test/libcxx/nasty_macros.gen.py index fdc308416f341..3c501a981d033 100644 --- a/libcxx/test/libcxx/nasty_macros.gen.py +++ b/libcxx/test/libcxx/nasty_macros.gen.py @@ -13,7 +13,7 @@ import sys sys.path.append(sys.argv[1]) -from libcxx.test.header_information import lit_header_restrictions, public_headers +from libcxx.header_information import lit_header_restrictions, public_headers for header in public_headers: print(f"""\ diff --git a/libcxx/test/libcxx/no_assert_include.gen.py b/libcxx/test/libcxx/no_assert_include.gen.py index 45152a35f3177..a5e733d2b48a1 100644 --- a/libcxx/test/libcxx/no_assert_include.gen.py +++ b/libcxx/test/libcxx/no_assert_include.gen.py @@ -13,7 +13,7 @@ import sys sys.path.append(sys.argv[1]) -from libcxx.test.header_information import lit_header_restrictions, public_headers +from libcxx.header_information import lit_header_restrictions, public_headers for header in public_headers: if header 
== 'cassert': diff --git a/libcxx/test/libcxx/numerics/c.math/constexpr-cxx23-clang.pass.cpp b/libcxx/test/libcxx/numerics/c.math/constexpr-cxx23-clang.pass.cpp index a07260a34516f..31511064ce7ca 100644 --- a/libcxx/test/libcxx/numerics/c.math/constexpr-cxx23-clang.pass.cpp +++ b/libcxx/test/libcxx/numerics/c.math/constexpr-cxx23-clang.pass.cpp @@ -58,9 +58,15 @@ int main(int, char**) { ASSERT_NOT_CONSTEXPR_CXX23(std::frexp(0.0f, &DummyInt) == 0.0f); ASSERT_NOT_CONSTEXPR_CXX23(std::frexp(0.0, &DummyInt) == 0.0); +//FIXME: currently linux powerpc does not support this expansion +// since 0.0L lowers to ppcf128 and special handling is required. +#if !defined(__LONG_DOUBLE_IBM128__) ASSERT_NOT_CONSTEXPR_CXX23(std::frexp(0.0L, &DummyInt) == 0.0L); +#endif ASSERT_NOT_CONSTEXPR_CXX23(std::frexpf(0.0f, &DummyInt) == 0.0f); +#if !defined(__LONG_DOUBLE_IBM128__) ASSERT_NOT_CONSTEXPR_CXX23(std::frexpl(0.0L, &DummyInt) == 0.0L); +#endif ASSERT_NOT_CONSTEXPR_CXX23(std::ilogb(1.0f) == 0); ASSERT_NOT_CONSTEXPR_CXX23(std::ilogb(1.0) == 0); diff --git a/libcxx/test/libcxx/transitive_includes.gen.py b/libcxx/test/libcxx/transitive_includes.gen.py index c446ceff7fef6..2ac5277878bee 100644 --- a/libcxx/test/libcxx/transitive_includes.gen.py +++ b/libcxx/test/libcxx/transitive_includes.gen.py @@ -20,7 +20,7 @@ import sys sys.path.append(sys.argv[1]) -from libcxx.test.header_information import lit_header_restrictions, public_headers +from libcxx.header_information import lit_header_restrictions, public_headers import re diff --git a/libcxx/test/std/algorithms/robust_against_adl.compile.pass.cpp b/libcxx/test/std/algorithms/robust_against_adl.compile.pass.cpp index 1411796d65963..77c88873073c9 100644 --- a/libcxx/test/std/algorithms/robust_against_adl.compile.pass.cpp +++ b/libcxx/test/std/algorithms/robust_against_adl.compile.pass.cpp @@ -11,7 +11,7 @@ // https://buildkite.com/llvm-project/libcxx-ci/builds/15823#0184fc0b-d56b-4774-9e1d-35fe24e09e37 // It seems like the CI gcc version 
is buggy. I can't reproduce the failure on my system or on // godbolt (https://godbolt.org/z/rsPv8e8fn). -// UNSUPPORTED: gcc-12 +// UNSUPPORTED: gcc-12, gcc-13 #include #include diff --git a/libcxx/test/std/ranges/range.utility/range.utility.conv/to.pass.cpp b/libcxx/test/std/ranges/range.utility/range.utility.conv/to.pass.cpp index 75f55bc420d0e..03270f25fd92b 100644 --- a/libcxx/test/std/ranges/range.utility/range.utility.conv/to.pass.cpp +++ b/libcxx/test/std/ranges/range.utility/range.utility.conv/to.pass.cpp @@ -19,6 +19,7 @@ #include #include "container.h" #include "test_iterators.h" +#include "test_macros.h" #include "test_range.h" template @@ -119,6 +120,7 @@ struct Fallback { constexpr void push_back(value_type) {} constexpr value_type* begin() { return &x; } constexpr value_type* end() { return &x; } + std::size_t size() const { return 0; } }; struct CtrDirectOrFallback : Fallback { @@ -180,7 +182,7 @@ struct Reservable : Fallback { reserve_called = true; } }; -static_assert(std::ranges::__reservable_container>); +LIBCPP_STATIC_ASSERT(std::ranges::__reservable_container>); constexpr void test_constraints() { { // Case 1 -- construct directly from the range. 
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.char_array.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.char_array.pass.cpp index 70ddab63f0c41..84e2c8ab1af0c 100644 --- a/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.char_array.pass.cpp +++ b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.char_array.pass.cpp @@ -7,7 +7,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // TODO FMT __builtin_memcpy isn't constexpr in GCC -// UNSUPPORTED: gcc-12 +// UNSUPPORTED: gcc-12, gcc-13 // diff --git a/libcxx/test/std/utilities/format/format.functions/format_tests.h b/libcxx/test/std/utilities/format/format.functions/format_tests.h index 7a9cdaab7e93e..0a5c6649240d6 100644 --- a/libcxx/test/std/utilities/format/format.functions/format_tests.h +++ b/libcxx/test/std/utilities/format/format.functions/format_tests.h @@ -3145,8 +3145,13 @@ void format_tests(TestFunction check, ExceptionTest check_exception) { // *** Test invalid format strings *** check_exception("The format string terminates at a '{'", SV("{")); + check_exception("The argument index value is too large for the number of arguments supplied", SV("{:")); check_exception("The replacement field misses a terminating '}'", SV("{:"), 42); + check_exception("The argument index should end with a ':' or a '}'", SV("{0")); + check_exception("The argument index value is too large for the number of arguments supplied", SV("{0:")); + check_exception("The replacement field misses a terminating '}'", SV("{0:"), 42); + check_exception("The format string contains an invalid escape sequence", SV("}")); check_exception("The format string contains an invalid escape sequence", SV("{:}-}"), 42); diff --git a/libcxx/test/std/utilities/format/format.functions/vformat.pass.cpp b/libcxx/test/std/utilities/format/format.functions/vformat.pass.cpp index 6943ddc2f968e..e16d50f18284f 
100644 --- a/libcxx/test/std/utilities/format/format.functions/vformat.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/vformat.pass.cpp @@ -50,6 +50,17 @@ auto test_exception = }; int main(int, char**) { +#if !defined(TEST_HAS_NO_EXCEPTIONS) + // reproducer of https://llvm.org/PR65011 + try { + const char fmt[] = {'{', '0'}; + char buf[4096]; + [[maybe_unused]] auto ignored = + std::vformat_to(buf, std::string_view{fmt, fmt + sizeof(fmt)}, std::make_format_args()); + } catch (...) { + } +#endif // !defined(TEST_HAS_NO_EXCEPTIONS) + format_tests(test, test_exception); #ifndef TEST_HAS_NO_WIDE_CHARACTERS diff --git a/libcxx/test/std/utilities/meta/meta.rel/is_convertible.pass.cpp b/libcxx/test/std/utilities/meta/meta.rel/is_convertible.pass.cpp index 804650fde3f3e..b96c9b11e2962 100644 --- a/libcxx/test/std/utilities/meta/meta.rel/is_convertible.pass.cpp +++ b/libcxx/test/std/utilities/meta/meta.rel/is_convertible.pass.cpp @@ -117,9 +117,12 @@ int main(int, char**) // Non-referencable function type static_assert((!std::is_convertible::value), ""); +// TODO(LLVM-19): Re-enable this once we switch to GCC 14. 
This is https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109680 +#ifndef TEST_COMPILER_GCC static_assert((!std::is_convertible::value), ""); static_assert((!std::is_convertible::value), ""); static_assert((!std::is_convertible::value), ""); +#endif static_assert((!std::is_convertible::value), ""); static_assert((!std::is_convertible::value), ""); static_assert((!std::is_convertible::value), ""); diff --git a/libcxx/test/std/utilities/meta/meta.rel/is_convertible_fallback.pass.cpp b/libcxx/test/std/utilities/meta/meta.rel/is_convertible_fallback.pass.cpp index 28495cfebd45c..6e420d63dbd59 100644 --- a/libcxx/test/std/utilities/meta/meta.rel/is_convertible_fallback.pass.cpp +++ b/libcxx/test/std/utilities/meta/meta.rel/is_convertible_fallback.pass.cpp @@ -10,6 +10,8 @@ // ADDITIONAL_COMPILE_FLAGS: -D _LIBCPP_USE_IS_CONVERTIBLE_FALLBACK +// UNSUPPORTED: gcc-13 + // type_traits // is_convertible diff --git a/libcxx/test/std/utilities/template.bitset/bitset.members/op_or_eq.pass.cpp b/libcxx/test/std/utilities/template.bitset/bitset.members/op_or_eq.pass.cpp index 0d0d62432e01d..8f3c0959c622f 100644 --- a/libcxx/test/std/utilities/template.bitset/bitset.members/op_or_eq.pass.cpp +++ b/libcxx/test/std/utilities/template.bitset/bitset.members/op_or_eq.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-steps): -fconstexpr-steps=15000000 + // bitset& operator|=(const bitset& rhs); // constexpr since C++23 #include diff --git a/libcxx/test/std/utilities/template.bitset/bitset.members/right_shift_eq.pass.cpp b/libcxx/test/std/utilities/template.bitset/bitset.members/right_shift_eq.pass.cpp index bf182523880d1..ef4b7fc60329d 100644 --- a/libcxx/test/std/utilities/template.bitset/bitset.members/right_shift_eq.pass.cpp +++ b/libcxx/test/std/utilities/template.bitset/bitset.members/right_shift_eq.pass.cpp @@ -6,6 +6,8 @@ // 
//===----------------------------------------------------------------------===// +// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-steps): -fconstexpr-steps=15000000 + // bitset& operator<<=(size_t pos); // constexpr since C++23 #include diff --git a/libcxx/utils/CMakeLists.txt b/libcxx/utils/CMakeLists.txt index 0338432f344a0..ce4e289290dce 100644 --- a/libcxx/utils/CMakeLists.txt +++ b/libcxx/utils/CMakeLists.txt @@ -2,6 +2,10 @@ add_custom_target(libcxx-generate-feature-test-macros COMMAND "${Python3_EXECUTABLE}" "${LIBCXX_SOURCE_DIR}/utils/generate_feature_test_macro_components.py" COMMENT "Generate the header and tests for feature test macros.") +add_custom_target(libcxx-generate-std-clang-module-header + COMMAND "${Python3_EXECUTABLE}" "${CMAKE_CURRENT_SOURCE_DIR}/generate_std_clang_module_header.py" + COMMENT "Generate the <__std_clang_module> header") + add_custom_target(libcxx-generate-extended-grapheme-cluster-tables COMMAND "${Python3_EXECUTABLE}" @@ -38,6 +42,7 @@ add_custom_target(libcxx-generate-iwyu-mapping add_custom_target(libcxx-generate-files DEPENDS libcxx-generate-feature-test-macros + libcxx-generate-std-clang-module-header libcxx-generate-extended-grapheme-cluster-tables libcxx-generate-extended-grapheme-cluster-tests libcxx-generate-escaped-output-table diff --git a/libcxx/utils/generate_std_clang_module_header.py b/libcxx/utils/generate_std_clang_module_header.py new file mode 100644 index 0000000000000..afdc9f653c2a2 --- /dev/null +++ b/libcxx/utils/generate_std_clang_module_header.py @@ -0,0 +1,64 @@ +# ===----------------------------------------------------------------------===## +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ===----------------------------------------------------------------------===## + +import operator +import os.path + +import libcxx.header_information + +header_restrictions = libcxx.header_information.header_restrictions + +libcxx_include_directory = os.path.join( + os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "include" +) +with open( + os.path.join(libcxx_include_directory, "__std_clang_module"), "w" +) as std_clang_module_header: + std_clang_module_header.write( + """\ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// WARNING, this entire header is generated by +// utils/generate_std_clang_module_header.py +// DO NOT MODIFY! + +// This header should not be directly included, it's exclusively to import all +// of the libc++ public clang modules for the `std` clang module to export. In +// other words, it's to facilitate `@import std;` in Objective-C++ and `import std` +// in Swift to expose all of the libc++ interfaces. This is generally not +// recommended, however there are some clients that need to import all of libc++ +// without knowing what "all" is. +#if !__building_module(std) +# error "Do not include this header directly, include individual headers instead" +#endif + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +""" + ) + # Include the angle brackets in sorting so that sorts before + # like check-format wants. 
+ for include, header in sorted([(f"<{header}>", header) for header in libcxx.header_information.public_headers]): + header_restriction = header_restrictions.get(header) + if header_restriction: + std_clang_module_header.write(f"#if {header_restriction}\n") + std_clang_module_header.write(f"# include {include}\n") + std_clang_module_header.write(f"#endif\n") + else: + std_clang_module_header.write(f"#include {include}\n") diff --git a/libcxx/utils/libcxx/test/header_information.py b/libcxx/utils/libcxx/header_information.py similarity index 78% rename from libcxx/utils/libcxx/test/header_information.py rename to libcxx/utils/libcxx/header_information.py index 9ca0e9548c724..169638d5efc11 100644 --- a/libcxx/utils/libcxx/test/header_information.py +++ b/libcxx/utils/libcxx/header_information.py @@ -8,6 +8,46 @@ import os, pathlib +header_restrictions = { + # headers with #error directives + "atomic": "!defined(_LIBCPP_HAS_NO_ATOMIC_HEADER)", + "stdatomic.h": "!defined(_LIBCPP_HAS_NO_ATOMIC_HEADER)", + + # headers with #error directives + "ios": "!defined(_LIBCPP_HAS_NO_LOCALIZATION)", + "locale.h": "!defined(_LIBCPP_HAS_NO_LOCALIZATION)", + # transitive includers of the above headers + "clocale": "!defined(_LIBCPP_HAS_NO_LOCALIZATION)", + "codecvt": "!defined(_LIBCPP_HAS_NO_LOCALIZATION)", + "experimental/regex": "!defined(_LIBCPP_HAS_NO_LOCALIZATION)", + "fstream": "!defined(_LIBCPP_HAS_NO_LOCALIZATION)", + "iomanip": "!defined(_LIBCPP_HAS_NO_LOCALIZATION)", + "iostream": "!defined(_LIBCPP_HAS_NO_LOCALIZATION)", + "istream": "!defined(_LIBCPP_HAS_NO_LOCALIZATION)", + "locale": "!defined(_LIBCPP_HAS_NO_LOCALIZATION)", + "ostream": "!defined(_LIBCPP_HAS_NO_LOCALIZATION)", + "regex": "!defined(_LIBCPP_HAS_NO_LOCALIZATION)", + "sstream": "!defined(_LIBCPP_HAS_NO_LOCALIZATION)", + "streambuf": "!defined(_LIBCPP_HAS_NO_LOCALIZATION)", + "strstream": "!defined(_LIBCPP_HAS_NO_LOCALIZATION)", + + # headers with #error directives + "barrier": 
"!defined(_LIBCPP_HAS_NO_THREADS)", + "future": "!defined(_LIBCPP_HAS_NO_THREADS)", + "latch": "!defined(_LIBCPP_HAS_NO_THREADS)", + "semaphore": "!defined(_LIBCPP_HAS_NO_THREADS)", + "shared_mutex": "!defined(_LIBCPP_HAS_NO_THREADS)", + "stop_token": "!defined(_LIBCPP_HAS_NO_THREADS)", + "thread": "!defined(_LIBCPP_HAS_NO_THREADS)", + + # headers with #error directives + "wchar.h": "!defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS)", + "wctype.h": "!defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS)", + # transitive includers of the above headers + "cwchar": "!defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS)", + "cwctype": "!defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS)", +} + lit_header_restrictions = { "barrier": "// UNSUPPORTED: no-threads, c++03, c++11, c++14, c++17", "clocale": "// UNSUPPORTED: no-localization", @@ -136,7 +176,7 @@ def is_header(file): and file.name != "libcxx.imp" ) -libcxx_root = pathlib.Path(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) +libcxx_root = pathlib.Path(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) include = pathlib.Path(os.path.join(libcxx_root, "include")) test = pathlib.Path(os.path.join(libcxx_root, "test")) assert libcxx_root.exists() diff --git a/libcxx/utils/libcxx/test/params.py b/libcxx/utils/libcxx/test/params.py index f43c634a1644f..f1401d7afc635 100644 --- a/libcxx/utils/libcxx/test/params.py +++ b/libcxx/utils/libcxx/test/params.py @@ -55,6 +55,14 @@ # Don't fail compilation in case the compiler fails to perform the requested # loop vectorization. "-Wno-pass-failed", + + # TODO: Find out why GCC warns in lots of places (is this a problem with always_inline?) 
+ "-Wno-dangling-reference", + "-Wno-mismatched-new-delete", + "-Wno-redundant-move", + + # This doesn't make sense in real code, but we have to test it because the standard requires us to not break + "-Wno-self-move", ] _allStandards = ["c++03", "c++11", "c++14", "c++17", "c++20", "c++23", "c++26"] diff --git a/libcxxabi/test/catch_member_function_pointer_02.pass.cpp b/libcxxabi/test/catch_member_function_pointer_02.pass.cpp index 3236f9aae1de1..667447db1e68a 100644 --- a/libcxxabi/test/catch_member_function_pointer_02.pass.cpp +++ b/libcxxabi/test/catch_member_function_pointer_02.pass.cpp @@ -15,7 +15,7 @@ // GCC supports noexcept function types but this test still fails. // This is likely a bug in their implementation. Investigation needed. -// XFAIL: gcc-11, gcc-12 +// XFAIL: gcc-11, gcc-12, gcc-13 #include diff --git a/lld/ELF/Arch/PPC.cpp b/lld/ELF/Arch/PPC.cpp index 87942c1e92452..3d21edb3453a1 100644 --- a/lld/ELF/Arch/PPC.cpp +++ b/lld/ELF/Arch/PPC.cpp @@ -471,10 +471,14 @@ void PPC::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, if (insn >> 26 != 31) error("unrecognized instruction for IE to LE R_PPC_TLS"); // addi rT, rT, x@tls --> addi rT, rT, x@tprel@l - uint32_t dFormOp = getPPCDFormOp((read32(loc) & 0x000007fe) >> 1); - if (dFormOp == 0) - error("unrecognized instruction for IE to LE R_PPC_TLS"); - write32(loc, (dFormOp << 26) | (insn & 0x03ff0000) | lo(val)); + unsigned secondaryOp = (read32(loc) & 0x000007fe) >> 1; + uint32_t dFormOp = getPPCDFormOp(secondaryOp); + if (dFormOp == 0) { // Expecting a DS-Form instruction. 
+ dFormOp = getPPCDSFormOp(secondaryOp); + if (dFormOp == 0) + error("unrecognized instruction for IE to LE R_PPC_TLS"); + } + write32(loc, (dFormOp | (insn & 0x03ff0000) | lo(val))); break; } default: diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index 36b1d0e3c9be4..0b6459f852c0b 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -37,6 +37,12 @@ enum XFormOpcd { STHX = 407, STWX = 151, STDX = 149, + LHAX = 343, + LWAX = 341, + LFSX = 535, + LFDX = 599, + STFSX = 663, + STFDX = 727, ADD = 266, }; @@ -49,7 +55,6 @@ enum DFormOpcd { LWZ = 32, LWZU = 33, LFSU = 49, - LD = 58, LFDU = 51, STB = 38, STBU = 39, @@ -59,10 +64,20 @@ enum DFormOpcd { STWU = 37, STFSU = 53, STFDU = 55, - STD = 62, + LHA = 42, + LFS = 48, + LFD = 50, + STFS = 52, + STFD = 54, ADDI = 14 }; +enum DSFormOpcd { + LD = 58, + LWA = 58, + STD = 62 +}; + constexpr uint32_t NOP = 0x60000000; enum class PPCLegacyInsn : uint32_t { @@ -825,26 +840,48 @@ void PPC64::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, } } +// Map X-Form instructions to their DS-Form counterparts, if applicable. +// The full encoding is returned here to distinguish between the different +// DS-Form instructions. 
+unsigned elf::getPPCDSFormOp(unsigned secondaryOp) { + switch (secondaryOp) { + case LWAX: + return (LWA << 26) | 0x2; + case LDX: + return LD << 26; + case STDX: + return STD << 26; + default: + return 0; + } +} + unsigned elf::getPPCDFormOp(unsigned secondaryOp) { switch (secondaryOp) { case LBZX: - return LBZ; + return LBZ << 26; case LHZX: - return LHZ; + return LHZ << 26; case LWZX: - return LWZ; - case LDX: - return LD; + return LWZ << 26; case STBX: - return STB; + return STB << 26; case STHX: - return STH; + return STH << 26; case STWX: - return STW; - case STDX: - return STD; + return STW << 26; + case LHAX: + return LHA << 26; + case LFSX: + return LFS << 26; + case LFDX: + return LFD << 26; + case STFSX: + return STFS << 26; + case STFDX: + return STFD << 26; case ADD: - return ADDI; + return ADDI << 26; default: return 0; } @@ -898,10 +935,16 @@ void PPC64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, error("unrecognized instruction for IE to LE R_PPC64_TLS"); uint32_t secondaryOp = (read32(loc) & 0x000007FE) >> 1; // bits 21-30 uint32_t dFormOp = getPPCDFormOp(secondaryOp); - if (dFormOp == 0) - error("unrecognized instruction for IE to LE R_PPC64_TLS"); - write32(loc, ((dFormOp << 26) | (read32(loc) & 0x03FFFFFF))); - relocateNoSym(loc + offset, R_PPC64_TPREL16_LO, val); + uint32_t finalReloc; + if (dFormOp == 0) { // Expecting a DS-Form instruction. + dFormOp = getPPCDSFormOp(secondaryOp); + if (dFormOp == 0) + error("unrecognized instruction for IE to LE R_PPC64_TLS"); + finalReloc = R_PPC64_TPREL16_LO_DS; + } else + finalReloc = R_PPC64_TPREL16_LO; + write32(loc, dFormOp | (read32(loc) & 0x03ff0000)); + relocateNoSym(loc + offset, finalReloc, val); } else if (locAsInt % 4 == 1) { // If the offset is not 4 byte aligned then we have a PCRel type reloc. 
// This version of the relocation is offset by one byte from the @@ -926,9 +969,12 @@ void PPC64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, } } else { uint32_t dFormOp = getPPCDFormOp(secondaryOp); - if (dFormOp == 0) - errorOrWarn("unrecognized instruction for IE to LE R_PPC64_TLS"); - write32(loc - 1, ((dFormOp << 26) | (tlsInstr & 0x03FF0000))); + if (dFormOp == 0) { // Expecting a DS-Form instruction. + dFormOp = getPPCDSFormOp(secondaryOp); + if (dFormOp == 0) + errorOrWarn("unrecognized instruction for IE to LE R_PPC64_TLS"); + } + write32(loc - 1, (dFormOp | (tlsInstr & 0x03ff0000))); } } else { errorOrWarn("R_PPC64_TLS must be either 4 byte aligned or one byte " diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h index 9d4f22dd93f1b..47dbe6b4d1c65 100644 --- a/lld/ELF/Target.h +++ b/lld/ELF/Target.h @@ -207,6 +207,7 @@ void processArmCmseSymbols(); void writePPC32GlinkSection(uint8_t *buf, size_t numEntries); unsigned getPPCDFormOp(unsigned secondaryOp); +unsigned getPPCDSFormOp(unsigned secondaryOp); // In the PowerPC64 Elf V2 abi a function can have 2 entry points. The first // is a global entry point (GEP) which typically is used to initialize the TOC diff --git a/lld/test/ELF/ppc32-tls-ie.s b/lld/test/ELF/ppc32-tls-ie.s index f9f46452484a6..84a105c8626b3 100644 --- a/lld/test/ELF/ppc32-tls-ie.s +++ b/lld/test/ELF/ppc32-tls-ie.s @@ -12,8 +12,8 @@ # IE-REL: FLAGS STATIC_TLS ## A non-preemptable symbol (b) has 0 st_shndx. 
# IE-REL: .rela.dyn { -# IE-REL-NEXT: 0x20238 R_PPC_TPREL32 - 0xC -# IE-REL-NEXT: 0x20234 R_PPC_TPREL32 a 0x0 +# IE-REL-NEXT: 0x20258 R_PPC_TPREL32 - 0xC +# IE-REL-NEXT: 0x20254 R_PPC_TPREL32 a 0x0 # IE-REL-NEXT: } ## &.got[3] - _GLOBAL_OFFSET_TABLE_ = 12 @@ -44,6 +44,12 @@ lbzx 10, 8, c@tls # IE-NEXT: stbx 14, 4, 2 # IE-NEXT: sthx 15, 5, 2 # IE-NEXT: stwx 16, 6, 2 +# IE-NEXT: lhax 17, 7, 2 +# IE-NEXT: lwax 18, 8, 2 +# IE-NEXT: lfsx 19, 9, 2 +# IE-NEXT: lfdx 20, 10, 2 +# IE-NEXT: stfsx 21, 11, 2 +# IE-NEXT: stfdx 22, 12, 2 ## In LE, these X-Form instructions are changed to their corresponding D-Form. # LE-NEXT: lhz 12, -28660(2) @@ -51,12 +57,26 @@ lbzx 10, 8, c@tls # LE-NEXT: stb 14, -28660(4) # LE-NEXT: sth 15, -28660(5) # LE-NEXT: stw 16, -28660(6) +# LE-NEXT: lha 17, -28660(7) +# LE-NEXT: lwa 18, -28660(8) +# LE-NEXT: lfs 19, -28660(9) +# LE-NEXT: lfd 20, -28660(10) +# LE-NEXT: stfs 21, -28660(11) +# LE-NEXT: stfd 22, -28660(12) lhzx 12, 2, s@tls lwzx 13, 3, i@tls stbx 14, 4, c@tls sthx 15, 5, s@tls stwx 16, 6, i@tls +lhax 17, 7, s@tls +lwax 18, 8, i@tls +lfsx 19, 9, f@tls +lfdx 20, 10, d@tls +stfsx 21, 11, f@tls +stfdx 22, 12, d@tls +ldx 23, 13, l@tls +stdx 24, 14, l@tls .section .tbss .globl a @@ -66,3 +86,6 @@ a: c: s: i: +f: +d: +l: diff --git a/lld/test/ELF/ppc64-tls-ie.s b/lld/test/ELF/ppc64-tls-ie.s index 8da808b86c30b..8855e8c012399 100644 --- a/lld/test/ELF/ppc64-tls-ie.s +++ b/lld/test/ELF/ppc64-tls-ie.s @@ -24,10 +24,12 @@ # IE-REL: FLAGS STATIC_TLS # IE-REL: .rela.dyn { -# IE-REL-NEXT: 0x204C8 R_PPC64_TPREL64 c 0x0 -# IE-REL-NEXT: 0x204D0 R_PPC64_TPREL64 s 0x0 -# IE-REL-NEXT: 0x204D8 R_PPC64_TPREL64 i 0x0 -# IE-REL-NEXT: 0x204E0 R_PPC64_TPREL64 l 0x0 +# IE-REL-NEXT: 0x205A8 R_PPC64_TPREL64 c 0x0 +# IE-REL-NEXT: 0x205B0 R_PPC64_TPREL64 s 0x0 +# IE-REL-NEXT: 0x205B8 R_PPC64_TPREL64 i 0x0 +# IE-REL-NEXT: 0x205C0 R_PPC64_TPREL64 l 0x0 +# IE-REL-NEXT: 0x205C8 R_PPC64_TPREL64 f 0x0 +# IE-REL-NEXT: 0x205D0 R_PPC64_TPREL64 d 0x0 # IE-REL-NEXT: } # 
INPUT-REL: R_PPC64_GOT_TPREL16_HA c 0x0 @@ -152,10 +154,64 @@ test_ds: ld 4, l@got@tprel(2) stdx 3, 4, l@tls +# LE-LABEL: : +# LE-NEXT: nop +# LE-NEXT: addis 3, 13, 0 +# LE-NEXT: lha 3, -28670(3) +test_lhax: + addis 3, 2, s@got@tprel@ha + ld 3, s@got@tprel@l(3) + lhax 3, 3, s@tls + +# LE-LABEL: : +# LE-NEXT: nop +# LE-NEXT: addis 3, 13, 0 +# LE-NEXT: lwa 3, -28668(3) +test_lwax: + addis 3, 2, i@got@tprel@ha + ld 3, i@got@tprel@l(3) + lwax 3, 3, i@tls + +# LE-LABEL: : +# LE-NEXT: nop +# LE-NEXT: addis 3, 13, 0 +# LE-NEXT: lfs 3, -28656(3) +test_lfsx: + addis 3, 2, f@got@tprel@ha + ld 3, f@got@tprel@l(3) + lfsx 3, 3, f@tls + +# LE-LABEL: : +# LE-NEXT: nop +# LE-NEXT: addis 3, 13, 0 +# LE-NEXT: lfd 3, -28648(3) +test_lfdx: + addis 3, 2, d@got@tprel@ha + ld 3, d@got@tprel@l(3) + lfdx 3, 3, d@tls + +# LE-LABEL: : +# LE-NEXT: nop +# LE-NEXT: addis 4, 13, 0 +# LE-NEXT: stfs 3, -28656(4) +test_stfsx: + addis 4, 2, f@got@tprel@ha + ld 4, f@got@tprel@l(4) + stfsx 3, 4, f@tls + +# LE-LABEL: : +# LE-NEXT: nop +# LE-NEXT: addis 4, 13, 0 +# LE-NEXT: stfd 3, -28648(4) +test_stfdx: + addis 4, 2, d@got@tprel@ha + ld 4, d@got@tprel@l(4) + stfdx 3, 4, d@tls + # NOREL: There are no relocations in this file. 
.section .tdata,"awT",@progbits -.globl c, s, i, l +.globl c, s, i, l, f, d c: .byte 97 @@ -170,3 +226,9 @@ i: .p2align 3 l: .quad 55 +f: +.long 55 + +.p2align 3 +d: +.quad 55 diff --git a/lld/test/ELF/ppc64-tls-pcrel-ie.s b/lld/test/ELF/ppc64-tls-pcrel-ie.s index f7a828dc41744..38c081f966469 100644 --- a/lld/test/ELF/ppc64-tls-pcrel-ie.s +++ b/lld/test/ELF/ppc64-tls-pcrel-ie.s @@ -29,6 +29,12 @@ SECTIONS { .text_val 0x1002000 : { *(.text_val) } .text_twoval 0x1003000 : { *(.text_twoval) } .text_incrval 0x1004000 : { *(.text_incrval) } + .text_incrval_half 0x1005000 : { *(.text_incrval_half) } + .text_incrval_word 0x1006000 : { *(.text_incrval_word) } + .text_incrval_float 0x1007000 : { *(.text_incrval_float) } + .text_incrval_double 0x1008000 : { *(.text_incrval_double) } + .text_incrval_dword 0x1009000 : { *(.text_incrval_dword) } + .text_incrval_half_zero 0x1010000 : { *(.text_incrval_half_zero) } } #--- defs @@ -42,26 +48,26 @@ y: #--- asm # IE-RELOC: Relocation section '.rela.dyn' at offset 0x10090 contains 2 entries: -# IE-RELOC: 00000000010040f0 0000000100000049 R_PPC64_TPREL64 0000000000000000 x + 0 -# IE-RELOC: 00000000010040f8 0000000200000049 R_PPC64_TPREL64 0000000000000000 y + 0 +# IE-RELOC: 00000000010100f0 0000000100000049 R_PPC64_TPREL64 0000000000000000 x + 0 +# IE-RELOC-NEXT: 00000000010100f8 0000000200000049 R_PPC64_TPREL64 0000000000000000 y + 0 # IE-SYM: Symbol table '.dynsym' contains 3 entries: # IE-SYM: 1: 0000000000000000 0 TLS GLOBAL DEFAULT UND x # IE-SYM: 2: 0000000000000000 0 TLS GLOBAL DEFAULT UND y # IE-GOT: Hex dump of section '.got': -# IE-GOT-NEXT: 0x010040e8 e8c00001 00000000 00000000 00000000 +# IE-GOT-NEXT: 0x010100e8 e8800101 00000000 00000000 00000000 # LE-RELOC: There are no relocations in this file. 
-# LE-SYM: Symbol table '.symtab' contains 8 entries: -# LE-SYM: 6: 0000000000000000 0 TLS GLOBAL DEFAULT 6 x -# LE-SYM: 7: 0000000000000004 0 TLS GLOBAL DEFAULT 6 y +# LE-SYM: Symbol table '.symtab' contains 14 entries: +# LE-SYM: 0000000000000000 0 TLS GLOBAL DEFAULT [[#]] x +# LE-SYM: 0000000000000004 0 TLS GLOBAL DEFAULT [[#]] y # LE-GOT: could not find section '.got' # IE-LABEL: : -# IE-NEXT: pld 3, 12528(0), 1 +# IE-NEXT: pld 3, 61680(0), 1 # IE-NEXT: add 3, 3, 13 # IE-NEXT: blr # LE-LABEL: : @@ -75,7 +81,7 @@ IEAddr: blr # IE-LABEL: : -# IE-NEXT: pld 3, 12512(0), 1 +# IE-NEXT: pld 3, 61664(0), 1 # IE-NEXT: add 4, 3, 13 # IE-NEXT: blr # LE-LABEL: : @@ -89,7 +95,7 @@ IEAddrCopy: blr # IE-LABEL: : -# IE-NEXT: pld 3, 8432(0), 1 +# IE-NEXT: pld 3, 57584(0), 1 # IE-NEXT: lwzx 3, 3, 13 # IE-NEXT: blr # LE-LABEL: : @@ -103,8 +109,8 @@ IEVal: blr # IE-LABEL: : -# IE-NEXT: pld 3, 4336(0), 1 -# IE-NEXT: pld 4, 4336(0), 1 +# IE-NEXT: pld 3, 53488(0), 1 +# IE-NEXT: pld 4, 53488(0), 1 # IE-NEXT: lwzx 3, 3, 13 # IE-NEXT: lwzx 4, 4, 13 # IE-NEXT: blr @@ -123,7 +129,7 @@ IETwoVal: blr # IE-LABEL: : -# IE-NEXT: pld 4, 248(0), 1 +# IE-NEXT: pld 4, 49400(0), 1 # IE-NEXT: lwzx 3, 4, 13 # IE-NEXT: stwx 3, 4, 13 # IE-NEXT: blr @@ -138,3 +144,105 @@ IEIncrementVal: lwzx 3, 4, y@tls@pcrel stwx 3, 4, y@tls@pcrel blr + +# IE-LABEL: : +# IE-NEXT: pld 4, 45304(0), 1 +# IE-NEXT: lhax 3, 4, 13 +# IE-NEXT: sthx 3, 4, 13 +# IE-NEXT: blr +# LE-LABEL: : +# LE-NEXT: paddi 4, 13, -28668, 0 +# LE-NEXT: lha 3, 0(4) +# LE-NEXT: sth 3, 0(4) +# LE-NEXT: blr +.section .text_incrval_half, "ax", %progbits +IEIncrementValHalf: + pld 4, y@got@tprel@pcrel(0), 1 + lhax 3, 4, y@tls@pcrel + sthx 3, 4, y@tls@pcrel + blr + +# IE-LABEL: : +# IE-NEXT: pld 4, 41208(0), 1 +# IE-NEXT: lwax 3, 4, 13 +# IE-NEXT: stwx 3, 4, 13 +# IE-NEXT: blr +# LE-LABEL: : +# LE-NEXT: paddi 4, 13, -28668, 0 +# LE-NEXT: lwa 3, 0(4) +# LE-NEXT: stw 3, 0(4) +# LE-NEXT: blr +.section .text_incrval_word, "ax", %progbits 
+IEIncrementValWord: + pld 4, y@got@tprel@pcrel(0), 1 + lwax 3, 4, y@tls@pcrel + stwx 3, 4, y@tls@pcrel + blr + +# IE-LABEL: : +# IE-NEXT: pld 4, 37112(0), 1 +# IE-NEXT: lfsx 3, 4, 13 +# IE-NEXT: stfsx 3, 4, 13 +# IE-NEXT: blr +# LE-LABEL: : +# LE-NEXT: paddi 4, 13, -28668, 0 +# LE-NEXT: lfs 3, 0(4) +# LE-NEXT: stfs 3, 0(4) +# LE-NEXT: blr +.section .text_incrval_float, "ax", %progbits +IEIncrementValFloat: + pld 4, y@got@tprel@pcrel(0), 1 + lfsx 3, 4, y@tls@pcrel + stfsx 3, 4, y@tls@pcrel + blr + +# IE-LABEL: : +# IE-NEXT: pld 4, 33016(0), 1 +# IE-NEXT: lfdx 3, 4, 13 +# IE-NEXT: stfdx 3, 4, 13 +# IE-NEXT: blr +# LE-LABEL: : +# LE-NEXT: paddi 4, 13, -28668, 0 +# LE-NEXT: lfd 3, 0(4) +# LE-NEXT: stfd 3, 0(4) +# LE-NEXT: blr +.section .text_incrval_double, "ax", %progbits +IEIncrementValDouble: + pld 4, y@got@tprel@pcrel(0), 1 + lfdx 3, 4, y@tls@pcrel + stfdx 3, 4, y@tls@pcrel + blr + +# IE-LABEL: : +# IE-NEXT: pld 4, 28920(0), 1 +# IE-NEXT: ldx 3, 4, 13 +# IE-NEXT: stdx 3, 4, 13 +# IE-NEXT: blr +# LE-LABEL: : +# LE-NEXT: paddi 4, 13, -28668, 0 +# LE-NEXT: ld 3, 0(4) +# LE-NEXT: std 3, 0(4) +# LE-NEXT: blr +.section .text_incrval_dword, "ax", %progbits +IEIncrementValDword: + pld 4, y@got@tprel@pcrel(0), 1 + ldx 3, 4, y@tls@pcrel + stdx 3, 4, y@tls@pcrel + blr + +# IE-LABEL: : +# IE-NEXT: pld 4, 248(0), 1 +# IE-NEXT: lhzx 3, 4, 13 +# IE-NEXT: sthx 3, 4, 13 +# IE-NEXT: blr +# LE-LABEL: : +# LE-NEXT: paddi 4, 13, -28668, 0 +# LE-NEXT: lhz 3, 0(4) +# LE-NEXT: sth 3, 0(4) +# LE-NEXT: blr +.section .text_incrval_half_zero, "ax", %progbits +IEIncrementValHalfZero: + pld 4, y@got@tprel@pcrel(0), 1 + lhzx 3, 4, y@tls@pcrel + sthx 3, 4, y@tls@pcrel + blr diff --git a/lldb/source/Host/windows/FileSystem.cpp b/lldb/source/Host/windows/FileSystem.cpp index b919d9bcd9dd4..4b0cd74b8013b 100644 --- a/lldb/source/Host/windows/FileSystem.cpp +++ b/lldb/source/Host/windows/FileSystem.cpp @@ -101,6 +101,8 @@ int FileSystem::Open(const char *path, int flags, int mode) { std::wstring 
wpath; if (!llvm::ConvertUTF8toWide(path, wpath)) return -1; + // All other bits are rejected by _wsopen_s + mode = mode & (_S_IREAD | _S_IWRITE); int result; ::_wsopen_s(&result, wpath.c_str(), flags, _SH_DENYNO, mode); return result; diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index bd12d8c3964f6..0cb7a6266f1ab 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -230,19 +230,57 @@ Changes to the MIPS Backend Changes to the PowerPC Backend ------------------------------ +* Improved code sequence of materializing 64-bit immediate numbers, expanding + ``is.fpclass`` intrinsic and forwarding stores. +* Implemented DFP instructions (for use via inline asm). +* Improved code gen for vector add. +* Added ability to show statistics of number of entries in the TOC. +* Added Binary Coded Decimal Assist instructions (for use via inline asm). +* Added basic support for vector functions in GlobalISel. +* Added additional X-Form load and store instruction generation for TLS accesses. +* PPC64LE backend is added to JITLink. +* Added various bug fixes and optimizations. +* Added function pointer alignment to the DataLayout for Power, which lets us + make more informed choices about what this alignment defaults to for various + purposes (e.g., C++ pointers to member). If the target ABI uses function + descriptor objects, this is the alignment we would emit the descriptor with. + Otherwise, a function pointer points to a global entry point, so this is at + least the alignment for code on Power (i.e., 4-bytes). + +AIX Support/improvements: + + * A new option ``-mxcoff-roptr`` is added to ``clang`` and ``llc``. When this option is present, constant objects with relocatable address values are put - into the RO data section. This option should be used with the ``-fdata-sections`` - option, and is not supported with ``-fno-data-sections``. The option is - only supported on AIX. 
-* On AIX, teach the profile runtime to check for a build-id string; such string - can be created by the -mxcoff-build-id option. + into the RO data section. This option should be used with the + ``-fdata-sections`` option, and is not supported with ``-fno-data-sections``. + +* Taught the profile runtime to check for a build-id string. Build-id strings + can be created via the ``-mxcoff-build-id`` option. + * Removed ``-ppc-quadword-atomics`` which only affected lock-free quadword atomics on AIX. Now backend generates lock-free quadword atomics code on AIX by default. To support lock-free quadword atomics in libatomic, the OS level must be at least AIX 7.2 TL5 SP3 with libc++.rte of version 17.1.1 or above installed. +* Integrated assembler is enabled by default on AIX. +* System assembler is always used to compile assembly files on AIX. +* Added support for local-exec TLS. +* Added a new option, ``--traceback-table``, to ``llvm-objdump`` to print out + the traceback table information for XCOFF object files. +* Added ``llvm-ar`` object mode options ``-X32``, ``-X64``, ``-X32-64``, + and ``-Xany``. +* Changed the default name of the text-section csect to be an empty string + instead of ``.text``. This change does not affect the behaviour + of the program. +* Fixed a problem when the personality routine for the legacy AIX ``xlclang++`` + compiler uses the stack slot to pass the exception object to the landing pad. + Runtime routine ``__xlc_exception_handle()`` invoked by the landing pad to + retrieve the exception object now skips frames not associated with functions + that are C++ EH-aware because the compiler sometimes generates a wrapper of + ``__xlc_exception_handle()`` for optimization purposes. 
+ Changes to the RISC-V Backend ----------------------------- diff --git a/llvm/include/llvm/Analysis/LazyValueInfo.h b/llvm/include/llvm/Analysis/LazyValueInfo.h index b109b7f7e65ae..7b2bfdac75a8f 100644 --- a/llvm/include/llvm/Analysis/LazyValueInfo.h +++ b/llvm/include/llvm/Analysis/LazyValueInfo.h @@ -115,6 +115,9 @@ class LazyValueInfo { /// PredBB to OldSucc to be from PredBB to NewSucc instead. void threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, BasicBlock *NewSucc); + /// Remove information related to this value from the cache. + void forgetValue(Value *V); + /// Inform the analysis cache that we have erased a block. void eraseBlock(BasicBlock *BB); diff --git a/llvm/include/llvm/Option/ArgList.h b/llvm/include/llvm/Option/ArgList.h index 310c8900af9ef..c57bd2350af14 100644 --- a/llvm/include/llvm/Option/ArgList.h +++ b/llvm/include/llvm/Option/ArgList.h @@ -299,6 +299,7 @@ class ArgList { /// \p Default if neither option is given. If both the option and its /// negation are present, the last one wins. bool hasFlag(OptSpecifier Pos, OptSpecifier Neg, bool Default) const; + bool hasFlagNoClaim(OptSpecifier Pos, OptSpecifier Neg, bool Default) const; /// hasFlag - Given an option \p Pos, an alias \p PosAlias and its negative /// form \p Neg, return true if the option or its alias is present, false if diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp index 33651783cb177..2ba6036056d99 100644 --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -465,6 +465,10 @@ class LazyValueInfoImpl { F.print(OS, &Writer); } + /// This is part of the update interface to remove information related to this + /// value from the cache. + void forgetValue(Value *V) { TheCache.eraseValue(V); } + /// This is part of the update interface to inform the cache /// that a block has been deleted. 
void eraseBlock(BasicBlock *BB) { @@ -1969,6 +1973,11 @@ void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, } } +void LazyValueInfo::forgetValue(Value *V) { + if (PImpl) + getImpl(PImpl, AC, nullptr).forgetValue(V); +} + void LazyValueInfo::eraseBlock(BasicBlock *BB) { if (PImpl) { getImpl(PImpl, AC, BB->getModule()).eraseBlock(BB); diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index 4e80e9b58c060..523e077fd9a28 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -538,6 +538,10 @@ void MachineLICMBase::HoistRegionPostRA() { PhysRegDefs.set(*AI); } + // Funclet entry blocks will clobber all registers + if (const uint32_t *Mask = BB->getBeginClobberMask(TRI)) + PhysRegClobbers.setBitsNotInMask(Mask); + SpeculationState = SpeculateUnknown; for (MachineInstr &MI : *BB) ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates); diff --git a/llvm/lib/Option/ArgList.cpp b/llvm/lib/Option/ArgList.cpp index 400bedabc0037..86f28e578e5d9 100644 --- a/llvm/lib/Option/ArgList.cpp +++ b/llvm/lib/Option/ArgList.cpp @@ -75,6 +75,13 @@ bool ArgList::hasFlag(OptSpecifier Pos, OptSpecifier Neg, bool Default) const { return Default; } +bool ArgList::hasFlagNoClaim(OptSpecifier Pos, OptSpecifier Neg, + bool Default) const { + if (Arg *A = getLastArgNoClaim(Pos, Neg)) + return A->getOption().matches(Pos); + return Default; +} + bool ArgList::hasFlag(OptSpecifier Pos, OptSpecifier PosAlias, OptSpecifier Neg, bool Default) const { if (Arg *A = getLastArg(Pos, PosAlias, Neg)) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 0605dfa637939..c7a6dd7deb45b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -13840,7 +13840,17 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::aarch64_neon_ld4: case 
Intrinsic::aarch64_neon_ld1x2: case Intrinsic::aarch64_neon_ld1x3: - case Intrinsic::aarch64_neon_ld1x4: + case Intrinsic::aarch64_neon_ld1x4: { + Info.opc = ISD::INTRINSIC_W_CHAIN; + uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64; + Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); + Info.ptrVal = I.getArgOperand(I.arg_size() - 1); + Info.offset = 0; + Info.align.reset(); + // volatile loads with NEON intrinsics not supported + Info.flags = MachineMemOperand::MOLoad; + return true; + } case Intrinsic::aarch64_neon_ld2lane: case Intrinsic::aarch64_neon_ld3lane: case Intrinsic::aarch64_neon_ld4lane: @@ -13848,9 +13858,13 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::aarch64_neon_ld3r: case Intrinsic::aarch64_neon_ld4r: { Info.opc = ISD::INTRINSIC_W_CHAIN; - // Conservatively set memVT to the entire set of vectors loaded. - uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64; - Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); + // ldx return struct with the same vec type + Type *RetTy = I.getType(); + auto *StructTy = cast<StructType>(RetTy); + unsigned NumElts = StructTy->getNumElements(); + Type *VecTy = StructTy->getElementType(0); + MVT EleVT = MVT::getVT(VecTy).getVectorElementType(); + Info.memVT = EVT::getVectorVT(I.getType()->getContext(), EleVT, NumElts); Info.ptrVal = I.getArgOperand(I.arg_size() - 1); Info.offset = 0; Info.align.reset(); @@ -13863,20 +13877,40 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::aarch64_neon_st4: case Intrinsic::aarch64_neon_st1x2: case Intrinsic::aarch64_neon_st1x3: - case Intrinsic::aarch64_neon_st1x4: + case Intrinsic::aarch64_neon_st1x4: { + Info.opc = ISD::INTRINSIC_VOID; + unsigned NumElts = 0; + for (const Value *Arg : I.args()) { + Type *ArgTy = Arg->getType(); + if (!ArgTy->isVectorTy()) + break; + NumElts += DL.getTypeSizeInBits(ArgTy) / 64; + } + Info.memVT = 
EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); + Info.ptrVal = I.getArgOperand(I.arg_size() - 1); + Info.offset = 0; + Info.align.reset(); + // volatile stores with NEON intrinsics not supported + Info.flags = MachineMemOperand::MOStore; + return true; + } case Intrinsic::aarch64_neon_st2lane: case Intrinsic::aarch64_neon_st3lane: case Intrinsic::aarch64_neon_st4lane: { Info.opc = ISD::INTRINSIC_VOID; - // Conservatively set memVT to the entire set of vectors stored. unsigned NumElts = 0; + // all the vector type is same + Type *VecTy = I.getArgOperand(0)->getType(); + MVT EleVT = MVT::getVT(VecTy).getVectorElementType(); + for (const Value *Arg : I.args()) { Type *ArgTy = Arg->getType(); if (!ArgTy->isVectorTy()) break; - NumElts += DL.getTypeSizeInBits(ArgTy) / 64; + NumElts += 1; } - Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); + + Info.memVT = EVT::getVectorVT(I.getType()->getContext(), EleVT, NumElts); Info.ptrVal = I.getArgOperand(I.arg_size() - 1); Info.offset = 0; Info.align.reset(); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index bd03ffaafab10..30bd580ad86a7 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -5433,8 +5433,8 @@ static bool getFNEGPatterns(MachineInstr &Root, auto Match = [&](unsigned Opcode, MachineCombinerPattern Pattern) -> bool { MachineOperand &MO = Root.getOperand(1); MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg()); - if (MI != nullptr && MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) && - (MI->getOpcode() == Opcode) && + if (MI != nullptr && (MI->getOpcode() == Opcode) && + MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) && Root.getFlag(MachineInstr::MIFlag::FmContract) && Root.getFlag(MachineInstr::MIFlag::FmNsz) && MI->getFlag(MachineInstr::MIFlag::FmContract) && diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td 
b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 0f3d346176780..9e72d37880c58 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -2253,7 +2253,7 @@ def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>; // Large STG to be expanded into a loop. $sz is the size, $Rn is start address. // $Rn_wback is one past the end of the range. $Rm is the loop counter. -let isCodeGenOnly=1, mayStore=1 in { +let isCodeGenOnly=1, mayStore=1, Defs=[NZCV] in { def STGloop_wback : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn), [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >, diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp index 59f1e8319ae72..d10bba26023ff 100644 --- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp +++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp @@ -572,6 +572,15 @@ bool tryToFoldBNEOnCmpXchgResult(MachineBasicBlock &MBB, if (!(BNEOp0 == DestReg && BNEOp1 == CmpValReg) && !(BNEOp0 == CmpValReg && BNEOp1 == DestReg)) return false; + + // Make sure the branch is the only user of the AND. 
+ if (MaskReg.isValid()) { + if (BNEOp0 == DestReg && !MBBI->getOperand(0).isKill()) + return false; + if (BNEOp1 == DestReg && !MBBI->getOperand(1).isKill()) + return false; + } + ToErase.push_back(&*MBBI); LoopHeadBNETarget = MBBI->getOperand(2).getMBB(); MBBI = skipDebugInstructionsForward(std::next(MBBI), E); diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index cb2a49db92332..f312cc8129ddf 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -772,7 +772,8 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF, if (FirstSPAdjustAmount) StackSize = FirstSPAdjustAmount; - if (RVFI->isPushable(MF) && MBBI->getOpcode() == RISCV::CM_POP) { + if (RVFI->isPushable(MF) && MBBI != MBB.end() && + MBBI->getOpcode() == RISCV::CM_POP) { // Use available stack adjustment in pop instruction to deallocate stack // space. unsigned PushStack = RVFI->getRVPushRegs() * (STI.getXLen() / 8); diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index cafce628cf6a2..aa20409da4e2b 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -3414,6 +3414,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) { // Because N and True must have the same merge operand (or True's operand is // implicit_def), the "effective" body is the minimum of their VLs. + SDValue OrigVL = VL; VL = GetMinVL(TrueVL, VL); if (!VL) return false; @@ -3461,7 +3462,17 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) { "Expected instructions with mask have a tied dest."); #endif - uint64_t Policy = isImplicitDef(Merge) ? RISCVII::TAIL_AGNOSTIC : /*TUMU*/ 0; + // Use a tumu policy, relaxing it to tail agnostic provided that the merge + // operand is undefined. 
+ // + // However, if the VL became smaller than what the vmerge had originally, then + // elements past VL that were previously in the vmerge's body will have moved + // to the tail. In that case we always need to use tail undisturbed to + // preserve them. + bool MergeVLShrunk = VL != OrigVL; + uint64_t Policy = (isImplicitDef(Merge) && !MergeVLShrunk) + ? RISCVII::TAIL_AGNOSTIC + : /*TUMU*/ 0; SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT()); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 4398f996c9306..b402db9c4c170 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1648,7 +1648,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FP_ROUND, VT, Custom); setOperationAction(ISD::STRICT_FP_ROUND, VT, Custom); } - for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32 }) { + for (MVT VT : { MVT::f32, MVT::v2f32, MVT::v4f32, MVT::v8f32 }) { setOperationAction(ISD::FP_EXTEND, VT, Custom); setOperationAction(ISD::STRICT_FP_EXTEND, VT, Custom); } @@ -1656,9 +1656,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationPromotedToType(Opc, MVT::v8f16, MVT::v8f32); setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32); } - - setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal); - setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal); } // This block controls legalization of the mask vector sizes that are @@ -1975,8 +1972,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setF16Action(MVT::v32f16, Expand); setOperationAction(ISD::FP_ROUND, MVT::v16f16, Custom); setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Custom); - setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Legal); - setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Custom); + setOperationAction(ISD::STRICT_FP_EXTEND, 
MVT::v16f32, Custom); for (unsigned Opc : {ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV}) { setOperationPromotedToType(Opc, MVT::v16f16, MVT::v16f32); setOperationPromotedToType(Opc, MVT::v32f16, MVT::v32f32); @@ -2197,9 +2194,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v32i16, Legal); setOperationAction(ISD::FP_ROUND, MVT::v16f16, Legal); setOperationAction(ISD::STRICT_FP_ROUND, MVT::v16f16, Legal); - setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::v16f32, Custom); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v16f32, Legal); - setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::v8f64, Custom); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32f16, Custom); @@ -2249,9 +2246,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i16, Custom); setOperationAction(ISD::FP_ROUND, MVT::v8f16, Legal); setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f16, Legal); - setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Custom); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f32, Legal); - setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Custom); setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal); // INSERT_VECTOR_ELT v8f16 extended to VECTOR_SHUFFLE @@ -2275,8 +2272,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (!Subtarget.useSoftFloat() && (Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16())) { - addRegisterClass(MVT::v8bf16, &X86::VR128XRegClass); - addRegisterClass(MVT::v16bf16, &X86::VR256XRegClass); + addRegisterClass(MVT::v8bf16, Subtarget.hasAVX512() ? 
&X86::VR128XRegClass + : &X86::VR128RegClass); + addRegisterClass(MVT::v16bf16, Subtarget.hasAVX512() ? &X86::VR256XRegClass + : &X86::VR256RegClass); // We set the type action of bf16 to TypeSoftPromoteHalf, but we don't // provide the method to promote BUILD_VECTOR and INSERT_VECTOR_ELT. // Set the operation action Custom to do the customization later. @@ -2291,6 +2290,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); } + setOperationAction(ISD::FP_ROUND, MVT::v8bf16, Custom); addLegalFPImmediate(APFloat::getZero(APFloat::BFloat())); } @@ -2302,6 +2302,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FMUL, MVT::v32bf16, Expand); setOperationAction(ISD::FDIV, MVT::v32bf16, Expand); setOperationAction(ISD::BUILD_VECTOR, MVT::v32bf16, Custom); + setOperationAction(ISD::FP_ROUND, MVT::v16bf16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32bf16, Custom); } @@ -11363,7 +11364,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) return LowerBUILD_VECTORvXi1(Op, DAG, Subtarget); - if (VT.getVectorElementType() == MVT::bf16 && Subtarget.hasBF16()) + if (VT.getVectorElementType() == MVT::bf16 && + (Subtarget.hasAVXNECONVERT() || Subtarget.hasBF16())) return LowerBUILD_VECTORvXbf16(Op, DAG, Subtarget); if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget)) @@ -14795,13 +14797,9 @@ static bool isShuffleFoldableLoad(SDValue V) { } template <typename T> -static bool isSoftFP16(T VT, const X86Subtarget &Subtarget) { - return VT.getScalarType() == MVT::f16 && !Subtarget.hasFP16(); -} - -template <typename T> -bool X86TargetLowering::isSoftFP16(T VT) const { - return ::isSoftFP16(VT, Subtarget); +static bool isSoftF16(T VT, const X86Subtarget &Subtarget) { + T EltVT = VT.getScalarType(); + return EltVT == MVT::bf16 
|| (EltVT == MVT::f16 && !Subtarget.hasFP16()); } /// Try to lower insertion of a single element into a zero vector. @@ -14817,7 +14815,7 @@ static SDValue lowerShuffleAsElementInsertion( unsigned NumElts = VT.getVectorNumElements(); unsigned EltBits = VT.getScalarSizeInBits(); - if (isSoftFP16(EltVT, Subtarget)) + if (isSoftF16(EltVT, Subtarget)) return SDValue(); int V2Index = @@ -20374,7 +20372,7 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); MVT VT = Op.getSimpleValueType(); - if (isSoftFP16(VT)) { + if (isSoftF16(VT, Subtarget)) { MVT NVT = VT.changeVectorElementTypeToInteger(); return DAG.getBitcast(VT, DAG.getNode(ISD::VSELECT, dl, NVT, Cond, DAG.getBitcast(NVT, LHS), @@ -21852,7 +21850,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); - if (isSoftFP16(VT)) + if (isSoftF16(VT, Subtarget)) return promoteXINT_TO_FP(Op, DAG); else if (isLegalConversion(SrcVT, true, Subtarget)) return Op; @@ -22357,7 +22355,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, if (DstVT == MVT::f128) return SDValue(); - if (isSoftFP16(DstVT)) + if (isSoftF16(DstVT, Subtarget)) return promoteXINT_TO_FP(Op, DAG); else if (isLegalConversion(SrcVT, false, Subtarget)) return Op; @@ -23314,7 +23312,7 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); SDValue Res; - if (isSoftFP16(SrcVT)) { + if (isSoftF16(SrcVT, Subtarget)) { MVT NVT = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32; if (IsStrict) return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other}, @@ -23743,7 +23741,7 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const { // This code is only for floats and doubles. Fall back to generic code for // anything else. 
- if (!isScalarFPTypeInSSEReg(SrcVT) || isSoftFP16(SrcVT)) + if (!isScalarFPTypeInSSEReg(SrcVT) || isSoftF16(SrcVT, Subtarget)) return SDValue(); EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); @@ -23888,6 +23886,10 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { !Subtarget.getTargetTriple().isOSDarwin())) return SDValue(); + if ((SVT == MVT::v8f16 && Subtarget.hasF16C()) || + (SVT == MVT::v16f16 && Subtarget.useAVX512Regs())) + return Op; + if (SVT == MVT::f16) { if (Subtarget.hasFP16()) return Op; @@ -23960,7 +23962,25 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { if (!SVT.isVector()) return Op; + if (SVT.getVectorElementType() == MVT::bf16) { + // FIXME: Do we need to support strict FP? + assert(!IsStrict && "Strict FP doesn't support BF16"); + if (VT.getVectorElementType() == MVT::f64) { + MVT TmpVT = VT.changeVectorElementType(MVT::f32); + return DAG.getNode(ISD::FP_EXTEND, DL, VT, + DAG.getNode(ISD::FP_EXTEND, DL, TmpVT, In)); + } + assert(VT.getVectorElementType() == MVT::f32 && "Unexpected fpext"); + MVT NVT = SVT.changeVectorElementType(MVT::i32); + In = DAG.getBitcast(SVT.changeTypeToInteger(), In); + In = DAG.getNode(ISD::ZERO_EXTEND, DL, NVT, In); + In = DAG.getNode(ISD::SHL, DL, NVT, In, DAG.getConstant(16, DL, NVT)); + return DAG.getBitcast(VT, In); + } + if (SVT.getVectorElementType() == MVT::f16) { + if (Subtarget.hasFP16() && isTypeLegal(SVT)) + return Op; assert(Subtarget.hasF16C() && "Unexpected features!"); if (SVT == MVT::v2f16) In = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f16, In, @@ -24033,6 +24053,12 @@ SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { return Res; } + if (VT.getScalarType() == MVT::bf16) { + if (SVT.getScalarType() == MVT::f32 && isTypeLegal(VT)) + return Op; + return SDValue(); + } + if (VT.getScalarType() == MVT::f16 && !Subtarget.hasFP16()) { if (!Subtarget.hasF16C() || SVT.getScalarType() != MVT::f32) return 
SDValue(); @@ -25676,7 +25702,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, if (isFP) { MVT EltVT = Op0.getSimpleValueType().getVectorElementType(); assert(EltVT == MVT::f16 || EltVT == MVT::f32 || EltVT == MVT::f64); - if (isSoftFP16(EltVT, Subtarget)) + if (isSoftF16(EltVT, Subtarget)) return SDValue(); bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS; @@ -26241,7 +26267,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get(); - if (isSoftFP16(Op0.getValueType())) + if (isSoftF16(Op0.getValueType(), Subtarget)) return SDValue(); // Handle f128 first, since one possible outcome is a normal integer @@ -26434,7 +26460,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { MVT VT = Op1.getSimpleValueType(); SDValue CC; - if (isSoftFP16(VT)) { + if (isSoftF16(VT, Subtarget)) { MVT NVT = VT.changeTypeToInteger(); return DAG.getBitcast(VT, DAG.getNode(ISD::SELECT, DL, NVT, Cond, DAG.getBitcast(NVT, Op1), @@ -26506,7 +26532,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { } if (Cond.getOpcode() == ISD::SETCC && - !isSoftFP16(Cond.getOperand(0).getSimpleValueType())) { + !isSoftF16(Cond.getOperand(0).getSimpleValueType(), Subtarget)) { if (SDValue NewCond = LowerSETCC(Cond, DAG)) { Cond = NewCond; // If the condition was updated, it's possible that the operands of the @@ -27196,7 +27222,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // Bail out when we don't have native compare instructions. 
if (Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0).getValueType() != MVT::f128 && - !isSoftFP16(Cond.getOperand(0).getValueType())) { + !isSoftF16(Cond.getOperand(0).getValueType(), Subtarget)) { SDValue LHS = Cond.getOperand(0); SDValue RHS = Cond.getOperand(1); ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); @@ -34983,7 +35009,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, EVT SrcVT = Src.getValueType(); SDValue Res; - if (isSoftFP16(SrcVT)) { + if (isSoftF16(SrcVT, Subtarget)) { EVT NVT = VT.isVector() ? VT.changeVectorElementType(MVT::f32) : MVT::f32; if (IsStrict) { Res = @@ -47383,7 +47409,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, // ignored in unsafe-math mode). // We also try to create v2f32 min/max nodes, which we later widen to v4f32. if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() && - VT != MVT::f80 && VT != MVT::f128 && !isSoftFP16(VT, Subtarget) && + VT != MVT::f80 && VT != MVT::f128 && !isSoftF16(VT, Subtarget) && (TLI.isTypeLegal(VT) || VT == MVT::v2f32) && (Subtarget.hasSSE2() || (Subtarget.hasSSE1() && VT.getScalarType() == MVT::f32))) { @@ -47700,7 +47726,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, } // Early exit check - if (!TLI.isTypeLegal(VT) || isSoftFP16(VT, Subtarget)) + if (!TLI.isTypeLegal(VT) || isSoftF16(VT, Subtarget)) return SDValue(); if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, DCI, Subtarget)) @@ -54550,7 +54576,7 @@ static SDValue combineFMinFMax(SDNode *N, SelectionDAG &DAG) { static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { EVT VT = N->getValueType(0); - if (Subtarget.useSoftFloat() || isSoftFP16(VT, Subtarget)) + if (Subtarget.useSoftFloat() || isSoftF16(VT, Subtarget)) return SDValue(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 250df82a30c2f..047d8f0210470 100644 --- 
a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1749,8 +1749,6 @@ namespace llvm { bool needsCmpXchgNb(Type *MemType) const; - template <typename T> bool isSoftFP16(T VT) const; - void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB, MachineBasicBlock *DispatchBB, int FI) const; diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index ecb5c3e912401..b5dac7a0c65af 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -12976,6 +12976,11 @@ let Predicates = [HasBF16, HasVLX] in { def : Pat<(v16bf16 (X86VBroadcast (v8bf16 VR128X:$src))), (VPBROADCASTWZ256rr VR128X:$src)>; + def : Pat<(v8bf16 (X86vfpround (v8f32 VR256X:$src))), + (VCVTNEPS2BF16Z256rr VR256X:$src)>; + def : Pat<(v8bf16 (X86vfpround (loadv8f32 addr:$src))), + (VCVTNEPS2BF16Z256rm addr:$src)>; + // TODO: No scalar broadcast due to we don't support legal scalar bf16 so far. } @@ -12985,6 +12990,11 @@ let Predicates = [HasBF16] in { def : Pat<(v32bf16 (X86VBroadcast (v8bf16 VR128X:$src))), (VPBROADCASTWZrr VR128X:$src)>; + + def : Pat<(v16bf16 (X86vfpround (v16f32 VR512:$src))), + (VCVTNEPS2BF16Zrr VR512:$src)>; + def : Pat<(v16bf16 (X86vfpround (loadv16f32 addr:$src))), + (VCVTNEPS2BF16Zrm addr:$src)>; // TODO: No scalar broadcast due to we don't support legal scalar bf16 so far. 
} diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 6c57eceab3769..a6fcc804e1d06 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -8289,6 +8289,11 @@ let Predicates = [HasAVXNECONVERT] in { f256mem>, T8PS; let checkVEXPredicate = 1 in defm VCVTNEPS2BF16 : VCVTNEPS2BF16_BASE, VEX, T8XS, ExplicitVEXPrefix; + + def : Pat<(v8bf16 (X86vfpround (v8f32 VR256:$src))), + (VCVTNEPS2BF16Yrr VR256:$src)>; + def : Pat<(v8bf16 (X86vfpround (loadv8f32 addr:$src))), + (VCVTNEPS2BF16Yrm addr:$src)>; } def : InstAlias<"vcvtneps2bf16x\t{$src, $dst|$dst, $src}", diff --git a/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/llvm/lib/Transforms/Coroutines/CoroElide.cpp index d78ab1c1ea284..d0606c15f3d5b 100644 --- a/llvm/lib/Transforms/Coroutines/CoroElide.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroElide.cpp @@ -194,12 +194,49 @@ bool Lowerer::hasEscapePath(const CoroBeginInst *CB, for (auto *DA : It->second) Visited.insert(DA->getParent()); + SmallPtrSet EscapingBBs; + for (auto *U : CB->users()) { + // The use from coroutine intrinsics are not a problem. + if (isa(U)) + continue; + + // Think all other usages may be an escaping candidate conservatively. + // + // Note that the major user of switch ABI coroutine (the C++) will store + // resume.fn, destroy.fn and the index to the coroutine frame immediately. + // So the parent of the coro.begin in C++ will be always escaping. + // Then we can't get any performance benefits for C++ by improving the + // precision of the method. + // + // The reason why we still judge it is we want to make LLVM Coroutine in + // switch ABIs to be self contained as much as possible instead of a + // by-product of C++20 Coroutines. 
+ EscapingBBs.insert(cast(U)->getParent()); + } + + bool PotentiallyEscaped = false; + do { const auto *BB = Worklist.pop_back_val(); if (!Visited.insert(BB).second) continue; - if (TIs.count(BB)) - return true; + + // A Path insensitive marker to test whether the coro.begin escapes. + // It is intentional to make it path insensitive while it may not be + // precise since we don't want the process to be too slow. + PotentiallyEscaped |= EscapingBBs.count(BB); + + if (TIs.count(BB)) { + if (!BB->getTerminator()->isExceptionalTerminator() || PotentiallyEscaped) + return true; + + // If the function ends with the exceptional terminator, the memory used + // by the coroutine frame can be released by stack unwinding + // automatically. So we can think the coro.begin doesn't escape if it + // exits the function by exceptional terminator. + + continue; + } // Conservatively say that there is potentially a path. if (!--Limit) @@ -236,36 +273,36 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const { // memory location storing that value and not the virtual register. SmallPtrSet Terminators; - // First gather all of the non-exceptional terminators for the function. + // First gather all of the terminators for the function. // Consider the final coro.suspend as the real terminator when the current // function is a coroutine. - for (BasicBlock &B : *F) { - auto *TI = B.getTerminator(); - if (TI->getNumSuccessors() == 0 && !TI->isExceptionalTerminator() && - !isa(TI)) - Terminators.insert(&B); - } + for (BasicBlock &B : *F) { + auto *TI = B.getTerminator(); + + if (TI->getNumSuccessors() != 0 || isa(TI)) + continue; + + Terminators.insert(&B); + } // Filter out the coro.destroy that lie along exceptional paths. SmallPtrSet ReferencedCoroBegins; for (const auto &It : DestroyAddr) { - // If there is any coro.destroy dominates all of the terminators for the - // coro.begin, we could know the corresponding coro.begin wouldn't escape. 
- for (Instruction *DA : It.second) { - if (llvm::all_of(Terminators, [&](auto *TI) { - return DT.dominates(DA, TI->getTerminator()); - })) { - ReferencedCoroBegins.insert(It.first); - break; - } - } - - // Whether there is any paths from coro.begin to Terminators which not pass - // through any of the coro.destroys. + // If every terminators is dominated by coro.destroy, we could know the + // corresponding coro.begin wouldn't escape. + // + // Otherwise hasEscapePath would decide whether there is any paths from + // coro.begin to Terminators which not pass through any of the + // coro.destroys. // // hasEscapePath is relatively slow, so we avoid to run it as much as // possible. - if (!ReferencedCoroBegins.count(It.first) && + if (llvm::all_of(Terminators, + [&](auto *TI) { + return llvm::any_of(It.second, [&](auto *DA) { + return DT.dominates(DA, TI->getTerminator()); + }); + }) || !hasEscapePath(It.first, Terminators)) ReferencedCoroBegins.insert(It.first); } diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 15628d32280d8..2b88dd08d88b6 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -424,7 +424,7 @@ static Decomposition decompose(Value *V, return MergeResults(Op0, Op1, IsSigned); ConstantInt *CI; - if (match(V, m_NSWMul(m_Value(Op0), m_ConstantInt(CI)))) { + if (match(V, m_NSWMul(m_Value(Op0), m_ConstantInt(CI))) && canUseSExt(CI)) { auto Result = decompose(Op0, Preconditions, IsSigned, DL); Result.mul(CI->getSExtValue()); return Result; diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp index 24390f1b54f60..5b8f1b00dc034 100644 --- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp +++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp @@ -1269,6 +1269,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { if (IsLoadCSE) { LoadInst 
*NLoadI = cast<LoadInst>(AvailableVal); combineMetadataForCSE(NLoadI, LoadI, false); + LVI->forgetValue(NLoadI); }; // If the returned value is the load itself, replace with poison. This can @@ -1461,6 +1462,7 @@ bool JumpThreadingPass::simplifyPartiallyRedundantLoad(LoadInst *LoadI) { for (LoadInst *PredLoadI : CSELoads) { combineMetadataForCSE(PredLoadI, LoadI, true); + LVI->forgetValue(PredLoadI); } LoadI->replaceAllUsesWith(PN); diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-st-lane-aa.ll b/llvm/test/CodeGen/AArch64/arm64-neon-st-lane-aa.ll new file mode 100644 index 0000000000000..7642597c91f2b --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-neon-st-lane-aa.ll @@ -0,0 +1,34 @@ +; RUN: llc < %s -mtriple=arm64-none-linux-gnu -mattr=+neon -O2 | FileCheck %s + +; st2 must before two ldrb. +; The situation that put one ldrb before st2 because of the conservative memVT set for st2lane, +; which lead to basic-aa goes wrong. + +define dso_local i32 @test_vst2_lane_u8([2 x <8 x i8>] %vectors.coerce) local_unnamed_addr { +; CHECK-LABEL: test_vst2_lane_u8: +; CHECK: st2 { v[[V1:[0-9]+]].b, v[[V2:[0-9]+]].b }[6], [x8] +; CHECK-NEXT: umov w[[W1:[0-9]+]], v[[V12:[0-9]+]].b[6] +; CHECK-NEXT: ldrb w[[W2:[0-9]+]], [sp, #12] +; CHECK-NEXT: ldrb w[[W2:[0-9]+]], [sp, #13] +entry: + %temp = alloca [2 x i8], align 4 + %vectors.coerce.fca.0.extract = extractvalue [2 x <8 x i8>] %vectors.coerce, 0 + %vectors.coerce.fca.1.extract = extractvalue [2 x <8 x i8>] %vectors.coerce, 1 + call void @llvm.lifetime.start.p0(i64 2, ptr nonnull %temp) #4 + call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> %vectors.coerce.fca.0.extract, <8 x i8> %vectors.coerce.fca.1.extract, i64 6, ptr nonnull %temp) + %0 = load i8, ptr %temp, align 4 + %vget_lane = extractelement <8 x i8> %vectors.coerce.fca.0.extract, i64 6 + %cmp8.not = icmp ne i8 %0, %vget_lane + %arrayidx3.1 = getelementptr inbounds [2 x i8], ptr %temp, i64 0, i64 1 + %1 = load i8, ptr %arrayidx3.1, align 1 + %vget_lane.1 = extractelement <8 
x i8> %vectors.coerce.fca.1.extract, i64 6 + %cmp8.not.1 = icmp ne i8 %1, %vget_lane.1 + %or.cond = select i1 %cmp8.not, i1 true, i1 %cmp8.not.1 + %cmp.lcssa = zext i1 %or.cond to i32 + call void @llvm.lifetime.end.p0(i64 2, ptr nonnull %temp) #4 + ret i32 %cmp.lcssa +} + +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2 +declare void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8>, <8 x i8>, i64, ptr nocapture) #2 +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2 diff --git a/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir b/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir new file mode 100644 index 0000000000000..6fe094cc6cbb4 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/emit_fneg_with_non_register_operand.mir @@ -0,0 +1,130 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -mtriple aarch64 -run-pass=machine-combiner -o - %s | FileCheck %s +--- | + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + target triple = "aarch64" + + @c = global double 0.000000e+00, align 8 + + define void @emit_fneg_with_non_register_operand(double %c) { + entry: + %0 = load double, ptr @c, align 8 + %1 = tail call double asm sideeffect "", "=w,0"(double %0) + %2 = load double, ptr @c, align 8 + %3 = tail call double asm sideeffect "", "=w,0"(double %2) + %fneg = fneg double %1 + %cmp = fcmp oeq double %3, %fneg + br i1 %cmp, label %if.then, label %if.end + + if.then: ; preds = %entry + tail call void @b(double noundef %1) + ret void + + if.end: ; preds = %entry + ret void + } + + declare void @b(double noundef) + +... 
+--- +name: emit_fneg_with_non_register_operand +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: fpr64, preferred-register: '' } + - { id: 1, class: fpr64, preferred-register: '' } + - { id: 2, class: fpr64, preferred-register: '' } + - { id: 3, class: fpr64, preferred-register: '' } + - { id: 4, class: fpr64, preferred-register: '' } + - { id: 5, class: fpr64, preferred-register: '' } + - { id: 6, class: gpr64common, preferred-register: '' } + - { id: 7, class: fpr64, preferred-register: '' } +liveins: [] +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: true + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: emit_fneg_with_non_register_operand + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.2(0x30000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[LOADgot:%[0-9]+]]:gpr64common = LOADgot target-flags(aarch64-got) @c + ; CHECK-NEXT: [[LDRDui:%[0-9]+]]:fpr64 = LDRDui [[LOADgot]], 0 :: (dereferenceable load (s64) from @c) + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2359306 /* regdef:FPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, [[LDRDui]](tied-def 3) + ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:fpr64 = COPY %2 + ; CHECK-NEXT: [[LDRDui1:%[0-9]+]]:fpr64 = LDRDui [[LOADgot]], 0 :: (dereferenceable load (s64) from @c) + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 2359306 /* regdef:FPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, [[LDRDui1]](tied-def 3) + ; CHECK-NEXT: [[FNEGDr:%[0-9]+]]:fpr64 = FNEGDr %2 + ; CHECK-NEXT: nofpexcept FCMPDrr %4, killed [[FNEGDr]], implicit-def $nzcv, implicit $fpcr + ; CHECK-NEXT: Bcc 1, %bb.2, implicit $nzcv + ; CHECK-NEXT: B %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.if.then: + ; CHECK-NEXT: $d0 = COPY [[COPY]] + ; CHECK-NEXT: TCRETURNdi @b, 0, csr_aarch64_aapcs, implicit $sp, implicit $d0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.if.end: + ; CHECK-NEXT: RET_ReallyLR + bb.0.entry: + successors: %bb.1(0x50000000), %bb.2(0x30000000) + + %6:gpr64common = LOADgot target-flags(aarch64-got) @c + %3:fpr64 = LDRDui %6, 0 :: (dereferenceable load (s64) from @c) + INLINEASM &"", 1 /* sideeffect attdialect */, 2359306 /* regdef:FPR64 */, def %2, 2147483657 /* reguse tiedto:$0 */, %3(tied-def 3) + %0:fpr64 = COPY %2 + %5:fpr64 = LDRDui %6, 0 :: (dereferenceable load (s64) from @c) + INLINEASM &"", 1 /* sideeffect attdialect */, 2359306 /* regdef:FPR64 */, def %4, 2147483657 /* reguse tiedto:$0 */, %5(tied-def 3) + %7:fpr64 = FNEGDr %2 + nofpexcept FCMPDrr %4, killed %7, implicit-def $nzcv, implicit $fpcr + Bcc 1, %bb.2, implicit $nzcv + B %bb.1 + + bb.1.if.then: + $d0 = COPY %0 + TCRETURNdi @b, 0, csr_aarch64_aapcs, implicit $sp, implicit $d0 + + bb.2.if.end: + RET_ReallyLR + +... 
diff --git a/llvm/test/CodeGen/AArch64/memtag-loop-nzcv.ll b/llvm/test/CodeGen/AArch64/memtag-loop-nzcv.ll new file mode 100644 index 0000000000000..86bafd1c93bc1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/memtag-loop-nzcv.ll @@ -0,0 +1,59 @@ +; RUN: llc -O2 -print-after-isel -mtriple=aarch64-linux-gnu %s -o /dev/null 2>&1 | FileCheck %s --check-prefixes=CHECK + +; This test function includes a 256-byte buffer. We expect it to require its +; MTE tags to be set to a useful value on entry, and cleared again on exit. At +; the time of writing this test, the pseudo-instructions chosen are +; STGloop_wback and STGloop respectively, but if different pseudos are selected +; in future, that's not a problem. The important thing is that both should +; include that implicit-def of $nzcv, because these pseudo-instructions will +; expand into loops that use the flags for their termination tests. + +; CHECK: STGloop_wback 256, {{.*}}, implicit-def dead $nzcv +; CHECK: STGloop 256, {{.*}}, implicit-def dead $nzcv + +define i32 @foo(i32 noundef %0) #0 { + %2 = alloca i32, align 4 + %3 = alloca [256 x i8], align 1 + %4 = alloca i64, align 8 + %5 = alloca i32, align 4 + %6 = alloca i64, align 8 + store i32 %0, ptr %2, align 4 + %7 = load i32, ptr %2, align 4 + %8 = getelementptr inbounds [256 x i8], ptr %3, i64 0, i64 0 + %9 = call i64 @read(i32 noundef %7, ptr noundef %8, i64 noundef 256) + store i64 %9, ptr %4, align 8 + store i32 0, ptr %5, align 4 + store i64 0, ptr %6, align 8 + br label %10 + +10: ; preds = %21, %1 + %11 = load i64, ptr %6, align 8 + %12 = load i64, ptr %4, align 8 + %13 = icmp ult i64 %11, %12 + br i1 %13, label %14, label %24 + +14: ; preds = %10 + %15 = load i64, ptr %6, align 8 + %16 = getelementptr inbounds [256 x i8], ptr %3, i64 0, i64 %15 + %17 = load i8, ptr %16, align 1 + %18 = zext i8 %17 to i32 + %19 = load i32, ptr %5, align 4 + %20 = add nsw i32 %19, %18 + store i32 %20, ptr %5, align 4 + br label %21 + +21: ; preds = %14 + %22 = load i64, ptr 
%6, align 8 + %23 = add i64 %22, 1 + store i64 %23, ptr %6, align 8 + br label %10 + +24: ; preds = %10 + %25 = load i32, ptr %5, align 4 + %26 = srem i32 %25, 251 + ret i32 %26 +} + +declare i64 @read(i32 noundef, ptr noundef, i64 noundef) + +attributes #0 = { sanitize_memtag "target-features"="+mte" } diff --git a/llvm/test/CodeGen/AArch64/multi-vector-load-size.ll b/llvm/test/CodeGen/AArch64/multi-vector-load-size.ll new file mode 100644 index 0000000000000..ecb953366a88e --- /dev/null +++ b/llvm/test/CodeGen/AArch64/multi-vector-load-size.ll @@ -0,0 +1,106 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -stop-after=instruction-select < %s | FileCheck %s + +%struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> } +%struct.__neon_float32x2x3_t = type { <2 x float>, <2 x float>, <2 x float> } +%struct.__neon_float32x2x4_t = type { <2 x float>, <2 x float>, <2 x float>, <2 x float> } + +declare %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld2.v2f32.p0f32(float*) +declare %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld3.v2f32.p0f32(float*) +declare %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld4.v2f32.p0f32(float*) + +declare %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float*) +declare %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float*) +declare %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float*) + +declare %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld2r.v2f32.p0f32(float*) +declare %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld3r.v2f32.p0f32(float*) +declare %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld4r.v2f32.p0f32(float*) + +declare %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float>, <2 x float>, i64, float*) +declare %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, i64, float*) +declare %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x 
float>, <2 x float>, i64, float*) + + +define %struct.__neon_float32x2x2_t @test_ld2(float* %addr) { + ; CHECK-LABEL: name: test_ld2 + ; CHECK: LD2Twov2s {{.*}} :: (load (s128) {{.*}}) + %val = call %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %addr) + ret %struct.__neon_float32x2x2_t %val +} + +define %struct.__neon_float32x2x3_t @test_ld3(float* %addr) { + ; CHECK-LABEL: name: test_ld3 + ; CHECK: LD3Threev2s {{.*}} :: (load (s192) {{.*}}) + %val = call %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %addr) + ret %struct.__neon_float32x2x3_t %val +} + +define %struct.__neon_float32x2x4_t @test_ld4(float* %addr) { + ; CHECK-LABEL: name: test_ld4 + ; CHECK: LD4Fourv2s {{.*}} :: (load (s256) {{.*}}) + %val = call %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %addr) + ret %struct.__neon_float32x2x4_t %val +} + +define %struct.__neon_float32x2x2_t @test_ld1x2(float* %addr) { + ; CHECK-LABEL: name: test_ld1x2 + ; CHECK: LD1Twov2s {{.*}} :: (load (s128) {{.*}}) + %val = call %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %addr) + ret %struct.__neon_float32x2x2_t %val +} + +define %struct.__neon_float32x2x3_t @test_ld1x3(float* %addr) { + ; CHECK-LABEL: name: test_ld1x3 + ; CHECK: LD1Threev2s {{.*}} :: (load (s192) {{.*}}) + %val = call %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %addr) + ret %struct.__neon_float32x2x3_t %val +} + +define %struct.__neon_float32x2x4_t @test_ld1x4(float* %addr) { + ; CHECK-LABEL: name: test_ld1x4 + ; CHECK: LD1Fourv2s {{.*}} :: (load (s256) {{.*}}) + %val = call %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %addr) + ret %struct.__neon_float32x2x4_t %val +} + +define %struct.__neon_float32x2x2_t @test_ld2r(float* %addr) { + ; CHECK-LABEL: name: test_ld2r + ; CHECK: LD2Rv2s {{.*}} :: (load (s64) {{.*}}) + %val = call %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* 
%addr) + ret %struct.__neon_float32x2x2_t %val +} + +define %struct.__neon_float32x2x3_t @test_ld3r(float* %addr) { + ; CHECK-LABEL: name: test_ld3r + ; CHECK: LD3Rv2s {{.*}} :: (load (s96) {{.*}}) + %val = call %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* %addr) + ret %struct.__neon_float32x2x3_t %val +} + +define %struct.__neon_float32x2x4_t @test_ld4r(float* %addr) { + ; CHECK-LABEL: name: test_ld4r + ; CHECK: LD4Rv2s {{.*}} :: (load (s128) {{.*}}) + %val = call %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* %addr) + ret %struct.__neon_float32x2x4_t %val +} + +define %struct.__neon_float32x2x2_t @test_ld2lane(<2 x float> %a, <2 x float> %b, float* %addr) { + ; CHECK-LABEL: name: test_ld2lane + ; CHECK: {{.*}} LD2i32 {{.*}} + %val = call %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float> %a, <2 x float> %b, i64 1, float* %addr) + ret %struct.__neon_float32x2x2_t %val +} + +define %struct.__neon_float32x2x3_t @test_ld3lane(<2 x float> %a, <2 x float> %b, <2 x float> %c, float* %addr) { + ; CHECK-LABEL: name: test_ld3lane + ; CHECK: {{.*}} LD3i32 {{.*}} + %val = call %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float> %a, <2 x float> %b, <2 x float> %c, i64 1, float* %addr) + ret %struct.__neon_float32x2x3_t %val +} + +define %struct.__neon_float32x2x4_t @test_ld4lane(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x float> %d, float* %addr) { + ; CHECK-LABEL: name: test_ld4lane + ; CHECK: {{.*}} LD4i32 {{.*}} + %val = call %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float> %a, <2 x float> %b, <2 x float> %c, <2 x float> %d, i64 1, float* %addr) + ret %struct.__neon_float32x2x4_t %val +} \ No newline at end of file diff --git a/llvm/test/CodeGen/AArch64/multi-vector-store-size.ll b/llvm/test/CodeGen/AArch64/multi-vector-store-size.ll index 5763ec61667f2..3710db9c47ff6 100644 --- 
a/llvm/test/CodeGen/AArch64/multi-vector-store-size.ll +++ b/llvm/test/CodeGen/AArch64/multi-vector-store-size.ll @@ -23,8 +23,6 @@ define void @addstx(ptr %res, ptr %a, ptr %b, ptr %c, ptr %d) { %cr = fadd <4 x float> %cl, %dl %dr = fadd <4 x float> %dl, %al -; The sizes below are conservative. AArch64TargetLowering -; conservatively assumes the entire vector is stored. tail call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %ar, <4 x float> %br, ptr %res) ; CHECK: ST2Twov4s {{.*}} :: (store (s256) {{.*}}) tail call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float> %ar, <4 x float> %br, <4 x float> %cr, ptr %res) @@ -46,8 +44,6 @@ define void @addst1x(ptr %res, ptr %a, ptr %b, ptr %c, ptr %d) { %cr = fadd <4 x float> %cl, %dl %dr = fadd <4 x float> %dl, %al -; The sizes below are conservative. AArch64TargetLowering -; conservatively assumes the entire vector is stored. tail call void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float> %ar, <4 x float> %br, ptr %res) ; CHECK: ST1Twov4s {{.*}} :: (store (s256) {{.*}}) tail call void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float> %ar, <4 x float> %br, <4 x float> %cr, ptr %res) @@ -69,14 +65,12 @@ define void @addstxlane(ptr %res, ptr %a, ptr %b, ptr %c, ptr %d) { %cr = fadd <4 x float> %cl, %dl %dr = fadd <4 x float> %dl, %al -; The sizes below are conservative. AArch64TargetLowering -; conservatively assumes the entire vector is stored. 
tail call void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float> %ar, <4 x float> %br, i64 1, ptr %res) -; CHECK: ST2i32 {{.*}} :: (store (s256) {{.*}}) +; CHECK: ST2i32 {{.*}} :: (store (s64) {{.*}}) tail call void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float> %ar, <4 x float> %br, <4 x float> %cr, i64 1, ptr %res) -; CHECK: ST3i32 {{.*}} :: (store (s384) {{.*}}) +; CHECK: ST3i32 {{.*}} :: (store (s96) {{.*}}) tail call void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float> %ar, <4 x float> %br, <4 x float> %cr, <4 x float> %dr, i64 1, ptr %res) -; CHECK: ST4i32 {{.*}} :: (store (s512) {{.*}}) +; CHECK: ST4i32 {{.*}} :: (store (s128) {{.*}}) ret void } diff --git a/llvm/test/CodeGen/RISCV/pr65025.ll b/llvm/test/CodeGen/RISCV/pr65025.ll new file mode 100644 index 0000000000000..dcd71edc460b8 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/pr65025.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s -mtriple=riscv64 -mattr=+a | FileCheck %s + +define ptr @cmpxchg_masked_and_branch1(ptr %ptr, i8 signext %cmp, i8 signext %val) nounwind { +; CHECK-LABEL: cmpxchg_masked_and_branch1: +; CHECK: # %bb.0: # %do_cmpxchg +; CHECK-NEXT: andi a3, a0, -4 +; CHECK-NEXT: slli a4, a0, 3 +; CHECK-NEXT: li a5, 255 +; CHECK-NEXT: sllw a5, a5, a4 +; CHECK-NEXT: andi a1, a1, 255 +; CHECK-NEXT: sllw a1, a1, a4 +; CHECK-NEXT: andi a2, a2, 255 +; CHECK-NEXT: sllw a2, a2, a4 +; CHECK-NEXT: .LBB0_3: # %do_cmpxchg +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: lr.w.aqrl a4, (a3) +; CHECK-NEXT: and a6, a4, a5 +; CHECK-NEXT: bne a6, a1, .LBB0_5 +; CHECK-NEXT: # %bb.4: # %do_cmpxchg +; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: xor a6, a4, a2 +; CHECK-NEXT: and a6, a6, a5 +; CHECK-NEXT: xor a6, a4, a6 +; CHECK-NEXT: sc.w.rl a6, a6, (a3) +; CHECK-NEXT: bnez a6, .LBB0_3 +; CHECK-NEXT: .LBB0_5: # %do_cmpxchg +; CHECK-NEXT: and a2, a4, a5 +; CHECK-NEXT: bne a1, a2, .LBB0_2 +; CHECK-NEXT: 
# %bb.1: # %returnptr +; CHECK-NEXT: xor a1, a1, a2 +; CHECK-NEXT: snez a1, a1 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: # %exit +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: ret +do_cmpxchg: + %0 = cmpxchg ptr %ptr, i8 %cmp, i8 %val seq_cst seq_cst + %1 = extractvalue { i8, i1 } %0, 1 + %2 = select i1 %1, ptr %ptr, ptr null + br i1 %1, label %returnptr, label %exit +returnptr: + ret ptr %2 +exit: + ret ptr null +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll index e74daee7cdddd..35d9b27c75f7d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll @@ -28,7 +28,7 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: vslidedown.vi v10, v10, 2 ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, mu +; RV32-NEXT: vsetivli zero, 6, e32, m2, tu, mu ; RV32-NEXT: vle32.v v8, (a0), v0.t ; RV32-NEXT: vse32.v v8, (a3) ; RV32-NEXT: ret @@ -58,7 +58,7 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: vslidedown.vi v10, v10, 2 ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, mu +; RV64-NEXT: vsetivli zero, 6, e32, m2, tu, mu ; RV64-NEXT: vle32.v v8, (a0), v0.t ; RV64-NEXT: vse32.v v8, (a3) ; RV64-NEXT: ret @@ -239,7 +239,7 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV32-NEXT: vslidedown.vi v10, v10, 2 ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, mu +; RV32-NEXT: vsetivli zero, 6, e32, m2, tu, mu ; RV32-NEXT: vle32.v v8, (a0), v0.t ; RV32-NEXT: vse32.v v8, (a3) ; RV32-NEXT: ret @@ -269,7 +269,7 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV64-NEXT: vslidedown.vi v10, v10, 2 ; RV64-NEXT: 
vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, mu +; RV64-NEXT: vsetivli zero, 6, e32, m2, tu, mu ; RV64-NEXT: vle32.v v8, (a0), v0.t ; RV64-NEXT: vse32.v v8, (a3) ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll index 7620ba5310720..3c6515595b642 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll @@ -1065,3 +1065,15 @@ define @vmerge_larger_vl_poison_passthru( % ret %b } +; The vadd's new policy should be tail undisturbed since the false op of the +; vmerge moves from the body to the tail, and we need to preserve it. +define @vmerge_larger_vl_false_becomes_tail( %false, %x, %y, %m) { +; CHECK-LABEL: vmerge_larger_vl_false_becomes_tail: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, mu +; CHECK-NEXT: vadd.vv v8, v9, v10, v0.t +; CHECK-NEXT: ret + %a = call @llvm.riscv.vadd.nxv2i32.nxv2i32( poison, %x, %y, i64 2) + %b = call @llvm.riscv.vmerge.nxv2i32.nxv2i32( poison, %false, %a, %m, i64 3) + ret %b +} diff --git a/llvm/test/CodeGen/RISCV/zcmp-prolog-epilog-crash.mir b/llvm/test/CodeGen/RISCV/zcmp-prolog-epilog-crash.mir new file mode 100644 index 0000000000000..64556ec0b343a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/zcmp-prolog-epilog-crash.mir @@ -0,0 +1,158 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# REQUIRES: asserts +# RUN: llc %s -o - -mtriple=riscv32 -mattr=+zcmp -target-abi ilp32 -run-pass=prologepilog \ +# RUN: -simplify-mir -verify-machineinstrs | FileCheck %s + +--- | + define hidden void @f(fp128 %a) local_unnamed_addr #0 { + entry: + %0 = bitcast fp128 %a to i128 + %and.i = lshr i128 %0, 112 + %1 = trunc i128 %and.i to i32 + %2 = and i32 %1, 32767 + %or.i = or i128 poison, 5192296858534827628530496329220096 + br label %if.end.i + + if.end.i: ; 
preds = %entry + br i1 poison, label %exit, label %if.then12.i + + if.then12.i: ; preds = %if.end.i + %sub13.i = sub nuw nsw i32 16495, %2 + %sh_prom.i = zext i32 %sub13.i to i128 + %shr14.i = lshr i128 %or.i, %sh_prom.i + %conv15.i = trunc i128 %shr14.i to i32 + br label %exit + + exit: ; preds = %if.then12.i, %if.end.i + %retval.0.i = phi i32 [ %conv15.i, %if.then12.i ], [ -1, %if.end.i ] + ret void + } +... +--- +name: f +alignment: 2 +tracksRegLiveness: true +tracksDebugUserValues: true +liveins: + - { reg: '$x10' } +frameInfo: + maxAlignment: 1 + localFrameSize: 32 + savePoint: '%bb.2' + restorePoint: '%bb.2' +stack: + - { id: 0, size: 32, alignment: 1, local-offset: -32 } +machineFunctionInfo: + varArgsFrameIndex: 0 + varArgsSaveSize: 0 +body: | + ; CHECK-LABEL: name: f + ; CHECK: bb.0.entry: + ; CHECK-NEXT: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $x10 = ADDI $x0, -1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.if.end.i: + ; CHECK-NEXT: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: BNE $x0, $x0, %bb.3 + ; CHECK-NEXT: PseudoBR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.if.then12.i: + ; CHECK-NEXT: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -32 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 32 + ; CHECK-NEXT: SB $x0, $x2, 31 :: (store (s8) into %stack.0 + 31) + ; CHECK-NEXT: SB $x0, $x2, 30 :: (store (s8) into %stack.0 + 30) + ; CHECK-NEXT: SB $x0, $x2, 29 :: (store (s8) into %stack.0 + 29) + ; CHECK-NEXT: SB $x0, $x2, 28 :: (store (s8) into %stack.0 + 28) + ; CHECK-NEXT: SB $x0, $x2, 27 :: (store (s8) into %stack.0 + 27) + ; CHECK-NEXT: SB $x0, $x2, 26 :: (store (s8) into %stack.0 + 26) + ; CHECK-NEXT: SB $x0, $x2, 25 :: (store (s8) into %stack.0 + 25) + ; CHECK-NEXT: SB $x0, $x2, 24 :: (store (s8) into %stack.0 + 24) + ; CHECK-NEXT: SB $x0, $x2, 23 :: (store (s8) into %stack.0 + 23) + ; CHECK-NEXT: SB $x0, $x2, 22 :: (store (s8) into %stack.0 + 22) + ; CHECK-NEXT: SB $x0, 
$x2, 21 :: (store (s8) into %stack.0 + 21) + ; CHECK-NEXT: SB $x0, $x2, 20 :: (store (s8) into %stack.0 + 20) + ; CHECK-NEXT: SB $x0, $x2, 19 :: (store (s8) into %stack.0 + 19) + ; CHECK-NEXT: SB $x0, $x2, 18 :: (store (s8) into %stack.0 + 18) + ; CHECK-NEXT: SB $x0, $x2, 17 :: (store (s8) into %stack.0 + 17) + ; CHECK-NEXT: SB $x0, $x2, 16 :: (store (s8) into %stack.0 + 16) + ; CHECK-NEXT: SB renamable $x10, $x2, 0 :: (store (s8) into %stack.0) + ; CHECK-NEXT: SB renamable $x10, $x2, 4 :: (store (s8) into %stack.0 + 4) + ; CHECK-NEXT: renamable $x11 = SRLI renamable $x10, 24 + ; CHECK-NEXT: SB renamable $x11, $x2, 3 :: (store (s8) into %stack.0 + 3) + ; CHECK-NEXT: renamable $x12 = SRLI renamable $x10, 16 + ; CHECK-NEXT: SB renamable $x12, $x2, 2 :: (store (s8) into %stack.0 + 2) + ; CHECK-NEXT: renamable $x13 = SRLI renamable $x10, 8 + ; CHECK-NEXT: SB renamable $x13, $x2, 1 :: (store (s8) into %stack.0 + 1) + ; CHECK-NEXT: SB renamable $x10, $x2, 8 :: (store (s8) into %stack.0 + 8) + ; CHECK-NEXT: SB renamable $x11, $x2, 7 :: (store (s8) into %stack.0 + 7) + ; CHECK-NEXT: SB renamable $x12, $x2, 6 :: (store (s8) into %stack.0 + 6) + ; CHECK-NEXT: SB renamable $x13, $x2, 5 :: (store (s8) into %stack.0 + 5) + ; CHECK-NEXT: SB killed renamable $x10, $x2, 12 :: (store (s8) into %stack.0 + 12) + ; CHECK-NEXT: SB renamable $x11, $x2, 11 :: (store (s8) into %stack.0 + 11) + ; CHECK-NEXT: SB renamable $x12, $x2, 10 :: (store (s8) into %stack.0 + 10) + ; CHECK-NEXT: SB renamable $x13, $x2, 9 :: (store (s8) into %stack.0 + 9) + ; CHECK-NEXT: SB killed renamable $x11, $x2, 15 :: (store (s8) into %stack.0 + 15) + ; CHECK-NEXT: SB killed renamable $x12, $x2, 14 :: (store (s8) into %stack.0 + 14) + ; CHECK-NEXT: SB killed renamable $x13, $x2, 13 :: (store (s8) into %stack.0 + 13) + ; CHECK-NEXT: $x2 = frame-destroy ADDI $x2, 32 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.exit: + ; CHECK-NEXT: PseudoRET + bb.0.entry: + liveins: $x10 + + renamable $x10 = ADDI $x0, -1 + + 
bb.1.if.end.i: + liveins: $x10 + + BNE $x0, $x0, %bb.3 + PseudoBR %bb.2 + + bb.2.if.then12.i: + liveins: $x10 + + SB $x0, %stack.0, 31 :: (store (s8) into %stack.0 + 31) + SB $x0, %stack.0, 30 :: (store (s8) into %stack.0 + 30) + SB $x0, %stack.0, 29 :: (store (s8) into %stack.0 + 29) + SB $x0, %stack.0, 28 :: (store (s8) into %stack.0 + 28) + SB $x0, %stack.0, 27 :: (store (s8) into %stack.0 + 27) + SB $x0, %stack.0, 26 :: (store (s8) into %stack.0 + 26) + SB $x0, %stack.0, 25 :: (store (s8) into %stack.0 + 25) + SB $x0, %stack.0, 24 :: (store (s8) into %stack.0 + 24) + SB $x0, %stack.0, 23 :: (store (s8) into %stack.0 + 23) + SB $x0, %stack.0, 22 :: (store (s8) into %stack.0 + 22) + SB $x0, %stack.0, 21 :: (store (s8) into %stack.0 + 21) + SB $x0, %stack.0, 20 :: (store (s8) into %stack.0 + 20) + SB $x0, %stack.0, 19 :: (store (s8) into %stack.0 + 19) + SB $x0, %stack.0, 18 :: (store (s8) into %stack.0 + 18) + SB $x0, %stack.0, 17 :: (store (s8) into %stack.0 + 17) + SB $x0, %stack.0, 16 :: (store (s8) into %stack.0 + 16) + SB renamable $x10, %stack.0, 0 :: (store (s8) into %stack.0) + SB renamable $x10, %stack.0, 4 :: (store (s8) into %stack.0 + 4) + renamable $x11 = SRLI renamable $x10, 24 + SB renamable $x11, %stack.0, 3 :: (store (s8) into %stack.0 + 3) + renamable $x12 = SRLI renamable $x10, 16 + SB renamable $x12, %stack.0, 2 :: (store (s8) into %stack.0 + 2) + renamable $x13 = SRLI renamable $x10, 8 + SB renamable $x13, %stack.0, 1 :: (store (s8) into %stack.0 + 1) + SB renamable $x10, %stack.0, 8 :: (store (s8) into %stack.0 + 8) + SB renamable $x11, %stack.0, 7 :: (store (s8) into %stack.0 + 7) + SB renamable $x12, %stack.0, 6 :: (store (s8) into %stack.0 + 6) + SB renamable $x13, %stack.0, 5 :: (store (s8) into %stack.0 + 5) + SB killed renamable $x10, %stack.0, 12 :: (store (s8) into %stack.0 + 12) + SB renamable $x11, %stack.0, 11 :: (store (s8) into %stack.0 + 11) + SB renamable $x12, %stack.0, 10 :: (store (s8) into %stack.0 + 10) + SB renamable 
$x13, %stack.0, 9 :: (store (s8) into %stack.0 + 9) + SB killed renamable $x11, %stack.0, 15 :: (store (s8) into %stack.0 + 15) + SB killed renamable $x12, %stack.0, 14 :: (store (s8) into %stack.0 + 14) + SB killed renamable $x13, %stack.0, 13 :: (store (s8) into %stack.0 + 13) + + bb.3.exit: + PseudoRET + +... diff --git a/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll b/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll index 293a67e59e0c9..b311c8831457b 100644 --- a/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avxneconvert-intrinsics.ll @@ -198,7 +198,6 @@ define <8 x bfloat> @test_int_x86_vcvtneps2bf16128(<4 x float> %A) { ; CHECK-LABEL: test_int_x86_vcvtneps2bf16128: ; CHECK: # %bb.0: ; CHECK-NEXT: {vex} vcvtneps2bf16 %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7a,0x72,0xc0] -; CHECK-NEXT: # kill: def $xmm1 killed $xmm0 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %ret = call <8 x bfloat> @llvm.x86.vcvtneps2bf16128(<4 x float> %A) ret <8 x bfloat> %ret @@ -209,7 +208,6 @@ define <8 x bfloat> @test_int_x86_vcvtneps2bf16256(<8 x float> %A) { ; CHECK-LABEL: test_int_x86_vcvtneps2bf16256: ; CHECK: # %bb.0: ; CHECK-NEXT: {vex} vcvtneps2bf16 %ymm0, %xmm0 # encoding: [0xc4,0xe2,0x7e,0x72,0xc0] -; CHECK-NEXT: # kill: def $xmm1 killed $xmm0 ; CHECK-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %ret = call <8 x bfloat> @llvm.x86.vcvtneps2bf16256(<8 x float> %A) diff --git a/llvm/test/CodeGen/X86/bfloat.ll b/llvm/test/CodeGen/X86/bfloat.ll index f680a39a482ec..7a82515ad24b7 100644 --- a/llvm/test/CodeGen/X86/bfloat.ll +++ b/llvm/test/CodeGen/X86/bfloat.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=CHECK,SSE2 -; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=avx512bf16 | FileCheck %s --check-prefixes=CHECK,BF16 +; RUN: llc < %s -mtriple=x86_64-linux-gnu 
-mattr=avx512bf16,avx512vl | FileCheck %s --check-prefixes=CHECK,AVX,F16,BF16 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=avx512bf16,avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,AVX,F16,FP16 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=avxneconvert,f16c | FileCheck %s --check-prefixes=CHECK,AVX,AVXNC define void @add(ptr %pa, ptr %pb, ptr %pc) nounwind { ; SSE2-LABEL: add: @@ -20,22 +22,22 @@ define void @add(ptr %pa, ptr %pb, ptr %pc) nounwind { ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: retq ; -; BF16-LABEL: add: -; BF16: # %bb.0: -; BF16-NEXT: pushq %rbx -; BF16-NEXT: movq %rdx, %rbx -; BF16-NEXT: movzwl (%rsi), %eax -; BF16-NEXT: shll $16, %eax -; BF16-NEXT: vmovd %eax, %xmm0 -; BF16-NEXT: movzwl (%rdi), %eax -; BF16-NEXT: shll $16, %eax -; BF16-NEXT: vmovd %eax, %xmm1 -; BF16-NEXT: vaddss %xmm0, %xmm1, %xmm0 -; BF16-NEXT: callq __truncsfbf2@PLT -; BF16-NEXT: vmovd %xmm0, %eax -; BF16-NEXT: movw %ax, (%rbx) -; BF16-NEXT: popq %rbx -; BF16-NEXT: retq +; AVX-LABEL: add: +; AVX: # %bb.0: +; AVX-NEXT: pushq %rbx +; AVX-NEXT: movq %rdx, %rbx +; AVX-NEXT: movzwl (%rsi), %eax +; AVX-NEXT: shll $16, %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: movzwl (%rdi), %eax +; AVX-NEXT: shll $16, %eax +; AVX-NEXT: vmovd %eax, %xmm1 +; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: callq __truncsfbf2@PLT +; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: movw %ax, (%rbx) +; AVX-NEXT: popq %rbx +; AVX-NEXT: retq %a = load bfloat, ptr %pa %b = load bfloat, ptr %pb %add = fadd bfloat %a, %b @@ -58,19 +60,19 @@ define bfloat @add2(bfloat %a, bfloat %b) nounwind { ; SSE2-NEXT: popq %rax ; SSE2-NEXT: retq ; -; BF16-LABEL: add2: -; BF16: # %bb.0: -; BF16-NEXT: pushq %rax -; BF16-NEXT: vmovd %xmm0, %eax -; BF16-NEXT: vmovd %xmm1, %ecx -; BF16-NEXT: shll $16, %ecx -; BF16-NEXT: vmovd %ecx, %xmm0 -; BF16-NEXT: shll $16, %eax -; BF16-NEXT: vmovd %eax, %xmm1 -; BF16-NEXT: vaddss %xmm0, %xmm1, %xmm0 -; BF16-NEXT: callq __truncsfbf2@PLT -; BF16-NEXT: popq %rax -; BF16-NEXT: 
retq +; AVX-LABEL: add2: +; AVX: # %bb.0: +; AVX-NEXT: pushq %rax +; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: vmovd %xmm1, %ecx +; AVX-NEXT: shll $16, %ecx +; AVX-NEXT: vmovd %ecx, %xmm0 +; AVX-NEXT: shll $16, %eax +; AVX-NEXT: vmovd %eax, %xmm1 +; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: callq __truncsfbf2@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: retq %add = fadd bfloat %a, %b ret bfloat %add } @@ -105,34 +107,34 @@ define void @add_double(ptr %pa, ptr %pb, ptr %pc) nounwind { ; SSE2-NEXT: popq %rbp ; SSE2-NEXT: retq ; -; BF16-LABEL: add_double: -; BF16: # %bb.0: -; BF16-NEXT: pushq %rbp -; BF16-NEXT: pushq %r14 -; BF16-NEXT: pushq %rbx -; BF16-NEXT: movq %rdx, %rbx -; BF16-NEXT: movq %rsi, %r14 -; BF16-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; BF16-NEXT: callq __truncdfbf2@PLT -; BF16-NEXT: vmovd %xmm0, %ebp -; BF16-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; BF16-NEXT: callq __truncdfbf2@PLT -; BF16-NEXT: vmovd %xmm0, %eax -; BF16-NEXT: shll $16, %eax -; BF16-NEXT: vmovd %eax, %xmm0 -; BF16-NEXT: shll $16, %ebp -; BF16-NEXT: vmovd %ebp, %xmm1 -; BF16-NEXT: vaddss %xmm0, %xmm1, %xmm0 -; BF16-NEXT: callq __truncsfbf2@PLT -; BF16-NEXT: vmovd %xmm0, %eax -; BF16-NEXT: shll $16, %eax -; BF16-NEXT: vmovd %eax, %xmm0 -; BF16-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; BF16-NEXT: vmovsd %xmm0, (%rbx) -; BF16-NEXT: popq %rbx -; BF16-NEXT: popq %r14 -; BF16-NEXT: popq %rbp -; BF16-NEXT: retq +; AVX-LABEL: add_double: +; AVX: # %bb.0: +; AVX-NEXT: pushq %rbp +; AVX-NEXT: pushq %r14 +; AVX-NEXT: pushq %rbx +; AVX-NEXT: movq %rdx, %rbx +; AVX-NEXT: movq %rsi, %r14 +; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero +; AVX-NEXT: callq __truncdfbf2@PLT +; AVX-NEXT: vmovd %xmm0, %ebp +; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero +; AVX-NEXT: callq __truncdfbf2@PLT +; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: shll $16, %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: shll $16, %ebp +; AVX-NEXT: vmovd %ebp, %xmm1 +; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: callq 
__truncsfbf2@PLT +; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: shll $16, %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: vmovsd %xmm0, (%rbx) +; AVX-NEXT: popq %rbx +; AVX-NEXT: popq %r14 +; AVX-NEXT: popq %rbp +; AVX-NEXT: retq %la = load double, ptr %pa %a = fptrunc double %la to bfloat %lb = load double, ptr %pb @@ -169,30 +171,30 @@ define double @add_double2(double %da, double %db) nounwind { ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: retq ; -; BF16-LABEL: add_double2: -; BF16: # %bb.0: -; BF16-NEXT: pushq %rbx -; BF16-NEXT: subq $16, %rsp -; BF16-NEXT: vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; BF16-NEXT: callq __truncdfbf2@PLT -; BF16-NEXT: vmovd %xmm0, %ebx -; BF16-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload -; BF16-NEXT: # xmm0 = mem[0],zero -; BF16-NEXT: callq __truncdfbf2@PLT -; BF16-NEXT: vmovd %xmm0, %eax -; BF16-NEXT: shll $16, %eax -; BF16-NEXT: vmovd %eax, %xmm0 -; BF16-NEXT: shll $16, %ebx -; BF16-NEXT: vmovd %ebx, %xmm1 -; BF16-NEXT: vaddss %xmm0, %xmm1, %xmm0 -; BF16-NEXT: callq __truncsfbf2@PLT -; BF16-NEXT: vmovd %xmm0, %eax -; BF16-NEXT: shll $16, %eax -; BF16-NEXT: vmovd %eax, %xmm0 -; BF16-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; BF16-NEXT: addq $16, %rsp -; BF16-NEXT: popq %rbx -; BF16-NEXT: retq +; AVX-LABEL: add_double2: +; AVX: # %bb.0: +; AVX-NEXT: pushq %rbx +; AVX-NEXT: subq $16, %rsp +; AVX-NEXT: vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; AVX-NEXT: callq __truncdfbf2@PLT +; AVX-NEXT: vmovd %xmm0, %ebx +; AVX-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload +; AVX-NEXT: # xmm0 = mem[0],zero +; AVX-NEXT: callq __truncdfbf2@PLT +; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: shll $16, %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: shll $16, %ebx +; AVX-NEXT: vmovd %ebx, %xmm1 +; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; AVX-NEXT: callq __truncsfbf2@PLT +; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: shll $16, %eax +; AVX-NEXT: vmovd %eax, %xmm0 
+; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 +; AVX-NEXT: addq $16, %rsp +; AVX-NEXT: popq %rbx +; AVX-NEXT: retq %a = fptrunc double %da to bfloat %b = fptrunc double %db to bfloat %add = fadd bfloat %a, %b @@ -215,19 +217,19 @@ define void @add_constant(ptr %pa, ptr %pc) nounwind { ; SSE2-NEXT: popq %rbx ; SSE2-NEXT: retq ; -; BF16-LABEL: add_constant: -; BF16: # %bb.0: -; BF16-NEXT: pushq %rbx -; BF16-NEXT: movq %rsi, %rbx -; BF16-NEXT: movzwl (%rdi), %eax -; BF16-NEXT: shll $16, %eax -; BF16-NEXT: vmovd %eax, %xmm0 -; BF16-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; BF16-NEXT: callq __truncsfbf2@PLT -; BF16-NEXT: vmovd %xmm0, %eax -; BF16-NEXT: movw %ax, (%rbx) -; BF16-NEXT: popq %rbx -; BF16-NEXT: retq +; AVX-LABEL: add_constant: +; AVX: # %bb.0: +; AVX-NEXT: pushq %rbx +; AVX-NEXT: movq %rsi, %rbx +; AVX-NEXT: movzwl (%rdi), %eax +; AVX-NEXT: shll $16, %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: callq __truncsfbf2@PLT +; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: movw %ax, (%rbx) +; AVX-NEXT: popq %rbx +; AVX-NEXT: retq %a = load bfloat, ptr %pa %add = fadd bfloat %a, 1.0 store bfloat %add, ptr %pc @@ -246,16 +248,16 @@ define bfloat @add_constant2(bfloat %a) nounwind { ; SSE2-NEXT: popq %rax ; SSE2-NEXT: retq ; -; BF16-LABEL: add_constant2: -; BF16: # %bb.0: -; BF16-NEXT: pushq %rax -; BF16-NEXT: vmovd %xmm0, %eax -; BF16-NEXT: shll $16, %eax -; BF16-NEXT: vmovd %eax, %xmm0 -; BF16-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; BF16-NEXT: callq __truncsfbf2@PLT -; BF16-NEXT: popq %rax -; BF16-NEXT: retq +; AVX-LABEL: add_constant2: +; AVX: # %bb.0: +; AVX-NEXT: pushq %rax +; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: shll $16, %eax +; AVX-NEXT: vmovd %eax, %xmm0 +; AVX-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: callq __truncsfbf2@PLT +; AVX-NEXT: popq %rax +; AVX-NEXT: retq %add = fadd bfloat %a, 1.0 ret bfloat %add } @@ -540,6 +542,235 
@@ define <8 x bfloat> @addv(<8 x bfloat> %a, <8 x bfloat> %b) nounwind { ; BF16-NEXT: popq %r15 ; BF16-NEXT: popq %rbp ; BF16-NEXT: retq +; +; FP16-LABEL: addv: +; FP16: # %bb.0: +; FP16-NEXT: pushq %rbp +; FP16-NEXT: pushq %r15 +; FP16-NEXT: pushq %r14 +; FP16-NEXT: pushq %r13 +; FP16-NEXT: pushq %r12 +; FP16-NEXT: pushq %rbx +; FP16-NEXT: subq $40, %rsp +; FP16-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; FP16-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; FP16-NEXT: vmovw %xmm1, %eax +; FP16-NEXT: shll $16, %eax +; FP16-NEXT: vmovd %eax, %xmm2 +; FP16-NEXT: vmovw %xmm0, %eax +; FP16-NEXT: shll $16, %eax +; FP16-NEXT: vmovd %eax, %xmm1 +; FP16-NEXT: vaddss %xmm2, %xmm1, %xmm0 +; FP16-NEXT: callq __truncsfbf2@PLT +; FP16-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; FP16-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; FP16-NEXT: vpextrw $7, %xmm0, %eax +; FP16-NEXT: shll $16, %eax +; FP16-NEXT: vmovd %eax, %xmm0 +; FP16-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; FP16-NEXT: vpextrw $7, %xmm1, %eax +; FP16-NEXT: shll $16, %eax +; FP16-NEXT: vmovd %eax, %xmm1 +; FP16-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; FP16-NEXT: callq __truncsfbf2@PLT +; FP16-NEXT: vmovd %xmm0, %ebp +; FP16-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; FP16-NEXT: vpextrw $6, %xmm0, %eax +; FP16-NEXT: shll $16, %eax +; FP16-NEXT: vmovd %eax, %xmm0 +; FP16-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; FP16-NEXT: vpextrw $6, %xmm1, %eax +; FP16-NEXT: shll $16, %eax +; FP16-NEXT: vmovd %eax, %xmm1 +; FP16-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; FP16-NEXT: callq __truncsfbf2@PLT +; FP16-NEXT: vmovd %xmm0, %r14d +; FP16-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; FP16-NEXT: vpextrw $5, %xmm0, %eax +; FP16-NEXT: shll $16, %eax +; FP16-NEXT: vmovd %eax, %xmm0 +; FP16-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; FP16-NEXT: vpextrw $5, %xmm1, %eax +; FP16-NEXT: shll $16, %eax +; FP16-NEXT: vmovd %eax, %xmm1 
+; FP16-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; FP16-NEXT: callq __truncsfbf2@PLT +; FP16-NEXT: vmovd %xmm0, %r15d +; FP16-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; FP16-NEXT: vpextrw $4, %xmm0, %eax +; FP16-NEXT: shll $16, %eax +; FP16-NEXT: vmovd %eax, %xmm0 +; FP16-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; FP16-NEXT: vpextrw $4, %xmm1, %eax +; FP16-NEXT: shll $16, %eax +; FP16-NEXT: vmovd %eax, %xmm1 +; FP16-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; FP16-NEXT: callq __truncsfbf2@PLT +; FP16-NEXT: vmovd %xmm0, %r12d +; FP16-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; FP16-NEXT: vpextrw $3, %xmm0, %eax +; FP16-NEXT: shll $16, %eax +; FP16-NEXT: vmovd %eax, %xmm0 +; FP16-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; FP16-NEXT: vpextrw $3, %xmm1, %eax +; FP16-NEXT: shll $16, %eax +; FP16-NEXT: vmovd %eax, %xmm1 +; FP16-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; FP16-NEXT: callq __truncsfbf2@PLT +; FP16-NEXT: vmovd %xmm0, %r13d +; FP16-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; FP16-NEXT: vpextrw $2, %xmm0, %eax +; FP16-NEXT: shll $16, %eax +; FP16-NEXT: vmovd %eax, %xmm0 +; FP16-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; FP16-NEXT: vpextrw $2, %xmm1, %eax +; FP16-NEXT: shll $16, %eax +; FP16-NEXT: vmovd %eax, %xmm1 +; FP16-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; FP16-NEXT: callq __truncsfbf2@PLT +; FP16-NEXT: vmovd %xmm0, %ebx +; FP16-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; FP16-NEXT: vpextrw $1, %xmm0, %eax +; FP16-NEXT: shll $16, %eax +; FP16-NEXT: vmovd %eax, %xmm0 +; FP16-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; FP16-NEXT: vpextrw $1, %xmm1, %eax +; FP16-NEXT: shll $16, %eax +; FP16-NEXT: vmovd %eax, %xmm1 +; FP16-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; FP16-NEXT: callq __truncsfbf2@PLT +; FP16-NEXT: vmovd %xmm0, %eax +; FP16-NEXT: vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; FP16-NEXT: # xmm0 = mem[0],zero,zero,zero +; FP16-NEXT: vpinsrw $1, %eax, %xmm0, 
%xmm0 +; FP16-NEXT: vpinsrw $2, %ebx, %xmm0, %xmm0 +; FP16-NEXT: vpinsrw $3, %r13d, %xmm0, %xmm0 +; FP16-NEXT: vpinsrw $4, %r12d, %xmm0, %xmm0 +; FP16-NEXT: vpinsrw $5, %r15d, %xmm0, %xmm0 +; FP16-NEXT: vpinsrw $6, %r14d, %xmm0, %xmm0 +; FP16-NEXT: vpinsrw $7, %ebp, %xmm0, %xmm0 +; FP16-NEXT: addq $40, %rsp +; FP16-NEXT: popq %rbx +; FP16-NEXT: popq %r12 +; FP16-NEXT: popq %r13 +; FP16-NEXT: popq %r14 +; FP16-NEXT: popq %r15 +; FP16-NEXT: popq %rbp +; FP16-NEXT: retq +; +; AVXNC-LABEL: addv: +; AVXNC: # %bb.0: +; AVXNC-NEXT: pushq %rbp +; AVXNC-NEXT: pushq %r15 +; AVXNC-NEXT: pushq %r14 +; AVXNC-NEXT: pushq %r13 +; AVXNC-NEXT: pushq %r12 +; AVXNC-NEXT: pushq %rbx +; AVXNC-NEXT: subq $40, %rsp +; AVXNC-NEXT: vmovdqa %xmm1, (%rsp) # 16-byte Spill +; AVXNC-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVXNC-NEXT: vpextrw $7, %xmm1, %eax +; AVXNC-NEXT: shll $16, %eax +; AVXNC-NEXT: vmovd %eax, %xmm2 +; AVXNC-NEXT: vpextrw $7, %xmm0, %eax +; AVXNC-NEXT: shll $16, %eax +; AVXNC-NEXT: vmovd %eax, %xmm1 +; AVXNC-NEXT: vaddss %xmm2, %xmm1, %xmm0 +; AVXNC-NEXT: callq __truncsfbf2@PLT +; AVXNC-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVXNC-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload +; AVXNC-NEXT: vpextrw $6, %xmm0, %eax +; AVXNC-NEXT: shll $16, %eax +; AVXNC-NEXT: vmovd %eax, %xmm0 +; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVXNC-NEXT: vpextrw $6, %xmm1, %eax +; AVXNC-NEXT: shll $16, %eax +; AVXNC-NEXT: vmovd %eax, %xmm1 +; AVXNC-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; AVXNC-NEXT: callq __truncsfbf2@PLT +; AVXNC-NEXT: vmovd %xmm0, %ebp +; AVXNC-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload +; AVXNC-NEXT: vpextrw $5, %xmm0, %eax +; AVXNC-NEXT: shll $16, %eax +; AVXNC-NEXT: vmovd %eax, %xmm0 +; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVXNC-NEXT: vpextrw $5, %xmm1, %eax +; AVXNC-NEXT: shll $16, %eax +; AVXNC-NEXT: vmovd %eax, %xmm1 +; AVXNC-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; 
AVXNC-NEXT: callq __truncsfbf2@PLT +; AVXNC-NEXT: vmovd %xmm0, %r14d +; AVXNC-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload +; AVXNC-NEXT: vpextrw $4, %xmm0, %eax +; AVXNC-NEXT: shll $16, %eax +; AVXNC-NEXT: vmovd %eax, %xmm0 +; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVXNC-NEXT: vpextrw $4, %xmm1, %eax +; AVXNC-NEXT: shll $16, %eax +; AVXNC-NEXT: vmovd %eax, %xmm1 +; AVXNC-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; AVXNC-NEXT: callq __truncsfbf2@PLT +; AVXNC-NEXT: vmovd %xmm0, %r15d +; AVXNC-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload +; AVXNC-NEXT: vpextrw $3, %xmm0, %eax +; AVXNC-NEXT: shll $16, %eax +; AVXNC-NEXT: vmovd %eax, %xmm0 +; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVXNC-NEXT: vpextrw $3, %xmm1, %eax +; AVXNC-NEXT: shll $16, %eax +; AVXNC-NEXT: vmovd %eax, %xmm1 +; AVXNC-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; AVXNC-NEXT: callq __truncsfbf2@PLT +; AVXNC-NEXT: vmovd %xmm0, %r12d +; AVXNC-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload +; AVXNC-NEXT: vpextrw $2, %xmm0, %eax +; AVXNC-NEXT: shll $16, %eax +; AVXNC-NEXT: vmovd %eax, %xmm0 +; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVXNC-NEXT: vpextrw $2, %xmm1, %eax +; AVXNC-NEXT: shll $16, %eax +; AVXNC-NEXT: vmovd %eax, %xmm1 +; AVXNC-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; AVXNC-NEXT: callq __truncsfbf2@PLT +; AVXNC-NEXT: vmovd %xmm0, %r13d +; AVXNC-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload +; AVXNC-NEXT: vpextrw $1, %xmm0, %eax +; AVXNC-NEXT: shll $16, %eax +; AVXNC-NEXT: vmovd %eax, %xmm0 +; AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVXNC-NEXT: vpextrw $1, %xmm1, %eax +; AVXNC-NEXT: shll $16, %eax +; AVXNC-NEXT: vmovd %eax, %xmm1 +; AVXNC-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; AVXNC-NEXT: callq __truncsfbf2@PLT +; AVXNC-NEXT: vmovd %xmm0, %ebx +; AVXNC-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload +; AVXNC-NEXT: vmovd %xmm0, %eax +; AVXNC-NEXT: shll $16, %eax +; AVXNC-NEXT: vmovd %eax, %xmm0 +; 
AVXNC-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; AVXNC-NEXT: vmovd %xmm1, %eax +; AVXNC-NEXT: shll $16, %eax +; AVXNC-NEXT: vmovd %eax, %xmm1 +; AVXNC-NEXT: vaddss %xmm0, %xmm1, %xmm0 +; AVXNC-NEXT: callq __truncsfbf2@PLT +; AVXNC-NEXT: vmovd %xmm0, %eax +; AVXNC-NEXT: vmovd %eax, %xmm0 +; AVXNC-NEXT: vpinsrw $1, %ebx, %xmm0, %xmm0 +; AVXNC-NEXT: vpinsrw $2, %r13d, %xmm0, %xmm0 +; AVXNC-NEXT: vpinsrw $3, %r12d, %xmm0, %xmm0 +; AVXNC-NEXT: vpinsrw $4, %r15d, %xmm0, %xmm0 +; AVXNC-NEXT: vpinsrw $5, %r14d, %xmm0, %xmm0 +; AVXNC-NEXT: vpinsrw $6, %ebp, %xmm0, %xmm0 +; AVXNC-NEXT: vpinsrw $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVXNC-NEXT: addq $40, %rsp +; AVXNC-NEXT: popq %rbx +; AVXNC-NEXT: popq %r12 +; AVXNC-NEXT: popq %r13 +; AVXNC-NEXT: popq %r14 +; AVXNC-NEXT: popq %r15 +; AVXNC-NEXT: popq %rbp +; AVXNC-NEXT: retq %add = fadd <8 x bfloat> %a, %b ret <8 x bfloat> %add } @@ -554,13 +785,13 @@ define <2 x bfloat> @pr62997(bfloat %a, bfloat %b) { ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; SSE2-NEXT: retq ; -; BF16-LABEL: pr62997: -; BF16: # %bb.0: -; BF16-NEXT: vmovd %xmm1, %eax -; BF16-NEXT: vmovd %xmm0, %ecx -; BF16-NEXT: vmovd %ecx, %xmm0 -; BF16-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 -; BF16-NEXT: retq +; AVX-LABEL: pr62997: +; AVX: # %bb.0: +; AVX-NEXT: vmovd %xmm1, %eax +; AVX-NEXT: vmovd %xmm0, %ecx +; AVX-NEXT: vmovd %ecx, %xmm0 +; AVX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 +; AVX-NEXT: retq %1 = insertelement <2 x bfloat> undef, bfloat %a, i64 0 %2 = insertelement <2 x bfloat> %1, bfloat %b, i64 1 ret <2 x bfloat> %2 @@ -575,10 +806,16 @@ define <32 x bfloat> @pr63017() { ; SSE2-NEXT: xorps %xmm3, %xmm3 ; SSE2-NEXT: retq ; -; BF16-LABEL: pr63017: -; BF16: # %bb.0: -; BF16-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; BF16-NEXT: retq +; F16-LABEL: pr63017: +; F16: # %bb.0: +; F16-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; F16-NEXT: retq +; +; AVXNC-LABEL: pr63017: +; 
AVXNC: # %bb.0: +; AVXNC-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVXNC-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVXNC-NEXT: retq ret <32 x bfloat> zeroinitializer } @@ -1149,11 +1386,259 @@ define <32 x bfloat> @pr63017_2() nounwind { ; SSE2-NEXT: popq %r14 ; SSE2-NEXT: retq ; -; BF16-LABEL: pr63017_2: -; BF16: # %bb.0: -; BF16-NEXT: vpbroadcastw {{.*#+}} zmm0 = [49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024] -; BF16-NEXT: vmovdqu16 (%rax), %zmm0 {%k1} -; BF16-NEXT: retq +; F16-LABEL: pr63017_2: +; F16: # %bb.0: +; F16-NEXT: vpbroadcastw {{.*#+}} zmm0 = [49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024] +; F16-NEXT: vmovdqu16 (%rax), %zmm0 {%k1} +; F16-NEXT: retq +; +; AVXNC-LABEL: pr63017_2: +; AVXNC: # %bb.0: +; AVXNC-NEXT: vpbroadcastw {{.*#+}} ymm0 = [49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024] +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: vmovdqa %ymm0, %ymm1 +; AVXNC-NEXT: jne .LBB12_2 +; AVXNC-NEXT: # %bb.1: # %cond.load +; AVXNC-NEXT: vpbroadcastw {{.*#+}} ymm1 = [49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024,49024] +; AVXNC-NEXT: vpbroadcastw {{.*#+}} xmm0 = [49024,49024,49024,49024,49024,49024,49024,49024] +; AVXNC-NEXT: vpinsrw $0, (%rax), %xmm0, %xmm0 +; AVXNC-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],mem[4,5,6,7] +; AVXNC-NEXT: .LBB12_2: # %else +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_4 +; AVXNC-NEXT: # %bb.3: # %cond.load1 +; AVXNC-NEXT: vpinsrw $1, (%rax), %xmm0, %xmm2 +; AVXNC-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7] +; AVXNC-NEXT: .LBB12_4: # %else2 +; AVXNC-NEXT: xorl %eax, %eax +; 
AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_6 +; AVXNC-NEXT: # %bb.5: # %cond.load4 +; AVXNC-NEXT: vpinsrw $2, (%rax), %xmm0, %xmm2 +; AVXNC-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7] +; AVXNC-NEXT: .LBB12_6: # %else5 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_8 +; AVXNC-NEXT: # %bb.7: # %cond.load7 +; AVXNC-NEXT: vpinsrw $3, (%rax), %xmm0, %xmm2 +; AVXNC-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7] +; AVXNC-NEXT: .LBB12_8: # %else8 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_10 +; AVXNC-NEXT: # %bb.9: # %cond.load10 +; AVXNC-NEXT: vpinsrw $4, (%rax), %xmm0, %xmm2 +; AVXNC-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7] +; AVXNC-NEXT: .LBB12_10: # %else11 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_12 +; AVXNC-NEXT: # %bb.11: # %cond.load13 +; AVXNC-NEXT: vpinsrw $5, (%rax), %xmm0, %xmm2 +; AVXNC-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7] +; AVXNC-NEXT: .LBB12_12: # %else14 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_14 +; AVXNC-NEXT: # %bb.13: # %cond.load16 +; AVXNC-NEXT: vpinsrw $6, (%rax), %xmm0, %xmm2 +; AVXNC-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7] +; AVXNC-NEXT: .LBB12_14: # %else17 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_16 +; AVXNC-NEXT: # %bb.15: # %cond.load19 +; AVXNC-NEXT: vpinsrw $7, (%rax), %xmm0, %xmm2 +; AVXNC-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7] +; AVXNC-NEXT: .LBB12_16: # %else20 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_18 +; AVXNC-NEXT: # %bb.17: # %cond.load22 +; AVXNC-NEXT: vpbroadcastw (%rax), %ymm2 +; AVXNC-NEXT: vpblendw {{.*#+}} ymm2 = ymm2[0],ymm0[1,2,3,4,5,6,7],ymm2[8],ymm0[9,10,11,12,13,14,15] +; AVXNC-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7] +; 
AVXNC-NEXT: .LBB12_18: # %else23 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_20 +; AVXNC-NEXT: # %bb.19: # %cond.load25 +; AVXNC-NEXT: vpbroadcastw (%rax), %ymm2 +; AVXNC-NEXT: vpblendw {{.*#+}} ymm2 = ymm0[0],ymm2[1],ymm0[2,3,4,5,6,7,8],ymm2[9],ymm0[10,11,12,13,14,15] +; AVXNC-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7] +; AVXNC-NEXT: .LBB12_20: # %else26 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_22 +; AVXNC-NEXT: # %bb.21: # %cond.load28 +; AVXNC-NEXT: vpbroadcastw (%rax), %ymm2 +; AVXNC-NEXT: vpblendw {{.*#+}} ymm2 = ymm0[0,1],ymm2[2],ymm0[3,4,5,6,7,8,9],ymm2[10],ymm0[11,12,13,14,15] +; AVXNC-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7] +; AVXNC-NEXT: .LBB12_22: # %else29 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_24 +; AVXNC-NEXT: # %bb.23: # %cond.load31 +; AVXNC-NEXT: vpbroadcastw (%rax), %ymm2 +; AVXNC-NEXT: vpblendw {{.*#+}} ymm2 = ymm0[0,1,2],ymm2[3],ymm0[4,5,6,7,8,9,10],ymm2[11],ymm0[12,13,14,15] +; AVXNC-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7] +; AVXNC-NEXT: .LBB12_24: # %else32 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_26 +; AVXNC-NEXT: # %bb.25: # %cond.load34 +; AVXNC-NEXT: vpbroadcastw (%rax), %ymm2 +; AVXNC-NEXT: vpblendw {{.*#+}} ymm2 = ymm0[0,1,2,3],ymm2[4],ymm0[5,6,7,8,9,10,11],ymm2[12],ymm0[13,14,15] +; AVXNC-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7] +; AVXNC-NEXT: .LBB12_26: # %else35 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_28 +; AVXNC-NEXT: # %bb.27: # %cond.load37 +; AVXNC-NEXT: vpbroadcastw (%rax), %ymm2 +; AVXNC-NEXT: vpblendw {{.*#+}} ymm2 = ymm0[0,1,2,3,4],ymm2[5],ymm0[6,7,8,9,10,11,12],ymm2[13],ymm0[14,15] +; AVXNC-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7] +; AVXNC-NEXT: .LBB12_28: # %else38 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: 
testb %al, %al +; AVXNC-NEXT: jne .LBB12_30 +; AVXNC-NEXT: # %bb.29: # %cond.load40 +; AVXNC-NEXT: vpbroadcastw (%rax), %ymm2 +; AVXNC-NEXT: vpblendw {{.*#+}} ymm2 = ymm0[0,1,2,3,4,5],ymm2[6],ymm0[7,8,9,10,11,12,13],ymm2[14],ymm0[15] +; AVXNC-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7] +; AVXNC-NEXT: .LBB12_30: # %else41 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_32 +; AVXNC-NEXT: # %bb.31: # %cond.load43 +; AVXNC-NEXT: vpbroadcastw (%rax), %ymm2 +; AVXNC-NEXT: vpblendw {{.*#+}} ymm2 = ymm0[0,1,2,3,4,5,6],ymm2[7],ymm0[8,9,10,11,12,13,14],ymm2[15] +; AVXNC-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5,6,7] +; AVXNC-NEXT: .LBB12_32: # %else44 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_34 +; AVXNC-NEXT: # %bb.33: # %cond.load46 +; AVXNC-NEXT: vpinsrw $0, (%rax), %xmm1, %xmm2 +; AVXNC-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVXNC-NEXT: .LBB12_34: # %else47 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_36 +; AVXNC-NEXT: # %bb.35: # %cond.load49 +; AVXNC-NEXT: vpinsrw $1, (%rax), %xmm1, %xmm2 +; AVXNC-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVXNC-NEXT: .LBB12_36: # %else50 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_38 +; AVXNC-NEXT: # %bb.37: # %cond.load52 +; AVXNC-NEXT: vpinsrw $2, (%rax), %xmm1, %xmm2 +; AVXNC-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVXNC-NEXT: .LBB12_38: # %else53 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_40 +; AVXNC-NEXT: # %bb.39: # %cond.load55 +; AVXNC-NEXT: vpinsrw $3, (%rax), %xmm1, %xmm2 +; AVXNC-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVXNC-NEXT: .LBB12_40: # %else56 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_42 +; AVXNC-NEXT: # %bb.41: # %cond.load58 +; AVXNC-NEXT: vpinsrw 
$4, (%rax), %xmm1, %xmm2 +; AVXNC-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVXNC-NEXT: .LBB12_42: # %else59 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_44 +; AVXNC-NEXT: # %bb.43: # %cond.load61 +; AVXNC-NEXT: vpinsrw $5, (%rax), %xmm1, %xmm2 +; AVXNC-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVXNC-NEXT: .LBB12_44: # %else62 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_46 +; AVXNC-NEXT: # %bb.45: # %cond.load64 +; AVXNC-NEXT: vpinsrw $6, (%rax), %xmm1, %xmm2 +; AVXNC-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVXNC-NEXT: .LBB12_46: # %else65 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_48 +; AVXNC-NEXT: # %bb.47: # %cond.load67 +; AVXNC-NEXT: vpinsrw $7, (%rax), %xmm1, %xmm2 +; AVXNC-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] +; AVXNC-NEXT: .LBB12_48: # %else68 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_50 +; AVXNC-NEXT: # %bb.49: # %cond.load70 +; AVXNC-NEXT: vpbroadcastw (%rax), %ymm2 +; AVXNC-NEXT: vpblendw {{.*#+}} ymm2 = ymm2[0],ymm1[1,2,3,4,5,6,7],ymm2[8],ymm1[9,10,11,12,13,14,15] +; AVXNC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] +; AVXNC-NEXT: .LBB12_50: # %else71 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_52 +; AVXNC-NEXT: # %bb.51: # %cond.load73 +; AVXNC-NEXT: vpbroadcastw (%rax), %ymm2 +; AVXNC-NEXT: vpblendw {{.*#+}} ymm2 = ymm1[0],ymm2[1],ymm1[2,3,4,5,6,7,8],ymm2[9],ymm1[10,11,12,13,14,15] +; AVXNC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] +; AVXNC-NEXT: .LBB12_52: # %else74 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_54 +; AVXNC-NEXT: # %bb.53: # %cond.load76 +; AVXNC-NEXT: vpbroadcastw (%rax), %ymm2 +; AVXNC-NEXT: vpblendw {{.*#+}} ymm2 = 
ymm1[0,1],ymm2[2],ymm1[3,4,5,6,7,8,9],ymm2[10],ymm1[11,12,13,14,15] +; AVXNC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] +; AVXNC-NEXT: .LBB12_54: # %else77 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_56 +; AVXNC-NEXT: # %bb.55: # %cond.load79 +; AVXNC-NEXT: vpbroadcastw (%rax), %ymm2 +; AVXNC-NEXT: vpblendw {{.*#+}} ymm2 = ymm1[0,1,2],ymm2[3],ymm1[4,5,6,7,8,9,10],ymm2[11],ymm1[12,13,14,15] +; AVXNC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] +; AVXNC-NEXT: .LBB12_56: # %else80 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_58 +; AVXNC-NEXT: # %bb.57: # %cond.load82 +; AVXNC-NEXT: vpbroadcastw (%rax), %ymm2 +; AVXNC-NEXT: vpblendw {{.*#+}} ymm2 = ymm1[0,1,2,3],ymm2[4],ymm1[5,6,7,8,9,10,11],ymm2[12],ymm1[13,14,15] +; AVXNC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] +; AVXNC-NEXT: .LBB12_58: # %else83 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_60 +; AVXNC-NEXT: # %bb.59: # %cond.load85 +; AVXNC-NEXT: vpbroadcastw (%rax), %ymm2 +; AVXNC-NEXT: vpblendw {{.*#+}} ymm2 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6,7,8,9,10,11,12],ymm2[13],ymm1[14,15] +; AVXNC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] +; AVXNC-NEXT: .LBB12_60: # %else86 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_62 +; AVXNC-NEXT: # %bb.61: # %cond.load88 +; AVXNC-NEXT: vpbroadcastw (%rax), %ymm2 +; AVXNC-NEXT: vpblendw {{.*#+}} ymm2 = ymm1[0,1,2,3,4,5],ymm2[6],ymm1[7,8,9,10,11,12,13],ymm2[14],ymm1[15] +; AVXNC-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] +; AVXNC-NEXT: .LBB12_62: # %else89 +; AVXNC-NEXT: xorl %eax, %eax +; AVXNC-NEXT: testb %al, %al +; AVXNC-NEXT: jne .LBB12_64 +; AVXNC-NEXT: # %bb.63: # %cond.load91 +; AVXNC-NEXT: vpbroadcastw (%rax), %ymm2 +; AVXNC-NEXT: vpblendw {{.*#+}} ymm2 = ymm1[0,1,2,3,4,5,6],ymm2[7],ymm1[8,9,10,11,12,13,14],ymm2[15] +; AVXNC-NEXT: 
vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] +; AVXNC-NEXT: .LBB12_64: # %else92 +; AVXNC-NEXT: retq %1 = call <32 x bfloat> @llvm.masked.load.v32bf16.p0(ptr poison, i32 2, <32 x i1> poison, <32 x bfloat> ) ret <32 x bfloat> %1 } @@ -1173,14 +1658,806 @@ define <32 x bfloat> @pr62997_3(<32 x bfloat> %0, bfloat %1) { ; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm4[0],xmm0[1] ; SSE2-NEXT: retq ; -; BF16-LABEL: pr62997_3: -; BF16: # %bb.0: -; BF16-NEXT: vmovd %xmm1, %eax -; BF16-NEXT: vpinsrw $1, %eax, %xmm0, %xmm1 -; BF16-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0 -; BF16-NEXT: retq +; F16-LABEL: pr62997_3: +; F16: # %bb.0: +; F16-NEXT: vmovd %xmm1, %eax +; F16-NEXT: vpinsrw $1, %eax, %xmm0, %xmm1 +; F16-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0 +; F16-NEXT: retq +; +; AVXNC-LABEL: pr62997_3: +; AVXNC: # %bb.0: +; AVXNC-NEXT: vmovd %xmm2, %eax +; AVXNC-NEXT: vpinsrw $1, %eax, %xmm0, %xmm2 +; AVXNC-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7] +; AVXNC-NEXT: retq %3 = insertelement <32 x bfloat> %0, bfloat %1, i64 1 ret <32 x bfloat> %3 } declare <32 x bfloat> @llvm.masked.load.v32bf16.p0(ptr, i32, <32 x i1>, <32 x bfloat>) + +define <4 x float> @pr64460_1(<4 x bfloat> %a) { +; SSE2-LABEL: pr64460_1: +; SSE2: # %bb.0: +; SSE2-NEXT: pextrw $1, %xmm0, %eax +; SSE2-NEXT: shll $16, %eax +; SSE2-NEXT: movd %eax, %xmm2 +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: shll $16, %eax +; SSE2-NEXT: movd %eax, %xmm1 +; SSE2-NEXT: pextrw $3, %xmm0, %eax +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE2-NEXT: shll $16, %eax +; SSE2-NEXT: movd %eax, %xmm2 +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: shll $16, %eax +; SSE2-NEXT: movd %eax, %xmm0 +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; AVX-LABEL: pr64460_1: +; AVX: # %bb.0: +; AVX-NEXT: vpxor 
%xmm1, %xmm1, %xmm1 +; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; AVX-NEXT: retq + %b = fpext <4 x bfloat> %a to <4 x float> + ret <4 x float> %b +} + +define <8 x float> @pr64460_2(<8 x bfloat> %a) { +; SSE2-LABEL: pr64460_2: +; SSE2: # %bb.0: +; SSE2-NEXT: movq %xmm0, %rdx +; SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1] +; SSE2-NEXT: movq %xmm0, %rcx +; SSE2-NEXT: movq %rcx, %rax +; SSE2-NEXT: shrq $32, %rax +; SSE2-NEXT: movq %rdx, %rsi +; SSE2-NEXT: shrq $32, %rsi +; SSE2-NEXT: movl %edx, %edi +; SSE2-NEXT: andl $-65536, %edi # imm = 0xFFFF0000 +; SSE2-NEXT: movd %edi, %xmm1 +; SSE2-NEXT: movl %edx, %edi +; SSE2-NEXT: shll $16, %edi +; SSE2-NEXT: movd %edi, %xmm0 +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: shrq $48, %rdx +; SSE2-NEXT: shll $16, %edx +; SSE2-NEXT: movd %edx, %xmm1 +; SSE2-NEXT: shll $16, %esi +; SSE2-NEXT: movd %esi, %xmm2 +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE2-NEXT: movl %ecx, %edx +; SSE2-NEXT: andl $-65536, %edx # imm = 0xFFFF0000 +; SSE2-NEXT: movd %edx, %xmm2 +; SSE2-NEXT: movl %ecx, %edx +; SSE2-NEXT: shll $16, %edx +; SSE2-NEXT: movd %edx, %xmm1 +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; SSE2-NEXT: shrq $48, %rcx +; SSE2-NEXT: shll $16, %ecx +; SSE2-NEXT: movd %ecx, %xmm2 +; SSE2-NEXT: shll $16, %eax +; SSE2-NEXT: movd %eax, %xmm3 +; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] +; SSE2-NEXT: retq +; +; AVX-LABEL: pr64460_2: +; AVX: # %bb.0: +; AVX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX-NEXT: vpslld $16, %ymm0, %ymm0 +; AVX-NEXT: retq + %b = fpext <8 x bfloat> %a to <8 x float> + ret <8 x float> %b +} + +define <16 x float> 
@pr64460_3(<16 x bfloat> %a) { +; SSE2-LABEL: pr64460_3: +; SSE2: # %bb.0: +; SSE2-NEXT: movq %xmm1, %rdi +; SSE2-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1,1] +; SSE2-NEXT: movq %xmm1, %rcx +; SSE2-NEXT: movq %rcx, %rax +; SSE2-NEXT: shrq $32, %rax +; SSE2-NEXT: movq %xmm0, %r9 +; SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1] +; SSE2-NEXT: movq %xmm0, %rsi +; SSE2-NEXT: movq %rsi, %rdx +; SSE2-NEXT: shrq $32, %rdx +; SSE2-NEXT: movq %rdi, %r8 +; SSE2-NEXT: shrq $32, %r8 +; SSE2-NEXT: movq %r9, %r10 +; SSE2-NEXT: shrq $32, %r10 +; SSE2-NEXT: movl %r9d, %r11d +; SSE2-NEXT: andl $-65536, %r11d # imm = 0xFFFF0000 +; SSE2-NEXT: movd %r11d, %xmm1 +; SSE2-NEXT: movl %r9d, %r11d +; SSE2-NEXT: shll $16, %r11d +; SSE2-NEXT: movd %r11d, %xmm0 +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: shrq $48, %r9 +; SSE2-NEXT: shll $16, %r9d +; SSE2-NEXT: movd %r9d, %xmm1 +; SSE2-NEXT: shll $16, %r10d +; SSE2-NEXT: movd %r10d, %xmm2 +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE2-NEXT: movl %edi, %r9d +; SSE2-NEXT: andl $-65536, %r9d # imm = 0xFFFF0000 +; SSE2-NEXT: movd %r9d, %xmm1 +; SSE2-NEXT: movl %edi, %r9d +; SSE2-NEXT: shll $16, %r9d +; SSE2-NEXT: movd %r9d, %xmm2 +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-NEXT: shrq $48, %rdi +; SSE2-NEXT: shll $16, %edi +; SSE2-NEXT: movd %edi, %xmm1 +; SSE2-NEXT: shll $16, %r8d +; SSE2-NEXT: movd %r8d, %xmm3 +; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; SSE2-NEXT: movl %esi, %edi +; SSE2-NEXT: andl $-65536, %edi # imm = 0xFFFF0000 +; SSE2-NEXT: movd %edi, %xmm3 +; SSE2-NEXT: movl %esi, %edi +; SSE2-NEXT: shll $16, %edi +; SSE2-NEXT: movd %edi, %xmm1 +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] +; SSE2-NEXT: shrq $48, %rsi +; SSE2-NEXT: shll $16, %esi +; SSE2-NEXT: movd 
%esi, %xmm3 +; SSE2-NEXT: shll $16, %edx +; SSE2-NEXT: movd %edx, %xmm4 +; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0] +; SSE2-NEXT: movl %ecx, %edx +; SSE2-NEXT: andl $-65536, %edx # imm = 0xFFFF0000 +; SSE2-NEXT: movd %edx, %xmm4 +; SSE2-NEXT: movl %ecx, %edx +; SSE2-NEXT: shll $16, %edx +; SSE2-NEXT: movd %edx, %xmm3 +; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] +; SSE2-NEXT: shrq $48, %rcx +; SSE2-NEXT: shll $16, %ecx +; SSE2-NEXT: movd %ecx, %xmm4 +; SSE2-NEXT: shll $16, %eax +; SSE2-NEXT: movd %eax, %xmm5 +; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0] +; SSE2-NEXT: retq +; +; F16-LABEL: pr64460_3: +; F16: # %bb.0: +; F16-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; F16-NEXT: vpslld $16, %zmm0, %zmm0 +; F16-NEXT: retq +; +; AVXNC-LABEL: pr64460_3: +; AVXNC: # %bb.0: +; AVXNC-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVXNC-NEXT: vpslld $16, %ymm1, %ymm2 +; AVXNC-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVXNC-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVXNC-NEXT: vpslld $16, %ymm0, %ymm1 +; AVXNC-NEXT: vmovdqa %ymm2, %ymm0 +; AVXNC-NEXT: retq + %b = fpext <16 x bfloat> %a to <16 x float> + ret <16 x float> %b +} + +define <8 x double> @pr64460_4(<8 x bfloat> %a) { +; SSE2-LABEL: pr64460_4: +; SSE2: # %bb.0: +; SSE2-NEXT: movq %xmm0, %rsi +; SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1] +; SSE2-NEXT: movq %xmm0, %rdx +; SSE2-NEXT: movq %rdx, %rax +; SSE2-NEXT: shrq $32, %rax +; 
SSE2-NEXT: movq %rdx, %rcx +; SSE2-NEXT: shrq $48, %rcx +; SSE2-NEXT: movq %rsi, %rdi +; SSE2-NEXT: shrq $32, %rdi +; SSE2-NEXT: movq %rsi, %r8 +; SSE2-NEXT: shrq $48, %r8 +; SSE2-NEXT: movl %esi, %r9d +; SSE2-NEXT: andl $-65536, %r9d # imm = 0xFFFF0000 +; SSE2-NEXT: movd %r9d, %xmm0 +; SSE2-NEXT: cvtss2sd %xmm0, %xmm1 +; SSE2-NEXT: shll $16, %esi +; SSE2-NEXT: movd %esi, %xmm0 +; SSE2-NEXT: cvtss2sd %xmm0, %xmm0 +; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: shll $16, %r8d +; SSE2-NEXT: movd %r8d, %xmm1 +; SSE2-NEXT: cvtss2sd %xmm1, %xmm2 +; SSE2-NEXT: shll $16, %edi +; SSE2-NEXT: movd %edi, %xmm1 +; SSE2-NEXT: cvtss2sd %xmm1, %xmm1 +; SSE2-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; SSE2-NEXT: movl %edx, %esi +; SSE2-NEXT: andl $-65536, %esi # imm = 0xFFFF0000 +; SSE2-NEXT: movd %esi, %xmm2 +; SSE2-NEXT: cvtss2sd %xmm2, %xmm3 +; SSE2-NEXT: shll $16, %edx +; SSE2-NEXT: movd %edx, %xmm2 +; SSE2-NEXT: cvtss2sd %xmm2, %xmm2 +; SSE2-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; SSE2-NEXT: shll $16, %ecx +; SSE2-NEXT: movd %ecx, %xmm3 +; SSE2-NEXT: cvtss2sd %xmm3, %xmm4 +; SSE2-NEXT: shll $16, %eax +; SSE2-NEXT: movd %eax, %xmm3 +; SSE2-NEXT: cvtss2sd %xmm3, %xmm3 +; SSE2-NEXT: movlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0] +; SSE2-NEXT: retq +; +; F16-LABEL: pr64460_4: +; F16: # %bb.0: +; F16-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; F16-NEXT: vpslld $16, %ymm0, %ymm0 +; F16-NEXT: vcvtps2pd %ymm0, %zmm0 +; F16-NEXT: retq +; +; AVXNC-LABEL: pr64460_4: +; AVXNC: # %bb.0: +; AVXNC-NEXT: vpextrw $3, %xmm0, %eax +; AVXNC-NEXT: shll $16, %eax +; AVXNC-NEXT: vmovd %eax, %xmm1 +; AVXNC-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 +; AVXNC-NEXT: vpextrw $2, %xmm0, %eax +; AVXNC-NEXT: shll $16, %eax +; AVXNC-NEXT: vmovd %eax, %xmm2 +; AVXNC-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 +; AVXNC-NEXT: vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; AVXNC-NEXT: vpextrw $1, %xmm0, %eax 
+; AVXNC-NEXT: shll $16, %eax +; AVXNC-NEXT: vmovd %eax, %xmm2 +; AVXNC-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 +; AVXNC-NEXT: vmovd %xmm0, %eax +; AVXNC-NEXT: shll $16, %eax +; AVXNC-NEXT: vmovd %eax, %xmm3 +; AVXNC-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 +; AVXNC-NEXT: vmovlhps {{.*#+}} xmm2 = xmm3[0],xmm2[0] +; AVXNC-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2 +; AVXNC-NEXT: vpextrw $7, %xmm0, %eax +; AVXNC-NEXT: shll $16, %eax +; AVXNC-NEXT: vmovd %eax, %xmm1 +; AVXNC-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 +; AVXNC-NEXT: vpextrw $6, %xmm0, %eax +; AVXNC-NEXT: shll $16, %eax +; AVXNC-NEXT: vmovd %eax, %xmm3 +; AVXNC-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 +; AVXNC-NEXT: vmovlhps {{.*#+}} xmm1 = xmm3[0],xmm1[0] +; AVXNC-NEXT: vpextrw $5, %xmm0, %eax +; AVXNC-NEXT: shll $16, %eax +; AVXNC-NEXT: vmovd %eax, %xmm3 +; AVXNC-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 +; AVXNC-NEXT: vpextrw $4, %xmm0, %eax +; AVXNC-NEXT: shll $16, %eax +; AVXNC-NEXT: vmovd %eax, %xmm0 +; AVXNC-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 +; AVXNC-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0] +; AVXNC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 +; AVXNC-NEXT: vmovaps %ymm2, %ymm0 +; AVXNC-NEXT: retq + %b = fpext <8 x bfloat> %a to <8 x double> + ret <8 x double> %b +} + +define <4 x bfloat> @fptrunc_v4f32(<4 x float> %a) nounwind { +; SSE2-LABEL: fptrunc_v4f32: +; SSE2: # %bb.0: +; SSE2-NEXT: pushq %rbp +; SSE2-NEXT: pushq %r14 +; SSE2-NEXT: pushq %rbx +; SSE2-NEXT: subq $32, %rsp +; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; SSE2-NEXT: callq 
__truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %ebx +; SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: movd %xmm0, %ebp +; SSE2-NEXT: movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: movd %xmm0, %r14d +; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: pinsrw $0, %eax, %xmm0 +; SSE2-NEXT: pinsrw $0, %r14d, %xmm1 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-NEXT: pinsrw $0, %ebp, %xmm0 +; SSE2-NEXT: pinsrw $0, %ebx, %xmm2 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: addq $32, %rsp +; SSE2-NEXT: popq %rbx +; SSE2-NEXT: popq %r14 +; SSE2-NEXT: popq %rbp +; SSE2-NEXT: retq +; +; F16-LABEL: fptrunc_v4f32: +; F16: # %bb.0: +; F16-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; F16-NEXT: vcvtneps2bf16 %ymm0, %xmm0 +; F16-NEXT: vzeroupper +; F16-NEXT: retq +; +; AVXNC-LABEL: fptrunc_v4f32: +; AVXNC: # %bb.0: +; AVXNC-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVXNC-NEXT: {vex} vcvtneps2bf16 %ymm0, %xmm0 +; AVXNC-NEXT: vzeroupper +; AVXNC-NEXT: retq + %b = fptrunc <4 x float> %a to <4 x bfloat> + ret <4 x bfloat> %b +} + +define <8 x bfloat> @fptrunc_v8f32(<8 x float> %a) nounwind { +; SSE2-LABEL: fptrunc_v8f32: +; SSE2: # %bb.0: +; SSE2-NEXT: pushq %rbp +; SSE2-NEXT: pushq %r14 +; SSE2-NEXT: pushq %rbx +; SSE2-NEXT: subq $32, %rsp +; SSE2-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill +; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, 
%ebx +; SSE2-NEXT: shll $16, %ebx +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: movzwl %ax, %r14d +; SSE2-NEXT: orl %ebx, %r14d +; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %ebp +; SSE2-NEXT: shll $16, %ebp +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1] +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: movzwl %ax, %ebx +; SSE2-NEXT: orl %ebp, %ebx +; SSE2-NEXT: shlq $32, %rbx +; SSE2-NEXT: orq %r14, %rbx +; SSE2-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %ebp +; SSE2-NEXT: shll $16, %ebp +; SSE2-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: movzwl %ax, %r14d +; SSE2-NEXT: orl %ebp, %r14d +; SSE2-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %ebp +; SSE2-NEXT: shll $16, %ebp +; SSE2-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload +; SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1] +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: movzwl %ax, %eax +; SSE2-NEXT: orl %ebp, %eax +; SSE2-NEXT: shlq $32, %rax +; SSE2-NEXT: orq %r14, %rax +; SSE2-NEXT: movq %rax, %xmm1 +; SSE2-NEXT: movq %rbx, %xmm0 +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: addq $32, %rsp +; SSE2-NEXT: popq %rbx +; SSE2-NEXT: popq %r14 +; SSE2-NEXT: popq %rbp +; SSE2-NEXT: retq +; +; F16-LABEL: fptrunc_v8f32: +; F16: # %bb.0: +; F16-NEXT: vcvtneps2bf16 %ymm0, %xmm0 +; F16-NEXT: vzeroupper +; F16-NEXT: retq 
+; +; AVXNC-LABEL: fptrunc_v8f32: +; AVXNC: # %bb.0: +; AVXNC-NEXT: {vex} vcvtneps2bf16 %ymm0, %xmm0 +; AVXNC-NEXT: vzeroupper +; AVXNC-NEXT: retq + %b = fptrunc <8 x float> %a to <8 x bfloat> + ret <8 x bfloat> %b +} + +define <16 x bfloat> @fptrunc_v16f32(<16 x float> %a) nounwind { +; SSE2-LABEL: fptrunc_v16f32: +; SSE2: # %bb.0: +; SSE2-NEXT: pushq %rbp +; SSE2-NEXT: pushq %r15 +; SSE2-NEXT: pushq %r14 +; SSE2-NEXT: pushq %r12 +; SSE2-NEXT: pushq %rbx +; SSE2-NEXT: subq $64, %rsp +; SSE2-NEXT: movaps %xmm3, (%rsp) # 16-byte Spill +; SSE2-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: movaps %xmm2, %xmm0 +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[1,1] +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %ebx +; SSE2-NEXT: shll $16, %ebx +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: movzwl %ax, %r14d +; SSE2-NEXT: orl %ebx, %r14d +; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %ebp +; SSE2-NEXT: shll $16, %ebp +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1] +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: movzwl %ax, %ebx +; SSE2-NEXT: orl %ebp, %ebx +; SSE2-NEXT: shlq $32, %rbx +; SSE2-NEXT: orq %r14, %rbx +; SSE2-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %ebp +; SSE2-NEXT: shll $16, %ebp +; SSE2-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: 
movzwl %ax, %r15d +; SSE2-NEXT: orl %ebp, %r15d +; SSE2-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %ebp +; SSE2-NEXT: shll $16, %ebp +; SSE2-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload +; SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1] +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: movzwl %ax, %r14d +; SSE2-NEXT: orl %ebp, %r14d +; SSE2-NEXT: shlq $32, %r14 +; SSE2-NEXT: orq %r15, %r14 +; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %ebp +; SSE2-NEXT: shll $16, %ebp +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: movzwl %ax, %r12d +; SSE2-NEXT: orl %ebp, %r12d +; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %ebp +; SSE2-NEXT: shll $16, %ebp +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1] +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: movzwl %ax, %r15d +; SSE2-NEXT: orl %ebp, %r15d +; SSE2-NEXT: shlq $32, %r15 +; SSE2-NEXT: orq %r12, %r15 +; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %ebp +; SSE2-NEXT: shll $16, %ebp +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: movzwl %ax, %r12d +; SSE2-NEXT: orl %ebp, %r12d +; SSE2-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: shufps 
{{.*#+}} xmm0 = xmm0[3,3,3,3] +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %ebp +; SSE2-NEXT: shll $16, %ebp +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1] +; SSE2-NEXT: callq __truncsfbf2@PLT +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: movzwl %ax, %eax +; SSE2-NEXT: orl %ebp, %eax +; SSE2-NEXT: shlq $32, %rax +; SSE2-NEXT: orq %r12, %rax +; SSE2-NEXT: movq %rax, %xmm1 +; SSE2-NEXT: movq %r15, %xmm0 +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: movq %r14, %xmm2 +; SSE2-NEXT: movq %rbx, %xmm1 +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; SSE2-NEXT: addq $64, %rsp +; SSE2-NEXT: popq %rbx +; SSE2-NEXT: popq %r12 +; SSE2-NEXT: popq %r14 +; SSE2-NEXT: popq %r15 +; SSE2-NEXT: popq %rbp +; SSE2-NEXT: retq +; +; F16-LABEL: fptrunc_v16f32: +; F16: # %bb.0: +; F16-NEXT: vcvtneps2bf16 %zmm0, %ymm0 +; F16-NEXT: retq +; +; AVXNC-LABEL: fptrunc_v16f32: +; AVXNC: # %bb.0: +; AVXNC-NEXT: pushq %rbp +; AVXNC-NEXT: movq %rsp, %rbp +; AVXNC-NEXT: andq $-32, %rsp +; AVXNC-NEXT: subq $64, %rsp +; AVXNC-NEXT: {vex} vcvtneps2bf16 %ymm1, %xmm1 +; AVXNC-NEXT: vmovaps %xmm1, {{[0-9]+}}(%rsp) +; AVXNC-NEXT: {vex} vcvtneps2bf16 %ymm0, %xmm0 +; AVXNC-NEXT: vmovaps %xmm0, (%rsp) +; AVXNC-NEXT: vmovaps (%rsp), %ymm0 +; AVXNC-NEXT: movq %rbp, %rsp +; AVXNC-NEXT: popq %rbp +; AVXNC-NEXT: retq + %b = fptrunc <16 x float> %a to <16 x bfloat> + ret <16 x bfloat> %b +} + +define <8 x bfloat> @fptrunc_v8f64(<8 x double> %a) nounwind { +; SSE2-LABEL: fptrunc_v8f64: +; SSE2: # %bb.0: +; SSE2-NEXT: pushq %rbp +; SSE2-NEXT: pushq %r14 +; SSE2-NEXT: pushq %rbx +; SSE2-NEXT: subq $64, %rsp +; SSE2-NEXT: movaps %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill +; SSE2-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; SSE2-NEXT: punpckhqdq {{.*#+}} 
xmm0 = xmm0[1,1] +; SSE2-NEXT: callq __truncdfbf2@PLT +; SSE2-NEXT: movd %xmm0, %ebx +; SSE2-NEXT: shll $16, %ebx +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: callq __truncdfbf2@PLT +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: movzwl %ax, %r14d +; SSE2-NEXT: orl %ebx, %r14d +; SSE2-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload +; SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1] +; SSE2-NEXT: callq __truncdfbf2@PLT +; SSE2-NEXT: movd %xmm0, %ebp +; SSE2-NEXT: shll $16, %ebp +; SSE2-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload +; SSE2-NEXT: callq __truncdfbf2@PLT +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: movzwl %ax, %ebx +; SSE2-NEXT: orl %ebp, %ebx +; SSE2-NEXT: shlq $32, %rbx +; SSE2-NEXT: orq %r14, %rbx +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1] +; SSE2-NEXT: callq __truncdfbf2@PLT +; SSE2-NEXT: movd %xmm0, %ebp +; SSE2-NEXT: shll $16, %ebp +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: callq __truncdfbf2@PLT +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: movzwl %ax, %r14d +; SSE2-NEXT: orl %ebp, %r14d +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1,1] +; SSE2-NEXT: callq __truncdfbf2@PLT +; SSE2-NEXT: movd %xmm0, %ebp +; SSE2-NEXT: shll $16, %ebp +; SSE2-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; SSE2-NEXT: callq __truncdfbf2@PLT +; SSE2-NEXT: movd %xmm0, %eax +; SSE2-NEXT: movzwl %ax, %eax +; SSE2-NEXT: orl %ebp, %eax +; SSE2-NEXT: shlq $32, %rax +; SSE2-NEXT: orq %r14, %rax +; SSE2-NEXT: movq %rax, %xmm1 +; SSE2-NEXT: movq %rbx, %xmm0 +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE2-NEXT: addq $64, %rsp +; SSE2-NEXT: popq %rbx +; SSE2-NEXT: popq %r14 +; SSE2-NEXT: popq %rbp +; SSE2-NEXT: retq +; +; F16-LABEL: fptrunc_v8f64: +; F16: # %bb.0: +; F16-NEXT: pushq %rbp +; F16-NEXT: pushq %r15 +; F16-NEXT: pushq 
%r14 +; F16-NEXT: pushq %r13 +; F16-NEXT: pushq %r12 +; F16-NEXT: pushq %rbx +; F16-NEXT: subq $136, %rsp +; F16-NEXT: vmovupd %zmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 64-byte Spill +; F16-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] +; F16-NEXT: vzeroupper +; F16-NEXT: callq __truncdfbf2@PLT +; F16-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; F16-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload +; F16-NEXT: vextractf128 $1, %ymm0, %xmm0 +; F16-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; F16-NEXT: vzeroupper +; F16-NEXT: callq __truncdfbf2@PLT +; F16-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; F16-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; F16-NEXT: # xmm0 = mem[1,0] +; F16-NEXT: callq __truncdfbf2@PLT +; F16-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; F16-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload +; F16-NEXT: vextractf32x4 $2, %zmm0, %xmm0 +; F16-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; F16-NEXT: vzeroupper +; F16-NEXT: callq __truncdfbf2@PLT +; F16-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; F16-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; F16-NEXT: # xmm0 = mem[1,0] +; F16-NEXT: callq __truncdfbf2@PLT +; F16-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; F16-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload +; F16-NEXT: vextractf32x4 $3, %zmm0, %xmm0 +; F16-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; F16-NEXT: vzeroupper +; F16-NEXT: callq __truncdfbf2@PLT +; F16-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; F16-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; F16-NEXT: # xmm0 = mem[1,0] +; F16-NEXT: callq __truncdfbf2@PLT +; F16-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; F16-NEXT: vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded 
Reload +; F16-NEXT: # xmm0 = mem[0],zero,zero,zero +; F16-NEXT: vmovd %xmm0, %ebp +; F16-NEXT: vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; F16-NEXT: # xmm0 = mem[0],zero,zero,zero +; F16-NEXT: vmovd %xmm0, %r14d +; F16-NEXT: vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; F16-NEXT: # xmm0 = mem[0],zero,zero,zero +; F16-NEXT: vmovd %xmm0, %r15d +; F16-NEXT: vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; F16-NEXT: # xmm0 = mem[0],zero,zero,zero +; F16-NEXT: vmovd %xmm0, %r12d +; F16-NEXT: vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; F16-NEXT: # xmm0 = mem[0],zero,zero,zero +; F16-NEXT: vmovd %xmm0, %r13d +; F16-NEXT: vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; F16-NEXT: # xmm0 = mem[0],zero,zero,zero +; F16-NEXT: vmovd %xmm0, %ebx +; F16-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %zmm0 # 64-byte Reload +; F16-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; F16-NEXT: vzeroupper +; F16-NEXT: callq __truncdfbf2@PLT +; F16-NEXT: vmovd %xmm0, %eax +; F16-NEXT: vmovd %eax, %xmm0 +; F16-NEXT: vpinsrw $1, %ebx, %xmm0, %xmm0 +; F16-NEXT: vpinsrw $2, %r13d, %xmm0, %xmm0 +; F16-NEXT: vpinsrw $3, %r12d, %xmm0, %xmm0 +; F16-NEXT: vpinsrw $4, %r15d, %xmm0, %xmm0 +; F16-NEXT: vpinsrw $5, %r14d, %xmm0, %xmm0 +; F16-NEXT: vpinsrw $6, %ebp, %xmm0, %xmm0 +; F16-NEXT: vpinsrw $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; F16-NEXT: addq $136, %rsp +; F16-NEXT: popq %rbx +; F16-NEXT: popq %r12 +; F16-NEXT: popq %r13 +; F16-NEXT: popq %r14 +; F16-NEXT: popq %r15 +; F16-NEXT: popq %rbp +; F16-NEXT: retq +; +; AVXNC-LABEL: fptrunc_v8f64: +; AVXNC: # %bb.0: +; AVXNC-NEXT: pushq %rbp +; AVXNC-NEXT: pushq %r15 +; AVXNC-NEXT: pushq %r14 +; AVXNC-NEXT: pushq %r13 +; AVXNC-NEXT: pushq %r12 +; AVXNC-NEXT: pushq %rbx +; AVXNC-NEXT: subq $120, %rsp +; AVXNC-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; AVXNC-NEXT: vmovupd %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte 
Spill +; AVXNC-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] +; AVXNC-NEXT: vzeroupper +; AVXNC-NEXT: callq __truncdfbf2@PLT +; AVXNC-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVXNC-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload +; AVXNC-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVXNC-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVXNC-NEXT: vzeroupper +; AVXNC-NEXT: callq __truncdfbf2@PLT +; AVXNC-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVXNC-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVXNC-NEXT: # xmm0 = mem[1,0] +; AVXNC-NEXT: callq __truncdfbf2@PLT +; AVXNC-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVXNC-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload +; AVXNC-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVXNC-NEXT: vzeroupper +; AVXNC-NEXT: callq __truncdfbf2@PLT +; AVXNC-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVXNC-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVXNC-NEXT: # xmm0 = mem[1,0] +; AVXNC-NEXT: callq __truncdfbf2@PLT +; AVXNC-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVXNC-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload +; AVXNC-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVXNC-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; AVXNC-NEXT: vzeroupper +; AVXNC-NEXT: callq __truncdfbf2@PLT +; AVXNC-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVXNC-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; AVXNC-NEXT: # xmm0 = mem[1,0] +; AVXNC-NEXT: callq __truncdfbf2@PLT +; AVXNC-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; AVXNC-NEXT: vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; AVXNC-NEXT: # xmm0 = mem[0],zero,zero,zero +; AVXNC-NEXT: vmovd %xmm0, %ebp +; AVXNC-NEXT: vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload 
+; AVXNC-NEXT: # xmm0 = mem[0],zero,zero,zero +; AVXNC-NEXT: vmovd %xmm0, %r14d +; AVXNC-NEXT: vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; AVXNC-NEXT: # xmm0 = mem[0],zero,zero,zero +; AVXNC-NEXT: vmovd %xmm0, %r15d +; AVXNC-NEXT: vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; AVXNC-NEXT: # xmm0 = mem[0],zero,zero,zero +; AVXNC-NEXT: vmovd %xmm0, %r12d +; AVXNC-NEXT: vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; AVXNC-NEXT: # xmm0 = mem[0],zero,zero,zero +; AVXNC-NEXT: vmovd %xmm0, %r13d +; AVXNC-NEXT: vmovd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload +; AVXNC-NEXT: # xmm0 = mem[0],zero,zero,zero +; AVXNC-NEXT: vmovd %xmm0, %ebx +; AVXNC-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload +; AVXNC-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVXNC-NEXT: vzeroupper +; AVXNC-NEXT: callq __truncdfbf2@PLT +; AVXNC-NEXT: vmovd %xmm0, %eax +; AVXNC-NEXT: vmovd %eax, %xmm0 +; AVXNC-NEXT: vpinsrw $1, %ebx, %xmm0, %xmm0 +; AVXNC-NEXT: vpinsrw $2, %r13d, %xmm0, %xmm0 +; AVXNC-NEXT: vpinsrw $3, %r12d, %xmm0, %xmm0 +; AVXNC-NEXT: vpinsrw $4, %r15d, %xmm0, %xmm0 +; AVXNC-NEXT: vpinsrw $5, %r14d, %xmm0, %xmm0 +; AVXNC-NEXT: vpinsrw $6, %ebp, %xmm0, %xmm0 +; AVXNC-NEXT: vpinsrw $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 4-byte Folded Reload +; AVXNC-NEXT: addq $120, %rsp +; AVXNC-NEXT: popq %rbx +; AVXNC-NEXT: popq %r12 +; AVXNC-NEXT: popq %r13 +; AVXNC-NEXT: popq %r14 +; AVXNC-NEXT: popq %r15 +; AVXNC-NEXT: popq %rbp +; AVXNC-NEXT: retq + %b = fptrunc <8 x double> %a to <8 x bfloat> + ret <8 x bfloat> %b +} diff --git a/llvm/test/CodeGen/X86/machine-licm-vs-wineh.mir b/llvm/test/CodeGen/X86/machine-licm-vs-wineh.mir new file mode 100644 index 0000000000000..4bfd749fb7723 --- /dev/null +++ b/llvm/test/CodeGen/X86/machine-licm-vs-wineh.mir @@ -0,0 +1,141 @@ +# RUN: llc -o - %s -mtriple=x86_64-pc-windows-msvc -run-pass=machinelicm | FileCheck %s +# +# This test checks that MachineLICM doesn't 
hoist loads out of funclets. +# Manually modified from the IR of the following C++ function by running +# llc -stop-after=machine-cp. +# +# void may_throw(); +# void use(int); +# +# void test(int n, int arg) +# { +# for (int i = 0 ; i < n ; i++) +# try { +# may_throw(); +# } +# catch (...) { +# // Two uses to get 'arg' allocated to a register +# use(arg); +# use(arg); +# } +# } + +--- | + target triple = "x86_64-pc-windows-msvc" + + define void @test(i32 %n, i32 %arg) personality ptr @__CxxFrameHandler3 { + entry: + %cmp3 = icmp sgt i32 %n, 0 + br i1 %cmp3, label %for.body.preheader, label %for.cond.cleanup + + for.body.preheader: ; preds = %entry + br label %for.body + + for.cond.cleanup: ; preds = %for.inc, %entry + ret void + + for.body: ; preds = %for.body.preheader, %for.inc + %lsr.iv = phi i32 [ %n, %for.body.preheader ], [ %lsr.iv.next, %for.inc ] + invoke void @may_throw() + to label %for.inc unwind label %catch.dispatch + + catch.dispatch: ; preds = %for.body + %0 = catchswitch within none [label %catch] unwind to caller + + catch: ; preds = %catch.dispatch + %1 = catchpad within %0 [ptr null, i32 64, ptr null] + call void @use(i32 %arg) [ "funclet"(token %1) ] + call void @use(i32 %arg) [ "funclet"(token %1) ] + catchret from %1 to label %for.inc + + for.inc: ; preds = %catch, %for.body + %lsr.iv.next = add i32 %lsr.iv, -1 + %exitcond.not = icmp eq i32 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + } + + declare i32 @__CxxFrameHandler3(...) + + declare void @may_throw() + + declare void @use(i32) + +... 
+--- +name: test +alignment: 16 +tracksRegLiveness: true +hasEHCatchret: true +hasEHScopes: true +hasEHFunclets: true +debugInstrRef: true +tracksDebugUserValues: true +liveins: + - { reg: '$ecx' } + - { reg: '$edx' } +frameInfo: + maxAlignment: 8 + hasCalls: true + hasOpaqueSPAdjustment: true +stack: + - { id: 0, type: spill-slot, size: 4, alignment: 4 } + - { id: 1, type: spill-slot, size: 4, alignment: 4 } +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.1, %bb.2 + liveins: $ecx, $edx + + MOV32mr %stack.1, 1, $noreg, 0, $noreg, $edx :: (store (s32) into %stack.1) + TEST32rr renamable $ecx, renamable $ecx, implicit-def $eflags + JCC_1 %bb.2, 14, implicit killed $eflags + + bb.1: + liveins: $ecx + + JMP_1 %bb.3 + + bb.2.for.cond.cleanup: + RET 0 + + bb.3.for.body: + successors: %bb.5, %bb.4 + liveins: $ecx + + EH_LABEL + MOV32mr %stack.0, 1, $noreg, 0, $noreg, killed renamable $ecx :: (store (s32) into %stack.0) + ADJCALLSTACKDOWN64 32, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + CALL64pcrel32 @may_throw, csr_win64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp + ADJCALLSTACKUP64 32, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + EH_LABEL + JMP_1 %bb.5 + + bb.4.catch (landing-pad, ehfunclet-entry): + successors: %bb.5 + + ADJCALLSTACKDOWN64 32, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + renamable $esi = MOV32rm %stack.1, 1, $noreg, 0, $noreg :: (load (s32) from %stack.1) + $ecx = COPY renamable $esi + CALL64pcrel32 @use, csr_win64, implicit $rsp, implicit $ssp, implicit $ecx, implicit-def $rsp, implicit-def $ssp + ADJCALLSTACKUP64 32, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + ADJCALLSTACKDOWN64 32, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, 
implicit-def dead $ssp, implicit $rsp, implicit $ssp + $ecx = COPY killed renamable $esi + CALL64pcrel32 @use, csr_win64, implicit $rsp, implicit $ssp, implicit $ecx, implicit-def $rsp, implicit-def $ssp + ADJCALLSTACKUP64 32, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp + CATCHRET %bb.5, %bb.0 + + bb.5.for.inc: + successors: %bb.2, %bb.3 + + renamable $ecx = MOV32rm %stack.0, 1, $noreg, 0, $noreg :: (load (s32) from %stack.0) + renamable $ecx = DEC32r killed renamable $ecx, implicit-def $eflags + JCC_1 %bb.2, 4, implicit killed $eflags + JMP_1 %bb.3 + +... +# +# CHECK: bb.4.catch +# CHECK: ADJCALLSTACKDOWN64 +# CHECK-NEXT: renamable [[REG:\$[a-z0-9]+]] = MOV32rm %stack.1 +# CHECK-NEXT: $ecx = COPY renamable [[REG]] +# CHECK-NEXT: CALL64pcrel32 @use diff --git a/llvm/test/CodeGen/X86/x86-prefer-no-gather-no-scatter.ll b/llvm/test/CodeGen/X86/x86-prefer-no-gather-no-scatter.ll index e3f3622f146d9..33250b3495a00 100644 --- a/llvm/test/CodeGen/X86/x86-prefer-no-gather-no-scatter.ll +++ b/llvm/test/CodeGen/X86/x86-prefer-no-gather-no-scatter.ll @@ -1,6 +1,6 @@ ; Check that if option prefer-no-gather/scatter can disable gather/scatter instructions. 
-; RUN: llc -mattr=+avx2,+fast-gather %s -o - | FileCheck %s --check-prefixes=GATHER -; RUN: llc -mattr=+avx2,+fast-gather,+prefer-no-gather %s -o - | FileCheck %s --check-prefixes=NO-GATHER +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2,+fast-gather %s -o - | FileCheck %s --check-prefixes=GATHER +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2,+fast-gather,+prefer-no-gather %s -o - | FileCheck %s --check-prefixes=NO-GATHER ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl,+avx512dq < %s | FileCheck %s --check-prefix=SCATTER ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl,+avx512dq,+prefer-no-gather < %s | FileCheck %s --check-prefix=SCATTER-NO-GATHER ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl,+avx512dq,+prefer-no-scatter < %s | FileCheck %s --check-prefix=GATHER-NO-SCATTER diff --git a/llvm/test/Transforms/ConstraintElimination/large-constant-ints.ll b/llvm/test/Transforms/ConstraintElimination/large-constant-ints.ll index fa8a4a60eac14..f08068420406d 100644 --- a/llvm/test/Transforms/ConstraintElimination/large-constant-ints.ll +++ b/llvm/test/Transforms/ConstraintElimination/large-constant-ints.ll @@ -282,6 +282,29 @@ else: ret i1 false } +define i1 @mul_nsw_decomp(i128 %x) { +; CHECK-LABEL: @mul_nsw_decomp( +; CHECK-NEXT: [[VAL:%.*]] = mul nsw i128 [[X:%.*]], 9223372036854775808 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i128 [[X]], [[VAL]] +; CHECK-NEXT: br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]] +; CHECK: then: +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i128 [[X]], 0 +; CHECK-NEXT: ret i1 [[CMP2]] +; CHECK: else: +; CHECK-NEXT: ret i1 false +; + %val = mul nsw i128 %x, 9223372036854775808 + %cmp = icmp sgt i128 %x, %val + br i1 %cmp, label %then, label %else + +then: + %cmp2 = icmp sgt i128 %x, 0 + ret i1 %cmp2 + +else: + ret i1 false +} + define i1 @add_nuw_decomp_recursive() { ; CHECK-LABEL: @add_nuw_decomp_recursive( ; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i64 -9223372036854775808, 10 
diff --git a/llvm/test/Transforms/JumpThreading/invalidate-lvi.ll b/llvm/test/Transforms/JumpThreading/invalidate-lvi.ll new file mode 100644 index 0000000000000..27191d6f54c2d --- /dev/null +++ b/llvm/test/Transforms/JumpThreading/invalidate-lvi.ll @@ -0,0 +1,59 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -S -passes=jump-threading < %s | FileCheck %s + +declare void @set_value(ptr) + +declare void @bar() + +define void @foo(i1 %0) { +; CHECK-LABEL: define void @foo( +; CHECK-SAME: i1 [[TMP0:%.*]]) { +; CHECK-NEXT: start: +; CHECK-NEXT: [[V:%.*]] = alloca i64, align 8 +; CHECK-NEXT: call void @set_value(ptr [[V]]) +; CHECK-NEXT: [[L1:%.*]] = load i64, ptr [[V]], align 8 +; CHECK-NEXT: br i1 [[TMP0]], label [[BB0:%.*]], label [[BB2:%.*]] +; CHECK: bb0: +; CHECK-NEXT: [[C1:%.*]] = icmp eq i64 [[L1]], 0 +; CHECK-NEXT: br i1 [[C1]], label [[BB2_THREAD:%.*]], label [[BB2]] +; CHECK: bb2.thread: +; CHECK-NEXT: store i64 0, ptr [[V]], align 8 +; CHECK-NEXT: br label [[BB4:%.*]] +; CHECK: bb2: +; CHECK-NEXT: [[L2:%.*]] = phi i64 [ [[L1]], [[BB0]] ], [ [[L1]], [[START:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[L2]], 2 +; CHECK-NEXT: br i1 [[TMP1]], label [[BB3:%.*]], label [[BB4]] +; CHECK: bb3: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: ret void +; CHECK: bb4: +; CHECK-NEXT: ret void +; +start: + %v = alloca i64, align 8 + call void @set_value(ptr %v) + %l1 = load i64, ptr %v, align 8, !range !0 + br i1 %0, label %bb0, label %bb2 + +bb0: ; preds = %start + %c1 = icmp eq i64 %l1, 0 + br i1 %c1, label %bb1, label %bb2 + +bb1: ; preds = %bb0 + store i64 0, ptr %v, align 8 + br label %bb2 + +bb2: ; preds = %bb1, %bb0, %start + %l2 = load i64, ptr %v, align 8 + %1 = icmp eq i64 %l2, 2 + br i1 %1, label %bb3, label %bb4 + +bb3: ; preds = %bb2 + call void @bar() + ret void + +bb4: ; preds = %bb2 + ret void +} + +!0 = !{i64 0, i64 2} diff --git a/llvm/test/Transforms/SROA/scalable-vector-struct.ll 
b/llvm/test/Transforms/SROA/scalable-vector-struct.ll index 92cd44d2b5ac3..1af4fbbd9254b 100644 --- a/llvm/test/Transforms/SROA/scalable-vector-struct.ll +++ b/llvm/test/Transforms/SROA/scalable-vector-struct.ll @@ -20,3 +20,34 @@ define %struct.test @alloca( %x, %y) { %val = load %struct.test, %struct.test* %addr, align 4 ret %struct.test %val } + + +define { , } @return_tuple( %v_tuple.coerce0, %v_tuple.coerce1) { +; CHECK-LABEL: @return_tuple( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = insertvalue { , } poison, [[V_TUPLE_COERCE0:%.*]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { , } [[TMP0]], [[V_TUPLE_COERCE1:%.*]], 1 +; CHECK-NEXT: [[COERCE_EXTRACT0:%.*]] = extractvalue { , } [[TMP1]], 0 +; CHECK-NEXT: [[COERCE_EXTRACT1:%.*]] = extractvalue { , } [[TMP1]], 1 +; CHECK-NEXT: [[CALL:%.*]] = call { , } @foo( [[COERCE_EXTRACT0]], [[COERCE_EXTRACT1]]) +; CHECK-NEXT: ret { , } [[CALL]] +; +entry: + %v_tuple = alloca { , }, align 4 + %v_tuple.addr = alloca { , }, align 4 + %coerce = alloca { , }, align 4 + %0 = insertvalue { , } poison, %v_tuple.coerce0, 0 + %1 = insertvalue { , } %0, %v_tuple.coerce1, 1 + store { , } %1, ptr %v_tuple, align 4 + %v_tuple1 = load { , }, ptr %v_tuple, align 4 + store { , } %v_tuple1, ptr %v_tuple.addr, align 4 + %2 = load { , }, ptr %v_tuple.addr, align 4 + store { , } %2, ptr %coerce, align 4 + %coerce.tuple = load { , }, ptr %coerce, align 4 + %coerce.extract0 = extractvalue { , } %coerce.tuple, 0 + %coerce.extract1 = extractvalue { , } %coerce.tuple, 1 + %call = call { , } @foo( %coerce.extract0, %coerce.extract1) + ret { , } %call +} + +declare { , } @foo(, ) diff --git a/llvm/test/tools/llvm-rc/windres-preproc.test b/llvm/test/tools/llvm-rc/windres-preproc.test index 888be03f7d9e4..e55195b3a4d28 100644 --- a/llvm/test/tools/llvm-rc/windres-preproc.test +++ b/llvm/test/tools/llvm-rc/windres-preproc.test @@ -4,6 +4,7 @@ ; REQUIRES: shell ; RUN: llvm-windres -### --include-dir %p/incdir1 --include %p/incdir2 "-DFOO1=\\\"foo 
bar\\\"" -UFOO2 -D FOO3 --preprocessor-arg "-DFOO4=\\\"baz baz\\\"" -DFOO5=\"bar\" %p/Inputs/empty.rc %t.res | FileCheck %s --check-prefix=CHECK1 +; RUN: llvm-windres -### --include-dir %p/incdir1 --include %p/incdir2 "-DFOO1=\"foo bar\"" -UFOO2 -D FOO3 --preprocessor-arg "-DFOO4=\"baz baz\"" "-DFOO5=bar" %p/Inputs/empty.rc %t.res --use-temp-file | FileCheck %s --check-prefix=CHECK1 ; CHECK1: {{^}} "clang" "--driver-mode=gcc" "-target" "{{.*}}-{{.*}}{{mingw32|windows-gnu}}" "-E" "-xc" "-DRC_INVOKED" "{{.*}}empty.rc" "-o" "{{.*}}preproc-{{.*}}.rc" "-I" "{{.*}}incdir1" "-I" "{{.*}}incdir2" "-D" "FOO1=\"foo bar\"" "-U" "FOO2" "-D" "FOO3" "-DFOO4=\"baz baz\"" "-D" "FOO5=bar"{{$}} ; RUN: llvm-windres -### --preprocessor "i686-w64-mingw32-gcc -E -DFOO=\\\"foo\\ bar\\\"" %p/Inputs/empty.rc %t.res | FileCheck %s --check-prefix=CHECK2 ; CHECK2: {{^}} "i686-w64-mingw32-gcc" "-E" "-DFOO=\"foo bar\"" "{{.*}}empty.rc" "-o" "{{.*}}preproc-{{.*}}.rc"{{$}} diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp index e690089fec565..b3d40800aef63 100644 --- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp @@ -1083,11 +1083,11 @@ ExegesisX86Target::generateExitSyscall(unsigned ExitCode) const { #define MAP_FIXED_NOREPLACE MAP_FIXED #endif -// 32 bit ARM doesn't have mmap and uses mmap2 instead. The only difference -// between the two syscalls is that mmap2's offset parameter is in terms 4096 -// byte offsets rather than individual bytes, so for our purposes they are -// effectively the same as all ofsets here are set to 0. -#ifdef __arm__ +// Some 32-bit architectures don't have mmap and define mmap2 instead. The only +// difference between the two syscalls is that mmap2's offset parameter is in +// terms 4096 byte offsets rather than individual bytes, so for our purposes +// they are effectively the same as all ofsets here are set to 0. 
+#if defined(SYS_mmap2) && !defined(SYS_mmap) #define SYS_mmap SYS_mmap2 #endif diff --git a/llvm/tools/llvm-rc/WindresOpts.td b/llvm/tools/llvm-rc/WindresOpts.td index 3c75c85ece0f6..42a56dbfda4cd 100644 --- a/llvm/tools/llvm-rc/WindresOpts.td +++ b/llvm/tools/llvm-rc/WindresOpts.td @@ -48,6 +48,10 @@ defm codepage : LongShort<"c", "codepage", "Default codepage to use">; defm language : LongShort<"l", "language", "Default language to use (0x0-0xffff)">; +def use_temp_file: Flag<["--"], "use-temp-file">, + HelpText<"Mimic GNU windres preprocessor option handling " + "(don't unescape preprocessor options)">; + defm verbose : F<"v", "verbose", "Enable verbose output">; defm version : F<"V", "version", "Display version">; @@ -57,6 +61,3 @@ defm help : F<"h", "help", "Display this message and exit">; def _HASH_HASH_HASH : Flag<["-"], "###">; def no_preprocess : Flag<["--"], "no-preprocess">; - -// Unimplemented options for compatibility -def use_temp_file: Flag<["--"], "use-temp-file">; diff --git a/llvm/tools/llvm-rc/llvm-rc.cpp b/llvm/tools/llvm-rc/llvm-rc.cpp index 4a77f4bd88cce..0caa8117cb70b 100644 --- a/llvm/tools/llvm-rc/llvm-rc.cpp +++ b/llvm/tools/llvm-rc/llvm-rc.cpp @@ -142,20 +142,24 @@ ErrorOr findClang(const char *Argv0, StringRef Triple) { if (MainExecPath.empty()) MainExecPath = Argv0; - StringRef Parent = llvm::sys::path::parent_path(MainExecPath); ErrorOr Path = std::error_code(); std::string TargetClang = (Triple + "-clang").str(); std::string VersionedClang = ("clang-" + Twine(LLVM_VERSION_MAJOR)).str(); - if (!Parent.empty()) { - // First look for the tool with all potential names in the specific - // directory of Argv0, if known - for (const auto *Name : - {TargetClang.c_str(), VersionedClang.c_str(), "clang", "clang-cl"}) { + for (const auto *Name : + {TargetClang.c_str(), VersionedClang.c_str(), "clang", "clang-cl"}) { + for (const StringRef Parent : + {llvm::sys::path::parent_path(MainExecPath), + llvm::sys::path::parent_path(Argv0)}) { + // 
Look for various versions of "clang" first in the MainExecPath parent + // directory and then in the argv[0] parent directory. + // On Windows (but not Unix) argv[0] is overwritten with the eqiuvalent + // of MainExecPath by InitLLVM. Path = sys::findProgramByName(Name, Parent); if (Path) return Path; } } + // If no parent directory known, or not found there, look everywhere in PATH for (const auto *Name : {"clang", "clang-cl"}) { Path = sys::findProgramByName(Name); @@ -469,7 +473,14 @@ RcOptions parseWindresOptions(ArrayRef ArgsArr, // done this double escaping) probably is confined to cases like these // quoted string defines, and those happen to work the same across unix // and windows. - std::string Unescaped = unescape(Arg->getValue()); + // + // If GNU windres is executed with --use-temp-file, it doesn't use + // popen() to invoke the preprocessor, but uses another function which + // actually preserves tricky characters better. To mimic this behaviour, + // don't unescape arguments here. + std::string Value = Arg->getValue(); + if (!InputArgs.hasArg(WINDRES_use_temp_file)) + Value = unescape(Value); switch (Arg->getOption().getID()) { case WINDRES_include_dir: // Technically, these are handled the same way as e.g. defines, but @@ -483,17 +494,19 @@ RcOptions parseWindresOptions(ArrayRef ArgsArr, break; case WINDRES_define: Opts.PreprocessArgs.push_back("-D"); - Opts.PreprocessArgs.push_back(Unescaped); + Opts.PreprocessArgs.push_back(Value); break; case WINDRES_undef: Opts.PreprocessArgs.push_back("-U"); - Opts.PreprocessArgs.push_back(Unescaped); + Opts.PreprocessArgs.push_back(Value); break; case WINDRES_preprocessor_arg: - Opts.PreprocessArgs.push_back(Unescaped); + Opts.PreprocessArgs.push_back(Value); break; } } + // TODO: If --use-temp-file is set, we shouldn't be unescaping + // the --preprocessor argument either, only splitting it. 
if (InputArgs.hasArg(WINDRES_preprocessor)) Opts.PreprocessCmd = unescapeSplit(InputArgs.getLastArgValue(WINDRES_preprocessor)); diff --git a/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp index aa5d525f24eb7..c001c693cc146 100644 --- a/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp +++ b/llvm/unittests/tools/llvm-exegesis/X86/TargetTest.cpp @@ -635,11 +635,11 @@ TEST_F(X86Core2TargetTest, GenerateExitSyscallTest) { #define MAP_FIXED_NOREPLACE MAP_FIXED #endif -// 32 bit ARM doesn't have mmap and uses mmap2 instead. The only difference -// between the two syscalls is that mmap2's offset parameter is in terms 4096 -// byte offsets rather than individual bytes, so for our purposes they are -// effectively the same as all ofsets here are set to 0. -#ifdef __arm__ +// Some 32-bit architectures don't have mmap and define mmap2 instead. The only +// difference between the two syscalls is that mmap2's offset parameter is in +// terms 4096 byte offsets rather than individual bytes, so for our purposes +// they are effectively the same as all ofsets here are set to 0. 
+#if defined(SYS_mmap2) && !defined(SYS_mmap) #define SYS_mmap SYS_mmap2 #endif diff --git a/mlir/docs/PatternRewriter.md b/mlir/docs/PatternRewriter.md index 8428d4ba991ef..8fe5ef35a7603 100644 --- a/mlir/docs/PatternRewriter.md +++ b/mlir/docs/PatternRewriter.md @@ -383,7 +383,7 @@ Example output is shown below: ``` //===-------------------------------------------===// Processing operation : 'cf.cond_br'(0x60f000001120) { - "cf.cond_br"(%arg0)[^bb2, ^bb2] {operand_segment_sizes = array} : (i1) -> () + "cf.cond_br"(%arg0)[^bb2, ^bb2] {operandSegmentSizes = array} : (i1) -> () * Pattern SimplifyConstCondBranchPred : 'cf.cond_br -> ()' { } -> failure : pattern failed to match diff --git a/mlir/include/mlir/Dialect/IRDL/IR/IRDLOps.td b/mlir/include/mlir/Dialect/IRDL/IR/IRDLOps.td index dfa97c865118f..9f15ca767abf9 100644 --- a/mlir/include/mlir/Dialect/IRDL/IR/IRDLOps.td +++ b/mlir/include/mlir/Dialect/IRDL/IR/IRDLOps.td @@ -223,6 +223,18 @@ def IRDL_OperandsOp : IRDL_Op<"operands", [HasParent<"OperationOp">]> { The `mul` operation will expect two operands of type `cmath.complex`, that have the same type, and return a result of the same type. + + The operands can also be marked as variadic or optional: + ```mlir + irdl.operands(%0, single %1, optional %2, variadic %3) + ``` + + Here, %0 and %1 are required single operands, %2 is an optional operand, + and %3 is a variadic operand. + + When more than one operand is marked as optional or variadic, the operation + will expect a 'operandSegmentSizes' attribute that defines the number of + operands in each segment. }]; let arguments = (ins Variadic:$args); @@ -254,6 +266,18 @@ def IRDL_ResultsOp : IRDL_Op<"results", [HasParent<"OperationOp">]> { The operation will expect one operand of the `cmath.complex` type, and two results that have the underlying type of the `cmath.complex`. 
+ + The results can also be marked as variadic or optional: + ```mlir + irdl.results(%0, single %1, optional %2, variadic %3) + ``` + + Here, %0 and %1 are required single results, %2 is an optional result, + and %3 is a variadic result. + + When more than one result is marked as optional or variadic, the operation + will expect a 'resultSegmentSizes' attribute that defines the number of + results in each segment. }]; let arguments = (ins Variadic:$args); diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td index 1efd2b6b63dd9..4567b3f1902d7 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td @@ -874,23 +874,6 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> { return cast(*this->getOperation()) .hasTensorSemantics(); } - - //========================================================================// - // Helper functions to mutate the `operand_segment_sizes` attribute. - // These are useful when cloning and changing operand types. - //========================================================================// - void setNumInputs(unsigned num) { setOperandSegmentAt(0, num); } - void setNumOutputBuffers(unsigned num) { setOperandSegmentAt(1, num); } - - private: - void setOperandSegmentAt(unsigned idx, unsigned val) { - auto attr = ::llvm::cast( - (*this)->getAttr("operand_segment_sizes")); - unsigned i = 0; - auto newAttr = attr.mapValues(IntegerType::get(getContext(), 32), - [&](const APInt &v) { return (i++ == idx) ? 
APInt(32, val) : v; }); - getOperation()->setAttr("operand_segment_sizes", newAttr); - } }]; let verify = [{ return detail::verifyStructuredOpInterface($_op); }]; diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td index 274a531f4061e..f25106b1593a3 100644 --- a/mlir/include/mlir/IR/OpBase.td +++ b/mlir/include/mlir/IR/OpBase.td @@ -2178,7 +2178,7 @@ def SameVariadicOperandSize : GenInternalOpTrait<"SameVariadicOperandSize">; // to have the same array size. def SameVariadicResultSize : GenInternalOpTrait<"SameVariadicResultSize">; -// Uses an attribute named `operand_segment_sizes` to specify how many actual +// Uses an attribute named `operandSegmentSizes` to specify how many actual // operand each ODS-declared operand (variadic or not) corresponds to. // This trait is used for ops that have multiple variadic operands but do // not know statically their size relationship. The attribute must be a 1D @@ -2188,7 +2188,7 @@ def SameVariadicResultSize : GenInternalOpTrait<"SameVariadicResultSize">; def AttrSizedOperandSegments : NativeOpTrait<"AttrSizedOperandSegments">, StructuralOpTrait; // Similar to AttrSizedOperandSegments, but used for results. The attribute -// should be named as `result_segment_sizes`. +// should be named as `resultSegmentSizes`. def AttrSizedResultSegments : NativeOpTrait<"AttrSizedResultSegments">, StructuralOpTrait; diff --git a/mlir/include/mlir/IR/OpDefinition.h b/mlir/include/mlir/IR/OpDefinition.h index d42bffaf32b03..afbd0395b466a 100644 --- a/mlir/include/mlir/IR/OpDefinition.h +++ b/mlir/include/mlir/IR/OpDefinition.h @@ -1331,7 +1331,7 @@ struct HasParent { /// relationship is not always known statically. For such cases, we need /// a per-op-instance specification to divide the operands into logical groups /// or segments. This can be modeled by attributes. The attribute will be named -/// as `operand_segment_sizes`. +/// as `operandSegmentSizes`. 
/// /// This trait verifies the attribute for specifying operand segments has /// the correct type (1D vector) and values (non-negative), etc. @@ -1339,9 +1339,7 @@ template class AttrSizedOperandSegments : public TraitBase { public: - static StringRef getOperandSegmentSizeAttr() { - return "operand_segment_sizes"; - } + static StringRef getOperandSegmentSizeAttr() { return "operandSegmentSizes"; } static LogicalResult verifyTrait(Operation *op) { return ::mlir::OpTrait::impl::verifyOperandSizeAttr( @@ -1354,7 +1352,7 @@ template class AttrSizedResultSegments : public TraitBase { public: - static StringRef getResultSegmentSizeAttr() { return "result_segment_sizes"; } + static StringRef getResultSegmentSizeAttr() { return "resultSegmentSizes"; } static LogicalResult verifyTrait(Operation *op) { return ::mlir::OpTrait::impl::verifyResultSizeAttr( diff --git a/mlir/include/mlir/IR/OpImplementation.h b/mlir/include/mlir/IR/OpImplementation.h index 0eeb8bb1ec8da..2131fe313f8c5 100644 --- a/mlir/include/mlir/IR/OpImplementation.h +++ b/mlir/include/mlir/IR/OpImplementation.h @@ -715,18 +715,20 @@ class AsmParser { //===--------------------------------------------------------------------===// /// This class represents a StringSwitch like class that is useful for parsing - /// expected keywords. On construction, it invokes `parseKeyword` and - /// processes each of the provided cases statements until a match is hit. The - /// provided `ResultT` must be assignable from `failure()`. + /// expected keywords. On construction, unless a non-empty keyword is + /// provided, it invokes `parseKeyword` and processes each of the provided + /// cases statements until a match is hit. The provided `ResultT` must be + /// assignable from `failure()`. 
template class KeywordSwitch { public: - KeywordSwitch(AsmParser &parser) + KeywordSwitch(AsmParser &parser, StringRef *keyword = nullptr) : parser(parser), loc(parser.getCurrentLocation()) { - if (failed(parser.parseKeywordOrCompletion(&keyword))) + if (keyword && !keyword->empty()) + this->keyword = *keyword; + else if (failed(parser.parseKeywordOrCompletion(&this->keyword))) result = failure(); } - /// Case that uses the provided value when true. KeywordSwitch &Case(StringLiteral str, ResultT value) { return Case(str, [&](StringRef, SMLoc) { return std::move(value); }); diff --git a/mlir/include/mlir/IR/OperationSupport.h b/mlir/include/mlir/IR/OperationSupport.h index f3a79eb52f8ec..adae3560570dd 100644 --- a/mlir/include/mlir/IR/OperationSupport.h +++ b/mlir/include/mlir/IR/OperationSupport.h @@ -555,7 +555,7 @@ class RegisteredOperationName : public OperationName { StringRef name) final { if constexpr (hasProperties) { auto concreteOp = cast(op); - return ConcreteOp::getInherentAttr(concreteOp.getContext(), + return ConcreteOp::getInherentAttr(concreteOp->getContext(), concreteOp.getProperties(), name); } // If the op does not have support for properties, we dispatch back to the @@ -576,7 +576,7 @@ class RegisteredOperationName : public OperationName { void populateInherentAttrs(Operation *op, NamedAttrList &attrs) final { if constexpr (hasProperties) { auto concreteOp = cast(op); - ConcreteOp::populateInherentAttrs(concreteOp.getContext(), + ConcreteOp::populateInherentAttrs(concreteOp->getContext(), concreteOp.getProperties(), attrs); } } diff --git a/mlir/include/mlir/Pass/PassManager.h b/mlir/include/mlir/Pass/PassManager.h index 75fe1524221c1..d5f1ea0fe0350 100644 --- a/mlir/include/mlir/Pass/PassManager.h +++ b/mlir/include/mlir/Pass/PassManager.h @@ -172,6 +172,10 @@ class OpPassManager { /// if a pass manager has already been initialized. 
LogicalResult initialize(MLIRContext *context, unsigned newInitGeneration); + /// Compute a hash of the pipeline, so that we can detect changes (a pass is + /// added...). + llvm::hash_code hash(); + /// A pointer to an internal implementation instance. std::unique_ptr impl; @@ -439,9 +443,11 @@ class PassManager : public OpPassManager { /// generate reproducers. std::unique_ptr crashReproGenerator; - /// A hash key used to detect when reinitialization is necessary. + /// Hash keys used to detect when reinitialization is necessary. llvm::hash_code initializationKey = DenseMapInfo::getTombstoneKey(); + llvm::hash_code pipelineInitializationKey = + DenseMapInfo::getTombstoneKey(); /// Flag that specifies if pass timing is enabled. bool passTiming : 1; diff --git a/mlir/lib/Bindings/Python/IRCore.cpp b/mlir/lib/Bindings/Python/IRCore.cpp index 971d2819ade44..c755dc12a311b 100644 --- a/mlir/lib/Bindings/Python/IRCore.cpp +++ b/mlir/lib/Bindings/Python/IRCore.cpp @@ -1675,28 +1675,28 @@ py::object PyOpView::buildGeneric( } else { attributes = py::dict(); } - if (attributes->contains("result_segment_sizes") || - attributes->contains("operand_segment_sizes")) { - throw py::value_error("Manually setting a 'result_segment_sizes' or " - "'operand_segment_sizes' attribute is unsupported. " + if (attributes->contains("resultSegmentSizes") || + attributes->contains("operandSegmentSizes")) { + throw py::value_error("Manually setting a 'resultSegmentSizes' or " + "'operandSegmentSizes' attribute is unsupported. " "Use Operation.create for such low-level access."); } - // Add result_segment_sizes attribute. + // Add resultSegmentSizes attribute. if (!resultSegmentLengths.empty()) { MlirAttribute segmentLengthAttr = mlirDenseI32ArrayGet(context->get(), resultSegmentLengths.size(), resultSegmentLengths.data()); - (*attributes)["result_segment_sizes"] = + (*attributes)["resultSegmentSizes"] = PyAttribute(context, segmentLengthAttr); } - // Add operand_segment_sizes attribute. 
+ // Add operandSegmentSizes attribute. if (!operandSegmentLengths.empty()) { MlirAttribute segmentLengthAttr = mlirDenseI32ArrayGet(context->get(), operandSegmentLengths.size(), operandSegmentLengths.data()); - (*attributes)["operand_segment_sizes"] = + (*attributes)["operandSegmentSizes"] = PyAttribute(context, segmentLengthAttr); } } diff --git a/mlir/lib/Dialect/AMDGPU/Transforms/EmulateAtomics.cpp b/mlir/lib/Dialect/AMDGPU/Transforms/EmulateAtomics.cpp index 9dfe07797ff4b..e6154a329aacc 100644 --- a/mlir/lib/Dialect/AMDGPU/Transforms/EmulateAtomics.cpp +++ b/mlir/lib/Dialect/AMDGPU/Transforms/EmulateAtomics.cpp @@ -53,7 +53,7 @@ enum class DataArgAction : unsigned char { // Fix up the fact that, when we're migrating from a general bugffer atomic // to a load or to a CAS, the number of openrands, and thus the number of -// entries needed in operand_segment_sizes, needs to change. We use this method +// entries needed in operandSegmentSizes, needs to change. We use this method // because we'd like to preserve unknown attributes on the atomic instead of // discarding them. 
static void patchOperandSegmentSizes(ArrayRef attrs, @@ -61,7 +61,7 @@ static void patchOperandSegmentSizes(ArrayRef attrs, DataArgAction action) { newAttrs.reserve(attrs.size()); for (NamedAttribute attr : attrs) { - if (attr.getName().getValue() != "operand_segment_sizes") { + if (attr.getName().getValue() != "operandSegmentSizes") { newAttrs.push_back(attr); continue; } diff --git a/mlir/lib/Dialect/Async/IR/Async.cpp b/mlir/lib/Dialect/Async/IR/Async.cpp index 7d018bf8f3a3d..abe6670c7f855 100644 --- a/mlir/lib/Dialect/Async/IR/Async.cpp +++ b/mlir/lib/Dialect/Async/IR/Async.cpp @@ -61,7 +61,7 @@ YieldOp::getMutableSuccessorOperands(std::optional index) { /// ExecuteOp //===----------------------------------------------------------------------===// -constexpr char kOperandSegmentSizesAttr[] = "operand_segment_sizes"; +constexpr char kOperandSegmentSizesAttr[] = "operandSegmentSizes"; OperandRange ExecuteOp::getSuccessorEntryOperands(std::optional index) { @@ -100,7 +100,7 @@ void ExecuteOp::build(OpBuilder &builder, OperationState &result, result.addOperands(dependencies); result.addOperands(operands); - // Add derived `operand_segment_sizes` attribute based on parsed operands. + // Add derived `operandSegmentSizes` attribute based on parsed operands. int32_t numDependencies = dependencies.size(); int32_t numOperands = operands.size(); auto operandSegmentSizes = @@ -208,7 +208,7 @@ ParseResult ExecuteOp::parse(OpAsmParser &parser, OperationState &result) { int32_t numOperands = valueArgs.size(); - // Add derived `operand_segment_sizes` attribute based on parsed operands. + // Add derived `operandSegmentSizes` attribute based on parsed operands. 
auto operandSegmentSizes = parser.getBuilder().getDenseI32ArrayAttr({numDependencies, numOperands}); result.addAttribute(kOperandSegmentSizesAttr, operandSegmentSizes); diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index d6778ed72c7d0..4f5452b27e3e0 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -170,7 +170,7 @@ static void buildStructuredOp(OpBuilder &b, OperationState &state, state.addTypes(derivedResultTypes); state.addAttributes(attributes); state.addAttribute( - "operand_segment_sizes", + "operandSegmentSizes", b.getDenseI32ArrayAttr({static_cast(inputs.size()), static_cast(outputs.size())})); @@ -226,18 +226,18 @@ parseCommonStructuredOpParts(OpAsmParser &parser, OperationState &result, // This is a bit complex because we're trying to be backward compatible with // operation syntax that mix the inherent attributes and the discardable // ones in the same dictionary. If the properties are used, we append the - // operand_segment_sizes there directly. Otherwise we append it to the + // operandSegmentSizes there directly. Otherwise we append it to the // discardable attributes dictionary where it is handled by the generic // Operation::create(...) method. 
if (result.propertiesAttr) { NamedAttrList attrs = llvm::cast(result.propertiesAttr); - attrs.append("operand_segment_sizes", + attrs.append("operandSegmentSizes", parser.getBuilder().getDenseI32ArrayAttr( {static_cast(inputsOperands.size()), static_cast(outputsOperands.size())})); result.propertiesAttr = attrs.getDictionary(parser.getContext()); } else { - result.addAttribute("operand_segment_sizes", + result.addAttribute("operandSegmentSizes", parser.getBuilder().getDenseI32ArrayAttr( {static_cast(inputsOperands.size()), static_cast(outputsOperands.size())})); @@ -332,7 +332,7 @@ static void printNamedStructuredOp(OpAsmPrinter &p, Operation *op, ValueRange inputs, ValueRange outputs) { p.printOptionalAttrDict( op->getAttrs(), - /*elidedAttrs=*/{"operand_segment_sizes", + /*elidedAttrs=*/{"operandSegmentSizes", // See generated code in // LinalgNamedStructuredOps.yamlgen.cpp.inc "linalg.memoized_indexing_maps"}); @@ -878,7 +878,7 @@ void GenericOp::print(OpAsmPrinter &p) { printCommonStructuredOpParts(p, SmallVector(getDpsInputOperands()), SmallVector(getDpsInitOperands())); - genericAttrNames.push_back("operand_segment_sizes"); + genericAttrNames.push_back("operandSegmentSizes"); genericAttrNamesSet.insert(genericAttrNames.back()); bool hasExtraAttrs = false; diff --git a/mlir/lib/Dialect/Math/Transforms/ExpandPatterns.cpp b/mlir/lib/Dialect/Math/Transforms/ExpandPatterns.cpp index ee8f23cf362b6..98c97fdc2c090 100644 --- a/mlir/lib/Dialect/Math/Transforms/ExpandPatterns.cpp +++ b/mlir/lib/Dialect/Math/Transforms/ExpandPatterns.cpp @@ -168,11 +168,26 @@ static LogicalResult convertPowfOp(math::PowFOp op, PatternRewriter &rewriter) { Value operandA = op.getOperand(0); Value operandB = op.getOperand(1); Type opType = operandA.getType(); + Value zero = createFloatConst(op->getLoc(), opType, 0.00, rewriter); + Value two = createFloatConst(op->getLoc(), opType, 2.00, rewriter); + Value negOne = createFloatConst(op->getLoc(), opType, -1.00, rewriter); + Value opASquared 
= b.create(opType, operandA, operandA); + Value opBHalf = b.create(opType, operandB, two); - Value logA = b.create(opType, operandA); - Value mult = b.create(opType, logA, operandB); + Value logA = b.create(opType, opASquared); + Value mult = b.create(opType, opBHalf, logA); Value expResult = b.create(opType, mult); - rewriter.replaceOp(op, expResult); + Value negExpResult = b.create(opType, expResult, negOne); + Value remainder = b.create(opType, operandB, two); + Value negCheck = + b.create(arith::CmpFPredicate::OLT, operandA, zero); + Value oddPower = + b.create(arith::CmpFPredicate::ONE, remainder, zero); + Value oddAndNeg = b.create(op->getLoc(), oddPower, negCheck); + + Value res = b.create(op->getLoc(), oddAndNeg, negExpResult, + expResult); + rewriter.replaceOp(op, res); return success(); } diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp index 5f35adf0ddaab..658756c6a6e61 100644 --- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp +++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp @@ -31,23 +31,17 @@ namespace { namespace saturated_arith { struct Wrapper { static Wrapper stride(int64_t v) { - return (ShapedType::isDynamic(v)) ? Wrapper{true, 0} - : Wrapper{false, v}; + return (ShapedType::isDynamic(v)) ? Wrapper{true, 0} : Wrapper{false, v}; } static Wrapper offset(int64_t v) { - return (ShapedType::isDynamic(v)) ? Wrapper{true, 0} - : Wrapper{false, v}; + return (ShapedType::isDynamic(v)) ? Wrapper{true, 0} : Wrapper{false, v}; } static Wrapper size(int64_t v) { return (ShapedType::isDynamic(v)) ? Wrapper{true, 0} : Wrapper{false, v}; } - int64_t asOffset() { - return saturated ? ShapedType::kDynamic : v; - } + int64_t asOffset() { return saturated ? ShapedType::kDynamic : v; } int64_t asSize() { return saturated ? ShapedType::kDynamic : v; } - int64_t asStride() { - return saturated ? ShapedType::kDynamic : v; - } + int64_t asStride() { return saturated ? 
ShapedType::kDynamic : v; } bool operator==(Wrapper other) { return (saturated && other.saturated) || (!saturated && !other.saturated && v == other.v); @@ -732,8 +726,7 @@ bool CastOp::canFoldIntoConsumerOp(CastOp castOp) { for (auto it : llvm::zip(sourceStrides, resultStrides)) { auto ss = std::get<0>(it), st = std::get<1>(it); if (ss != st) - if (ShapedType::isDynamic(ss) && - !ShapedType::isDynamic(st)) + if (ShapedType::isDynamic(ss) && !ShapedType::isDynamic(st)) return false; } @@ -766,8 +759,7 @@ bool CastOp::areCastCompatible(TypeRange inputs, TypeRange outputs) { // same. They are also compatible if either one is dynamic (see // description of MemRefCastOp for details). auto checkCompatible = [](int64_t a, int64_t b) { - return (ShapedType::isDynamic(a) || - ShapedType::isDynamic(b) || a == b); + return (ShapedType::isDynamic(a) || ShapedType::isDynamic(b) || a == b); }; if (!checkCompatible(aOffset, bOffset)) return false; @@ -1890,8 +1882,7 @@ LogicalResult ReinterpretCastOp::verify() { // Match offset in result memref type and in static_offsets attribute. int64_t expectedOffset = getStaticOffsets().front(); if (!ShapedType::isDynamic(resultOffset) && - !ShapedType::isDynamic(expectedOffset) && - resultOffset != expectedOffset) + !ShapedType::isDynamic(expectedOffset) && resultOffset != expectedOffset) return emitError("expected result type with offset = ") << expectedOffset << " instead of " << resultOffset; @@ -2945,18 +2936,6 @@ static MemRefType getCanonicalSubViewResultType( nonRankReducedType.getMemorySpace()); } -/// Compute the canonical result type of a SubViewOp. Call `inferResultType` -/// to deduce the result type. Additionally, reduce the rank of the inferred -/// result type if `currentResultType` is lower rank than `sourceType`. 
-static MemRefType getCanonicalSubViewResultType( - MemRefType currentResultType, MemRefType sourceType, - ArrayRef mixedOffsets, ArrayRef mixedSizes, - ArrayRef mixedStrides) { - return getCanonicalSubViewResultType(currentResultType, sourceType, - sourceType, mixedOffsets, mixedSizes, - mixedStrides); -} - Value mlir::memref::createCanonicalRankReducingSubViewOp( OpBuilder &b, Location loc, Value memref, ArrayRef targetShape) { auto memrefType = llvm::cast(memref.getType()); @@ -3109,9 +3088,32 @@ struct SubViewReturnTypeCanonicalizer { MemRefType operator()(SubViewOp op, ArrayRef mixedOffsets, ArrayRef mixedSizes, ArrayRef mixedStrides) { - return getCanonicalSubViewResultType(op.getType(), op.getSourceType(), - mixedOffsets, mixedSizes, - mixedStrides); + // Infer a memref type without taking into account any rank reductions. + MemRefType nonReducedType = cast(SubViewOp::inferResultType( + op.getSourceType(), mixedOffsets, mixedSizes, mixedStrides)); + + // Directly return the non-rank reduced type if there are no dropped dims. + llvm::SmallBitVector droppedDims = op.getDroppedDims(); + if (droppedDims.empty()) + return nonReducedType; + + // Take the strides and offset from the non-rank reduced type. + auto [nonReducedStrides, offset] = getStridesAndOffset(nonReducedType); + + // Drop dims from shape and strides. 
+ SmallVector targetShape; + SmallVector targetStrides; + for (int64_t i = 0; i < static_cast(mixedSizes.size()); ++i) { + if (droppedDims.test(i)) + continue; + targetStrides.push_back(nonReducedStrides[i]); + targetShape.push_back(nonReducedType.getDimSize(i)); + } + + return MemRefType::get(targetShape, nonReducedType.getElementType(), + StridedLayoutAttr::get(nonReducedType.getContext(), + offset, targetStrides), + nonReducedType.getMemorySpace()); } }; diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp index aaa5e39cd2f3d..a7b516e1e8640 100644 --- a/mlir/lib/Dialect/SCF/IR/SCF.cpp +++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp @@ -1365,7 +1365,7 @@ ParseResult ForallOp::parse(OpAsmParser &parser, OperationState &result) { result.addAttribute("staticLowerBound", staticLbs); result.addAttribute("staticUpperBound", staticUbs); result.addAttribute("staticStep", staticSteps); - result.addAttribute("operand_segment_sizes", + result.addAttribute("operandSegmentSizes", parser.getBuilder().getDenseI32ArrayAttr( {static_cast(dynamicLbs.size()), static_cast(dynamicUbs.size()), @@ -1400,7 +1400,7 @@ void ForallOp::build( result.addAttribute(getStaticStepAttrName(result.name), b.getDenseI64ArrayAttr(staticSteps)); result.addAttribute( - "operand_segment_sizes", + "operandSegmentSizes", b.getDenseI32ArrayAttr({static_cast(dynamicLbs.size()), static_cast(dynamicUbs.size()), static_cast(dynamicSteps.size()), @@ -1601,7 +1601,7 @@ struct ForallOpSingleOrZeroIterationDimsFolder op.getOutputs(), std::nullopt, nullptr); newOp.getBodyRegion().getBlocks().clear(); // The new loop needs to keep all attributes from the old one, except for - // "operand_segment_sizes" and static loop bound attributes which capture + // "operandSegmentSizes" and static loop bound attributes which capture // the outdated information of the old iteration domain. 
SmallVector elidedAttrs{newOp.getOperandSegmentSizesAttrName(), newOp.getStaticLowerBoundAttrName(), @@ -2833,7 +2833,7 @@ ParseResult ParallelOp::parse(OpAsmParser &parser, OperationState &result) { if (parser.parseRegion(*body, ivs)) return failure(); - // Set `operand_segment_sizes` attribute. + // Set `operandSegmentSizes` attribute. result.addAttribute( ParallelOp::getOperandSegmentSizeAttr(), builder.getDenseI32ArrayAttr({static_cast(lower.size()), diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp index 325f986f97694..af41532670890 100644 --- a/mlir/lib/IR/AsmPrinter.cpp +++ b/mlir/lib/IR/AsmPrinter.cpp @@ -1043,6 +1043,12 @@ std::pair AliasInitializer::visitImpl( void AliasInitializer::markAliasNonDeferrable(size_t aliasIndex) { auto it = std::next(aliases.begin(), aliasIndex); + + // If already marked non-deferrable stop the recursion. + // All children should already be marked non-deferrable as well. + if (!it->second.canBeDeferred) + return; + it->second.canBeDeferred = false; // Propagate the non-deferrable flag to any child aliases. 
diff --git a/mlir/lib/IR/Dialect.cpp b/mlir/lib/IR/Dialect.cpp index 501f52b83e026..1de49769974ac 100644 --- a/mlir/lib/IR/Dialect.cpp +++ b/mlir/lib/IR/Dialect.cpp @@ -125,7 +125,8 @@ DialectInterfaceCollectionBase::DialectInterfaceCollectionBase( MLIRContext *ctx, TypeID interfaceKind, StringRef interfaceName) { for (auto *dialect : ctx->getLoadedDialects()) { #ifndef NDEBUG - dialect->handleUseOfUndefinedPromisedInterface(interfaceKind, interfaceName); + dialect->handleUseOfUndefinedPromisedInterface(interfaceKind, + interfaceName); #endif if (auto *interface = dialect->getRegisteredInterface(interfaceKind)) { interfaces.insert(interface); @@ -243,8 +244,9 @@ void DialectRegistry::applyExtensions(Dialect *dialect) const { extension.apply(ctx, requiredDialects); }; - for (const auto &extension : extensions) - applyExtension(*extension); + // Note: Additional extensions may be added while applying an extension. + for (int i = 0; i < static_cast(extensions.size()); ++i) + applyExtension(*extensions[i]); } void DialectRegistry::applyExtensions(MLIRContext *ctx) const { @@ -264,8 +266,9 @@ void DialectRegistry::applyExtensions(MLIRContext *ctx) const { extension.apply(ctx, requiredDialects); }; - for (const auto &extension : extensions) - applyExtension(*extension); + // Note: Additional extensions may be added while applying an extension. 
+ for (int i = 0; i < static_cast(extensions.size()); ++i) + applyExtension(*extensions[i]); } bool DialectRegistry::isSubsetOf(const DialectRegistry &rhs) const { diff --git a/mlir/lib/Pass/Pass.cpp b/mlir/lib/Pass/Pass.cpp index fe4597f3df3d2..a562a00eb1953 100644 --- a/mlir/lib/Pass/Pass.cpp +++ b/mlir/lib/Pass/Pass.cpp @@ -18,6 +18,7 @@ #include "mlir/IR/Threading.h" #include "mlir/IR/Verifier.h" #include "mlir/Support/FileUtilities.h" +#include "llvm/ADT/Hashing.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/Support/CommandLine.h" @@ -424,6 +425,23 @@ LogicalResult OpPassManager::initialize(MLIRContext *context, return success(); } +llvm::hash_code OpPassManager::hash() { + llvm::hash_code hashCode{}; + for (Pass &pass : getPasses()) { + // If this pass isn't an adaptor, directly hash it. + auto *adaptor = dyn_cast(&pass); + if (!adaptor) { + hashCode = llvm::hash_combine(hashCode, &pass); + continue; + } + // Otherwise, hash recursively each of the adaptor's pass managers. + for (OpPassManager &adaptorPM : adaptor->getPassManagers()) + hashCode = llvm::hash_combine(hashCode, adaptorPM.hash()); + } + return hashCode; +} + + //===----------------------------------------------------------------------===// // OpToOpPassAdaptor //===----------------------------------------------------------------------===// @@ -825,10 +843,12 @@ LogicalResult PassManager::run(Operation *op) { // Initialize all of the passes within the pass manager with a new generation. llvm::hash_code newInitKey = context->getRegistryHash(); - if (newInitKey != initializationKey) { + llvm::hash_code pipelineKey = hash(); + if (newInitKey != initializationKey || pipelineKey != pipelineInitializationKey) { if (failed(initialize(context, impl->initializationGeneration + 1))) return failure(); initializationKey = newInitKey; + pipelineInitializationKey = pipelineKey; } // Construct a top level analysis manager for the pipeline.
diff --git a/mlir/lib/Rewrite/ByteCode.cpp b/mlir/lib/Rewrite/ByteCode.cpp index c8c442823781b..e7d4c4089a991 100644 --- a/mlir/lib/Rewrite/ByteCode.cpp +++ b/mlir/lib/Rewrite/ByteCode.cpp @@ -1846,7 +1846,7 @@ void ByteCodeExecutor::executeGetOperands() { ByteCodeField rangeIndex = read(); void *result = executeGetOperandsResults( - op->getOperands(), op, index, rangeIndex, "operand_segment_sizes", + op->getOperands(), op, index, rangeIndex, "operandSegmentSizes", valueRangeMemory); if (!result) LLVM_DEBUG(llvm::dbgs() << " * Invalid operand range\n"); @@ -1872,7 +1872,7 @@ void ByteCodeExecutor::executeGetResults() { ByteCodeField rangeIndex = read(); void *result = executeGetOperandsResults( - op->getResults(), op, index, rangeIndex, "result_segment_sizes", + op->getResults(), op, index, rangeIndex, "resultSegmentSizes", valueRangeMemory); if (!result) LLVM_DEBUG(llvm::dbgs() << " * Invalid result range\n"); diff --git a/mlir/lib/Transforms/Canonicalizer.cpp b/mlir/lib/Transforms/Canonicalizer.cpp index b4ad85c7c7dad..d50019bd6aee5 100644 --- a/mlir/lib/Transforms/Canonicalizer.cpp +++ b/mlir/lib/Transforms/Canonicalizer.cpp @@ -29,7 +29,8 @@ struct Canonicalizer : public impl::CanonicalizerBase { Canonicalizer() = default; Canonicalizer(const GreedyRewriteConfig &config, ArrayRef disabledPatterns, - ArrayRef enabledPatterns) { + ArrayRef enabledPatterns) + : config(config) { this->topDownProcessingEnabled = config.useTopDownTraversal; this->enableRegionSimplification = config.enableRegionSimplification; this->maxIterations = config.maxIterations; @@ -41,30 +42,31 @@ struct Canonicalizer : public impl::CanonicalizerBase { /// Initialize the canonicalizer by building the set of patterns used during /// execution. LogicalResult initialize(MLIRContext *context) override { + // Set the config from possible pass options set in the meantime. 
+ config.useTopDownTraversal = topDownProcessingEnabled; + config.enableRegionSimplification = enableRegionSimplification; + config.maxIterations = maxIterations; + config.maxNumRewrites = maxNumRewrites; + RewritePatternSet owningPatterns(context); for (auto *dialect : context->getLoadedDialects()) dialect->getCanonicalizationPatterns(owningPatterns); for (RegisteredOperationName op : context->getRegisteredOperations()) op.getCanonicalizationPatterns(owningPatterns, context); - patterns = FrozenRewritePatternSet(std::move(owningPatterns), - disabledPatterns, enabledPatterns); + patterns = std::make_shared( + std::move(owningPatterns), disabledPatterns, enabledPatterns); return success(); } void runOnOperation() override { - GreedyRewriteConfig config; - config.useTopDownTraversal = topDownProcessingEnabled; - config.enableRegionSimplification = enableRegionSimplification; - config.maxIterations = maxIterations; - config.maxNumRewrites = maxNumRewrites; LogicalResult converged = - applyPatternsAndFoldGreedily(getOperation(), patterns, config); + applyPatternsAndFoldGreedily(getOperation(), *patterns, config); // Canonicalization is best-effort. Non-convergence is not a pass failure. 
if (testConvergence && failed(converged)) signalPassFailure(); } - - FrozenRewritePatternSet patterns; + GreedyRewriteConfig config; + std::shared_ptr patterns; }; } // namespace diff --git a/mlir/test/Bytecode/operand_segment_sizes.mlir b/mlir/test/Bytecode/operand_segment_sizes.mlir index 9791bd4e0f264..c0379c2994f49 100644 --- a/mlir/test/Bytecode/operand_segment_sizes.mlir +++ b/mlir/test/Bytecode/operand_segment_sizes.mlir @@ -2,7 +2,7 @@ func.func @roundtripOperandSizeAttr(%arg0: i32) { - // CHECK: odsOperandSegmentSizes = array}> - "test.attr_sized_operands"(%arg0, %arg0, %arg0, %arg0) <{odsOperandSegmentSizes = array}> : (i32, i32, i32, i32) -> () + // CHECK: operandSegmentSizes = array}> + "test.attr_sized_operands"(%arg0, %arg0, %arg0, %arg0) <{operandSegmentSizes = array}> : (i32, i32, i32, i32) -> () return } diff --git a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir index ab91729a0556b..b83b122f75e4b 100644 --- a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir +++ b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir @@ -79,7 +79,7 @@ func.func @wsloop(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: // CHECK: "test.payload"(%[[CAST_ARG6]], %[[CAST_ARG7]]) : (index, index) -> () "test.payload"(%arg6, %arg7) : (index, index) -> () omp.yield - }) {operand_segment_sizes = array} : (index, index, index, index, index, index) -> () + }) {operandSegmentSizes = array} : (index, index, index, index, index, index) -> () omp.terminator } return @@ -328,7 +328,7 @@ llvm.func @_QPsimple_reduction(%arg0: !llvm.ptr> {fir.bindc_nam %5 = llvm.zext %2 : i1 to i32 llvm.store %5, %4 : !llvm.ptr omp.parallel { - %6 = llvm.alloca %3 x i32 {adapt.valuebyref, in_type = i32, operand_segment_sizes = array, pinned} : (i64) -> !llvm.ptr + %6 = llvm.alloca %3 x i32 {adapt.valuebyref, in_type = i32, operandSegmentSizes = array, pinned} : (i64) -> !llvm.ptr omp.wsloop 
reduction(@eqv_reduction -> %4 : !llvm.ptr) for (%arg1) : i32 = (%1) to (%0) inclusive step (%1) { llvm.store %arg1, %6 : !llvm.ptr %7 = llvm.load %6 : !llvm.ptr diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir index e280cd65811db..80c65e14e7635 100644 --- a/mlir/test/Dialect/GPU/invalid.mlir +++ b/mlir/test/Dialect/GPU/invalid.mlir @@ -4,7 +4,7 @@ func.func @not_enough_sizes(%sz : index) { // expected-error@+1 {{expected 6 or more operands, but found 5}} "gpu.launch"(%sz, %sz, %sz, %sz, %sz) ({ gpu.return - }) {operand_segment_sizes = array} : (index, index, index, index, index) -> () + }) {operandSegmentSizes = array} : (index, index, index, index, index) -> () return } @@ -16,7 +16,7 @@ func.func @no_region_attrs(%sz : index) { ^bb1(%bx: index, %by: index, %bz: index, %tx: index, %ty: index, %tz: index): gpu.terminator - }) {operand_segment_sizes = array} : (index, index, index, index, index, index) -> () + }) {operandSegmentSizes = array} : (index, index, index, index, index, index) -> () return } @@ -38,7 +38,7 @@ func.func @launch_requires_gpu_return(%sz : index) { func.func @launch_func_too_few_operands(%sz : index) { // expected-error@+1 {{expected 6 or more operands}} "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz) - {operand_segment_sizes = array} + {operandSegmentSizes = array} : (index, index, index, index, index) -> () return } @@ -57,7 +57,7 @@ module attributes {gpu.container_module} { func.func @launch_func_missing_callee_attribute(%sz : index) { // expected-error@+1 {{'gpu.launch_func' op requires attribute 'kernel'}} "gpu.launch_func"(%sz, %sz, %sz, %sz, %sz, %sz) - {operand_segment_sizes = array} + {operandSegmentSizes = array} : (index, index, index, index, index, index) -> () return } diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir index 14141c4c243ab..cf4697b17aa46 100644 --- a/mlir/test/Dialect/LLVMIR/invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/invalid.mlir @@ -887,7 
+887,7 @@ func.func @switch_wrong_number_of_weights(%arg0 : i32) { func.func @switch_case_type_mismatch(%arg0 : i64) { // expected-error@below {{expects case value type to match condition value type}} - "llvm.switch"(%arg0)[^bb1, ^bb2] <{case_operand_segments = array, case_values = dense<42> : vector<1xi32>, odsOperandSegmentSizes = array}> : (i64) -> () + "llvm.switch"(%arg0)[^bb1, ^bb2] <{case_operand_segments = array, case_values = dense<42> : vector<1xi32>, operandSegmentSizes = array}> : (i64) -> () ^bb1: // pred: ^bb0 llvm.return ^bb2: // pred: ^bb0 diff --git a/mlir/test/Dialect/Linalg/named-ops.mlir b/mlir/test/Dialect/Linalg/named-ops.mlir index 8f00d54655327..b0bb06cc8654a 100644 --- a/mlir/test/Dialect/Linalg/named-ops.mlir +++ b/mlir/test/Dialect/Linalg/named-ops.mlir @@ -178,7 +178,7 @@ func.func @depthwise_conv_2d_input_nhwc_filter_default_attributes(%input: memref // ----- func.func @depthwise_conv_2d_input_nhwc_filter_wrong_stride_element_type_properties(%input: memref<1x113x113x96xf32>, %filter: memref<3x3x96xf32>, %output: memref<1x56x56x96xf32>) { - // expected-error @+1 {{invalid properties {dilations = dense<1> : vector<2xi64>, operand_segment_sizes = array, strides = dense<2.000000e+00> : vector<2xf32>} for op linalg.depthwise_conv_2d_nhwc_hwc: Invalid attribute `strides` in property conversion: dense<2.000000e+00> : vector<2xf32>}} + // expected-error @+1 {{invalid properties {dilations = dense<1> : vector<2xi64>, operandSegmentSizes = array, strides = dense<2.000000e+00> : vector<2xf32>} for op linalg.depthwise_conv_2d_nhwc_hwc: Invalid attribute `strides` in property conversion: dense<2.000000e+00> : vector<2xf32>}} linalg.depthwise_conv_2d_nhwc_hwc <{dilations = dense<1> : vector<2xi64>, strides = dense<2.0> : vector<2xf32>}> ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) outs(%output: memref<1x56x56x96xf32>) @@ -1100,7 +1100,7 @@ func.func @conv_interface_wrong_input_indexing_map( %1 = "arith.mulf"(%arg3, %arg4) : 
(f32, f32) -> f32 %2 = "arith.addf"(%arg5, %1) : (f32, f32) -> f32 "linalg.yield"(%2) : (f32) -> () - }) {dilations = dense<1> : tensor<2xi64>, linalg.memoized_indexing_maps = [#map0, #map1, #map2], operand_segment_sizes = array, strides = dense<2> : tensor<2xi64>} : (tensor, tensor, tensor) -> tensor + }) {dilations = dense<1> : tensor<2xi64>, linalg.memoized_indexing_maps = [#map0, #map1, #map2], operandSegmentSizes = array, strides = dense<2> : tensor<2xi64>} : (tensor, tensor, tensor) -> tensor return %0 : tensor } @@ -1117,7 +1117,7 @@ func.func @conv_interface_wrong_num_operands( %1 = "arith.mulf"(%arg3, %arg4) : (f32, f32) -> f32 %2 = "arith.addf"(%arg5, %1) : (f32, f32) -> f32 "linalg.yield"(%2) : (f32) -> () - }) {dilations = dense<1> : tensor<2xi64>, linalg.memoized_indexing_maps = [#map0, #map1, #map2], operand_segment_sizes = array, strides = dense<1> : tensor<2xi64>} : (tensor, tensor, tensor) -> tensor + }) {dilations = dense<1> : tensor<2xi64>, linalg.memoized_indexing_maps = [#map0, #map1, #map2], operandSegmentSizes = array, strides = dense<1> : tensor<2xi64>} : (tensor, tensor, tensor) -> tensor return %0 : tensor } diff --git a/mlir/test/Dialect/Math/expand-math.mlir b/mlir/test/Dialect/Math/expand-math.mlir index c28e2141db061..4cd6461102079 100644 --- a/mlir/test/Dialect/Math/expand-math.mlir +++ b/mlir/test/Dialect/Math/expand-math.mlir @@ -222,10 +222,21 @@ func.func @roundf_func(%a: f32) -> f32 { // CHECK-LABEL: func @powf_func // CHECK-SAME: ([[ARG0:%.+]]: f64, [[ARG1:%.+]]: f64) func.func @powf_func(%a: f64, %b: f64) ->f64 { - // CHECK-DAG: [[LOG:%.+]] = math.log [[ARG0]] - // CHECK-DAG: [[MULT:%.+]] = arith.mulf [[LOG]], [[ARG1]] + // CHECK-DAG = [[CST0:%.+]] = arith.constant 0.000000e+00 + // CHECK-DAG: [[TWO:%.+]] = arith.constant 2.000000e+00 + // CHECK-DAG: [[NEGONE:%.+]] = arith.constant -1.000000e+00 + // CHECK-DAG: [[SQR:%.+]] = arith.mulf [[ARG0]], [[ARG0]] + // CHECK-DAG: [[HALF:%.+]] = arith.divf [[ARG1]], [[TWO]] + // 
CHECK-DAG: [[LOG:%.+]] = math.log [[SQR]] + // CHECK-DAG: [[MULT:%.+]] = arith.mulf [[HALF]], [[LOG]] // CHECK-DAG: [[EXPR:%.+]] = math.exp [[MULT]] - // CHECK: return [[EXPR]] + // CHECK-DAG: [[NEGEXPR:%.+]] = arith.mulf [[EXPR]], [[NEGONE]] + // CHECK-DAG: [[REMF:%.+]] = arith.remf [[ARG1]], [[TWO]] + // CHECK-DAG: [[CMPNEG:%.+]] = arith.cmpf olt, [[ARG0]] + // CHECK-DAG: [[CMPZERO:%.+]] = arith.cmpf one, [[REMF]] + // CHECK-DAG: [[AND:%.+]] = arith.andi [[CMPZERO]], [[CMPNEG]] + // CHECK-DAG: [[SEL:%.+]] = arith.select [[AND]], [[NEGEXPR]], [[EXPR]] + // CHECK: return [[SEL]] %ret = math.powf %a, %b : f64 return %ret : f64 } diff --git a/mlir/test/Dialect/MemRef/canonicalize.mlir b/mlir/test/Dialect/MemRef/canonicalize.mlir index b65426cad30b6..df66705e83e0e 100644 --- a/mlir/test/Dialect/MemRef/canonicalize.mlir +++ b/mlir/test/Dialect/MemRef/canonicalize.mlir @@ -931,7 +931,7 @@ func.func @fold_multiple_memory_space_cast(%arg : memref) -> memref memref { %idx1 = index.constant 1 %c-2 = arith.constant -2 : index @@ -940,3 +940,18 @@ func.func private @ub_negative_alloc_size() -> memref { %alloc = memref.alloc(%c15, %c-2, %idx1) : memref return %alloc : memref } + +// ----- + +// CHECK-LABEL: func @subview_rank_reduction( +// CHECK-SAME: %[[arg0:.*]]: memref<1x384x384xf32>, %[[arg1:.*]]: index +func.func @subview_rank_reduction(%arg0: memref<1x384x384xf32>, %idx: index) + -> memref> { + %c1 = arith.constant 1 : index + // CHECK: %[[subview:.*]] = memref.subview %[[arg0]][0, %[[arg1]], %[[arg1]]] [1, 1, %[[arg1]]] [1, 1, 1] : memref<1x384x384xf32> to memref<1x?xf32, strided<[384, 1], offset: ?>> + // CHECK: %[[cast:.*]] = memref.cast %[[subview]] : memref<1x?xf32, strided<[384, 1], offset: ?>> to memref> + %0 = memref.subview %arg0[0, %idx, %idx] [1, %c1, %idx] [1, 1, 1] + : memref<1x384x384xf32> to memref> + // CHECK: return %[[cast]] + return %0 : memref> +} diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index 
fc65fb77ffc88..009f08ced97e0 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -197,7 +197,7 @@ func.func @omp_simdloop(%lb : index, %ub : index, %step : i32) -> () { "omp.simdloop" (%lb, %ub, %step) ({ ^bb0(%iv: index): omp.yield - }) {operand_segment_sizes = array} : + }) {operandSegmentSizes = array} : (index, index, i32) -> () return @@ -225,7 +225,7 @@ func.func @omp_simdloop_aligned_mismatch(%arg0 : index, %arg1 : index, ^bb0(%arg5: index): "omp.yield"() : () -> () }) {alignment_values = [128], - operand_segment_sizes = array} : (index, index, index, memref, memref) -> () + operandSegmentSizes = array} : (index, index, index, memref, memref) -> () return } @@ -238,7 +238,7 @@ func.func @omp_simdloop_aligned_negative(%arg0 : index, %arg1 : index, "omp.simdloop"(%arg0, %arg1, %arg2, %arg3, %arg4) ({ ^bb0(%arg5: index): "omp.yield"() : () -> () - }) {alignment_values = [-1, 128], operand_segment_sizes = array} : (index, index, index, memref, memref) -> () + }) {alignment_values = [-1, 128], operandSegmentSizes = array} : (index, index, index, memref, memref) -> () return } @@ -251,7 +251,7 @@ func.func @omp_simdloop_unexpected_alignment(%arg0 : index, %arg1 : index, "omp.simdloop"(%arg0, %arg1, %arg2) ({ ^bb0(%arg5: index): "omp.yield"() : () -> () - }) {alignment_values = [1, 128], operand_segment_sizes = array} : (index, index, index) -> () + }) {alignment_values = [1, 128], operandSegmentSizes = array} : (index, index, index) -> () return } @@ -264,7 +264,7 @@ func.func @omp_simdloop_aligned_float(%arg0 : index, %arg1 : index, "omp.simdloop"(%arg0, %arg1, %arg2, %arg3, %arg4) ({ ^bb0(%arg5: index): "omp.yield"() : () -> () - }) {alignment_values = [1.5, 128], operand_segment_sizes = array} : (index, index, index, memref, memref) -> () + }) {alignment_values = [1.5, 128], operandSegmentSizes = array} : (index, index, index, memref, memref) -> () return } @@ -277,7 +277,7 @@ func.func 
@omp_simdloop_aligned_the_same_var(%arg0 : index, %arg1 : index, "omp.simdloop"(%arg0, %arg1, %arg2, %arg3, %arg3) ({ ^bb0(%arg5: index): "omp.yield"() : () -> () - }) {alignment_values = [1, 128], operand_segment_sizes = array} : (index, index, index, memref, memref) -> () + }) {alignment_values = [1, 128], operandSegmentSizes = array} : (index, index, index, memref, memref) -> () return } @@ -291,7 +291,7 @@ func.func @omp_simdloop_nontemporal_the_same_var(%arg0 : index, "omp.simdloop"(%arg0, %arg1, %arg2, %arg3, %arg3) ({ ^bb0(%arg5: index): "omp.yield"() : () -> () - }) {operand_segment_sizes = array} : (index, index, index, memref, memref) -> () + }) {operandSegmentSizes = array} : (index, index, index, memref, memref) -> () return } @@ -1121,7 +1121,7 @@ func.func @omp_teams_allocate(%data_var : memref) { // expected-error @below {{expected equal sizes for allocate and allocator variables}} "omp.teams" (%data_var) ({ omp.terminator - }) {operand_segment_sizes = array} : (memref) -> () + }) {operandSegmentSizes = array} : (memref) -> () omp.terminator } return @@ -1134,7 +1134,7 @@ func.func @omp_teams_num_teams1(%lb : i32) { // expected-error @below {{expected num_teams upper bound to be defined if the lower bound is defined}} "omp.teams" (%lb) ({ omp.terminator - }) {operand_segment_sizes = array} : (i32) -> () + }) {operandSegmentSizes = array} : (i32) -> () omp.terminator } return @@ -1159,7 +1159,7 @@ func.func @omp_sections(%data_var : memref) -> () { // expected-error @below {{expected equal sizes for allocate and allocator variables}} "omp.sections" (%data_var) ({ omp.terminator - }) {operand_segment_sizes = array} : (memref) -> () + }) {operandSegmentSizes = array} : (memref) -> () return } @@ -1169,7 +1169,7 @@ func.func @omp_sections(%data_var : memref) -> () { // expected-error @below {{expected as many reduction symbol references as reduction variables}} "omp.sections" (%data_var) ({ omp.terminator - }) {operand_segment_sizes = array} : (memref) 
-> () + }) {operandSegmentSizes = array} : (memref) -> () return } @@ -1284,7 +1284,7 @@ func.func @omp_single(%data_var : memref) -> () { // expected-error @below {{expected equal sizes for allocate and allocator variables}} "omp.single" (%data_var) ({ omp.barrier - }) {operand_segment_sizes = array} : (memref) -> () + }) {operandSegmentSizes = array} : (memref) -> () return } @@ -1294,7 +1294,7 @@ func.func @omp_task_depend(%data_var: memref) { // expected-error @below {{op expected as many depend values as depend variables}} "omp.task"(%data_var) ({ "omp.terminator"() : () -> () - }) {depends = [], operand_segment_sizes = array} : (memref) -> () + }) {depends = [], operandSegmentSizes = array} : (memref) -> () "func.return"() : () -> () } @@ -1486,7 +1486,7 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testmemref) ({ ^bb0(%arg3: i32, %arg4: i32): "omp.terminator"() : () -> () - }) {operand_segment_sizes = array} : (i32, i32, i32, i32, i32, i32, memref) -> () + }) {operandSegmentSizes = array} : (i32, i32, i32, i32, i32, i32, memref) -> () return } @@ -1499,7 +1499,7 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32, %testf32_2) ({ ^bb0(%arg3: i32, %arg4: i32): "omp.terminator"() : () -> () - }) {operand_segment_sizes = array, reductions = [@add_f32]} : (i32, i32, i32, i32, i32, i32, !llvm.ptr, !llvm.ptr) -> () + }) {operandSegmentSizes = array, reductions = [@add_f32]} : (i32, i32, i32, i32, i32, i32, !llvm.ptr, !llvm.ptr) -> () return } @@ -1512,7 +1512,7 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32) ({ ^bb0(%arg3: i32, %arg4: i32): "omp.terminator"() : () -> () - }) {operand_segment_sizes = array, reductions = [@add_f32, @add_f32]} : (i32, i32, i32, i32, i32, i32, !llvm.ptr) -> () + }) {operandSegmentSizes = array, reductions = [@add_f32, @add_f32]} : (i32, i32, i32, 
i32, i32, i32, !llvm.ptr) -> () return } @@ -1525,7 +1525,7 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32, %testf32_2) ({ ^bb0(%arg3: i32, %arg4: i32): "omp.terminator"() : () -> () - }) {in_reductions = [@add_f32], operand_segment_sizes = array} : (i32, i32, i32, i32, i32, i32, !llvm.ptr, !llvm.ptr) -> () + }) {in_reductions = [@add_f32], operandSegmentSizes = array} : (i32, i32, i32, i32, i32, i32, !llvm.ptr, !llvm.ptr) -> () return } @@ -1538,7 +1538,7 @@ func.func @taskloop(%lb: i32, %ub: i32, %step: i32) { "omp.taskloop"(%lb, %ub, %ub, %lb, %step, %step, %testf32_2) ({ ^bb0(%arg3: i32, %arg4: i32): "omp.terminator"() : () -> () - }) {in_reductions = [@add_f32, @add_f32], operand_segment_sizes = array} : (i32, i32, i32, i32, i32, i32, !llvm.ptr) -> () + }) {in_reductions = [@add_f32, @add_f32], operandSegmentSizes = array} : (i32, i32, i32, i32, i32, i32, !llvm.ptr) -> () return } diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index 2f0d224a3fef7..be59defd27d03 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -59,7 +59,7 @@ func.func @omp_parallel(%data_var : memref, %if_cond : i1, %num_threads : i // CHECK: omp.parallel num_threads(%{{.*}} : i32) allocate(%{{.*}} : memref -> %{{.*}} : memref) "omp.parallel"(%num_threads, %data_var, %data_var) ({ omp.terminator - }) {operand_segment_sizes = array} : (i32, memref, memref) -> () + }) {operandSegmentSizes = array} : (i32, memref, memref) -> () // CHECK: omp.barrier omp.barrier @@ -68,22 +68,22 @@ func.func @omp_parallel(%data_var : memref, %if_cond : i1, %num_threads : i // CHECK: omp.parallel if(%{{.*}}) allocate(%{{.*}} : memref -> %{{.*}} : memref) "omp.parallel"(%if_cond, %data_var, %data_var) ({ omp.terminator - }) {operand_segment_sizes = array} : (i1, memref, memref) -> () + }) {operandSegmentSizes = array} : (i1, memref, memref) -> () // test without allocate // 
CHECK: omp.parallel if(%{{.*}}) num_threads(%{{.*}} : i32) "omp.parallel"(%if_cond, %num_threads) ({ omp.terminator - }) {operand_segment_sizes = array} : (i1, i32) -> () + }) {operandSegmentSizes = array} : (i1, i32) -> () omp.terminator - }) {operand_segment_sizes = array, proc_bind_val = #omp} : (i1, i32, memref, memref) -> () + }) {operandSegmentSizes = array, proc_bind_val = #omp} : (i1, i32, memref, memref) -> () // test with multiple parameters for single variadic argument // CHECK: omp.parallel allocate(%{{.*}} : memref -> %{{.*}} : memref) "omp.parallel" (%data_var, %data_var) ({ omp.terminator - }) {operand_segment_sizes = array} : (memref, memref) -> () + }) {operandSegmentSizes = array} : (memref, memref) -> () return } @@ -141,7 +141,7 @@ func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memre "omp.wsloop" (%lb, %ub, %step) ({ ^bb0(%iv: index): omp.yield - }) {operand_segment_sizes = array, ordered_val = 1} : + }) {operandSegmentSizes = array, ordered_val = 1} : (index, index, index) -> () // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref) schedule(static) @@ -149,7 +149,7 @@ func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memre "omp.wsloop" (%lb, %ub, %step, %data_var, %linear_var) ({ ^bb0(%iv: index): omp.yield - }) {operand_segment_sizes = array, schedule_val = #omp} : + }) {operandSegmentSizes = array, schedule_val = #omp} : (index, index, index, memref, i32) -> () // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref, %{{.*}} = %{{.*}} : memref) schedule(static) @@ -157,7 +157,7 @@ func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memre "omp.wsloop" (%lb, %ub, %step, %data_var, %data_var, %linear_var, %linear_var) ({ ^bb0(%iv: index): omp.yield - }) {operand_segment_sizes = array, schedule_val = #omp} : + }) {operandSegmentSizes = array, schedule_val = #omp} : (index, index, index, memref, memref, i32, i32) -> () // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : 
memref) schedule(dynamic = %{{.*}}) ordered(2) @@ -165,7 +165,7 @@ func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memre "omp.wsloop" (%lb, %ub, %step, %data_var, %linear_var, %chunk_var) ({ ^bb0(%iv: index): omp.yield - }) {operand_segment_sizes = array, schedule_val = #omp, ordered_val = 2} : + }) {operandSegmentSizes = array, schedule_val = #omp, ordered_val = 2} : (index, index, index, memref, i32, i32) -> () // CHECK: omp.wsloop schedule(auto) nowait @@ -173,7 +173,7 @@ func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memre "omp.wsloop" (%lb, %ub, %step) ({ ^bb0(%iv: index): omp.yield - }) {operand_segment_sizes = array, nowait, schedule_val = #omp} : + }) {operandSegmentSizes = array, nowait, schedule_val = #omp} : (index, index, index) -> () return @@ -333,7 +333,7 @@ func.func @omp_simdloop(%lb : index, %ub : index, %step : index) -> () { "omp.simdloop" (%lb, %ub, %step) ({ ^bb0(%iv: index): omp.yield - }) {operand_segment_sizes = array} : + }) {operandSegmentSizes = array} : (index, index, index) -> () return @@ -349,7 +349,7 @@ func.func @omp_simdloop_aligned_list(%arg0 : index, %arg1 : index, %arg2 : index ^bb0(%arg5: index): "omp.yield"() : () -> () }) {alignment_values = [32, 128], - operand_segment_sizes = array} : (index, index, index, memref, memref) -> () + operandSegmentSizes = array} : (index, index, index, memref, memref) -> () return } @@ -362,7 +362,7 @@ func.func @omp_simdloop_aligned_single(%arg0 : index, %arg1 : index, %arg2 : ind ^bb0(%arg5: index): "omp.yield"() : () -> () }) {alignment_values = [32], - operand_segment_sizes = array} : (index, index, index, memref) -> () + operandSegmentSizes = array} : (index, index, index, memref) -> () return } @@ -377,7 +377,7 @@ func.func @omp_simdloop_nontemporal_list(%arg0 : index, "omp.simdloop"(%arg0, %arg1, %arg2, %arg3, %arg4) ({ ^bb0(%arg5: index): "omp.yield"() : () -> () - }) {operand_segment_sizes = array} : (index, index, index, memref, 
memref) -> () + }) {operandSegmentSizes = array} : (index, index, index, memref, memref) -> () return } @@ -392,7 +392,7 @@ func.func @omp_simdloop_nontemporal_single(%arg0 : index, "omp.simdloop"(%arg0, %arg1, %arg2, %arg3) ({ ^bb0(%arg5: index): "omp.yield"() : () -> () - }) {operand_segment_sizes = array} : (index, index, index, memref) -> () + }) {operandSegmentSizes = array} : (index, index, index, memref) -> () return } @@ -487,7 +487,7 @@ func.func @omp_target(%if_cond : i1, %device : si32, %num_threads : i32, %map1: "omp.target"(%if_cond, %device, %num_threads) ({ // CHECK: omp.terminator omp.terminator - }) {nowait, operand_segment_sizes = array} : ( i1, si32, i32 ) -> () + }) {nowait, operandSegmentSizes = array} : ( i1, si32, i32 ) -> () // Test with optional map clause. // CHECK: omp.target map((tofrom -> %{{.*}} : memref), (alloc -> %{{.*}} : memref)) { @@ -1428,13 +1428,13 @@ func.func @omp_sectionsop(%data_var1 : memref, %data_var2 : memref, "omp.sections" (%data_var1, %data_var1) ({ // CHECK: omp.terminator omp.terminator - }) {operand_segment_sizes = array} : (memref, memref) -> () + }) {operandSegmentSizes = array} : (memref, memref) -> () // CHECK: omp.sections reduction(@add_f32 -> %{{.*}} : !llvm.ptr) "omp.sections" (%redn_var) ({ // CHECK: omp.terminator omp.terminator - }) {operand_segment_sizes = array, reductions=[@add_f32]} : (!llvm.ptr) -> () + }) {operandSegmentSizes = array, reductions=[@add_f32]} : (!llvm.ptr) -> () // CHECK: omp.sections nowait { omp.sections nowait { diff --git a/mlir/test/Dialect/PDL/invalid.mlir b/mlir/test/Dialect/PDL/invalid.mlir index c76bc9dcad72d..c6b7fe1cc1789 100644 --- a/mlir/test/Dialect/PDL/invalid.mlir +++ b/mlir/test/Dialect/PDL/invalid.mlir @@ -122,7 +122,7 @@ pdl.pattern : benefit(1) { // expected-error@below {{expected the same number of attribute values and attribute names, got 1 names and 0 values}} %op = "pdl.operation"() { attributeValueNames = ["attr"], - operand_segment_sizes = array + 
operandSegmentSizes = array } : () -> (!pdl.operation) rewrite %op with "rewriter" } @@ -230,7 +230,7 @@ pdl.pattern : benefit(1) { // expected-error@below {{expected no replacement values to be provided when the replacement operation is present}} "pdl.replace"(%root, %newOp, %newResult) { - operand_segment_sizes = array + operandSegmentSizes = array } : (!pdl.operation, !pdl.operation, !pdl.value) -> () } } @@ -276,7 +276,7 @@ pdl.pattern : benefit(1) { // expected-error@below {{expected rewrite region to be non-empty if external name is not specified}} "pdl.rewrite"(%op) ({}) { - operand_segment_sizes = array + operandSegmentSizes = array } : (!pdl.operation) -> () } @@ -289,7 +289,7 @@ pdl.pattern : benefit(1) { "pdl.rewrite"(%op, %op) ({ ^bb1: }) { - operand_segment_sizes = array + operandSegmentSizes = array }: (!pdl.operation, !pdl.operation) -> () } @@ -303,7 +303,7 @@ pdl.pattern : benefit(1) { ^bb1: }) { name = "foo", - operand_segment_sizes = array + operandSegmentSizes = array } : (!pdl.operation) -> () } diff --git a/mlir/test/Dialect/PDLInterp/invalid.mlir b/mlir/test/Dialect/PDLInterp/invalid.mlir index 0457a158430a2..c201dda71ef7f 100644 --- a/mlir/test/Dialect/PDLInterp/invalid.mlir +++ b/mlir/test/Dialect/PDLInterp/invalid.mlir @@ -19,7 +19,7 @@ pdl_interp.func @rewriter() { inferredResultTypes, inputAttributeNames = [], name = "foo.op", - operand_segment_sizes = array + operandSegmentSizes = array } : (!pdl.type) -> (!pdl.operation) pdl_interp.finalize } diff --git a/mlir/test/Dialect/SCF/invalid.mlir b/mlir/test/Dialect/SCF/invalid.mlir index 8566943ef8012..0ff3eaadc8fec 100644 --- a/mlir/test/Dialect/SCF/invalid.mlir +++ b/mlir/test/Dialect/SCF/invalid.mlir @@ -139,7 +139,7 @@ func.func @parallel_body_arguments_wrong_type( "scf.parallel"(%arg0, %arg1, %arg2) ({ ^bb0(%i0: f32): scf.yield - }) {operand_segment_sizes = array}: (index, index, index) -> () + }) {operandSegmentSizes = array}: (index, index, index) -> () return } @@ -151,7 +151,7 @@ 
func.func @parallel_body_wrong_number_of_arguments( "scf.parallel"(%arg0, %arg1, %arg2) ({ ^bb0(%i0: index, %i1: index): scf.yield - }) {operand_segment_sizes = array}: (index, index, index) -> () + }) {operandSegmentSizes = array}: (index, index, index) -> () return } @@ -689,7 +689,7 @@ func.func @parallel_missing_terminator(%0 : index) { ^bb0(%arg1: index): // expected-note @below {{terminator here}} %2 = "arith.constant"() {value = 1.000000e+00 : f32} : () -> f32 - }) {operand_segment_sizes = array} : (index, index, index) -> () + }) {operandSegmentSizes = array} : (index, index, index) -> () return } diff --git a/mlir/test/Dialect/SPIRV/IR/control-flow-ops.mlir b/mlir/test/Dialect/SPIRV/IR/control-flow-ops.mlir index af3f3ea2889f7..8496448759f0c 100644 --- a/mlir/test/Dialect/SPIRV/IR/control-flow-ops.mlir +++ b/mlir/test/Dialect/SPIRV/IR/control-flow-ops.mlir @@ -117,7 +117,7 @@ func.func @wrong_condition_type() -> () { func.func @wrong_accessor_count() -> () { %true = spirv.Constant true // expected-error @+1 {{requires 2 successors but found 1}} - "spirv.BranchConditional"(%true)[^one] {operand_segment_sizes = array} : (i1) -> () + "spirv.BranchConditional"(%true)[^one] {operandSegmentSizes = array} : (i1) -> () ^one: spirv.Return ^two: @@ -130,7 +130,7 @@ func.func @wrong_number_of_weights() -> () { %true = spirv.Constant true // expected-error @+1 {{must have exactly two branch weights}} "spirv.BranchConditional"(%true)[^one, ^two] {branch_weights = [1 : i32, 2 : i32, 3 : i32], - operand_segment_sizes = array} : (i1) -> () + operandSegmentSizes = array} : (i1) -> () ^one: spirv.Return ^two: diff --git a/mlir/test/Dialect/Transform/ops-invalid.mlir b/mlir/test/Dialect/Transform/ops-invalid.mlir index c72af7363f67f..3e30947769eb4 100644 --- a/mlir/test/Dialect/Transform/ops-invalid.mlir +++ b/mlir/test/Dialect/Transform/ops-invalid.mlir @@ -76,7 +76,7 @@ transform.sequence failures(propagate) { "transform.sequence"(%0) ({ ^bb0(%arg0: !transform.any_op): 
"transform.yield"() : () -> () -}) {failure_propagation_mode = 1 : i32, operand_segment_sizes = array} : (!transform.any_op) -> () +}) {failure_propagation_mode = 1 : i32, operandSegmentSizes = array} : (!transform.any_op) -> () // ----- diff --git a/mlir/test/IR/parser.mlir b/mlir/test/IR/parser.mlir index 66c9adca8f98c..0193fae37af7f 100644 --- a/mlir/test/IR/parser.mlir +++ b/mlir/test/IR/parser.mlir @@ -460,7 +460,7 @@ func.func @verbose_terminators() -> (i1, i17) { ^bb1(%x : i1, %y : i17): // CHECK: cf.cond_br %{{.*}}, ^bb2(%{{.*}} : i17), ^bb3(%{{.*}}, %{{.*}} : i1, i17) - "cf.cond_br"(%x, %y, %x, %y) [^bb2, ^bb3] {operand_segment_sizes = array} : (i1, i17, i1, i17) -> () + "cf.cond_br"(%x, %y, %x, %y) [^bb2, ^bb3] {operandSegmentSizes = array} : (i1, i17, i1, i17) -> () ^bb2(%a : i17): %true = arith.constant true diff --git a/mlir/test/IR/recursive-type.mlir b/mlir/test/IR/recursive-type.mlir index bc9b2cdbea6b6..121ba095573ba 100644 --- a/mlir/test/IR/recursive-type.mlir +++ b/mlir/test/IR/recursive-type.mlir @@ -1,6 +1,8 @@ // RUN: mlir-opt %s -test-recursive-types | FileCheck %s // CHECK: !testrec = !test.test_rec> +// CHECK: ![[$NAME:.*]] = !test.test_rec_alias> +// CHECK: ![[$NAME2:.*]] = !test.test_rec_alias, i32>> // CHECK-LABEL: @roundtrip func.func @roundtrip() { @@ -12,6 +14,16 @@ func.func @roundtrip() { // into inifinite recursion. 
// CHECK: !testrec "test.dummy_op_for_roundtrip"() : () -> !test.test_rec> + + // CHECK: () -> ![[$NAME]] + // CHECK: () -> ![[$NAME]] + "test.dummy_op_for_roundtrip"() : () -> !test.test_rec_alias> + "test.dummy_op_for_roundtrip"() : () -> !test.test_rec_alias> + + // CHECK: () -> ![[$NAME2]] + // CHECK: () -> ![[$NAME2]] + "test.dummy_op_for_roundtrip"() : () -> !test.test_rec_alias, i32>> + "test.dummy_op_for_roundtrip"() : () -> !test.test_rec_alias, i32>> return } diff --git a/mlir/test/IR/traits.mlir b/mlir/test/IR/traits.mlir index 7d922ecf67de5..0402ebe758750 100644 --- a/mlir/test/IR/traits.mlir +++ b/mlir/test/IR/traits.mlir @@ -383,101 +383,101 @@ func.func private @foo() // ----- func.func @failedMissingOperandSizeAttr(%arg: i32) { - // expected-error @+1 {{op operand count (4) does not match with the total size (0) specified in attribute 'operand_segment_sizes'}} + // expected-error @+1 {{op operand count (4) does not match with the total size (0) specified in attribute 'operandSegmentSizes'}} "test.attr_sized_operands"(%arg, %arg, %arg, %arg) : (i32, i32, i32, i32) -> () } // ----- func.func @failedOperandSizeAttrWrongType(%arg: i32) { - // expected-error @+1 {{op operand count (4) does not match with the total size (0) specified in attribute 'operand_segment_sizes'}} - "test.attr_sized_operands"(%arg, %arg, %arg, %arg) {operand_segment_sizes = 10} : (i32, i32, i32, i32) -> () + // expected-error @+1 {{op operand count (4) does not match with the total size (0) specified in attribute 'operandSegmentSizes'}} + "test.attr_sized_operands"(%arg, %arg, %arg, %arg) {operandSegmentSizes = 10} : (i32, i32, i32, i32) -> () } // ----- func.func @failedOperandSizeAttrWrongElementType(%arg: i32) { - // expected-error @+1 {{op operand count (4) does not match with the total size (0) specified in attribute 'operand_segment_sizes'}} - "test.attr_sized_operands"(%arg, %arg, %arg, %arg) {operand_segment_sizes = array} : (i32, i32, i32, i32) -> () + // expected-error 
@+1 {{op operand count (4) does not match with the total size (0) specified in attribute 'operandSegmentSizes'}} + "test.attr_sized_operands"(%arg, %arg, %arg, %arg) {operandSegmentSizes = array} : (i32, i32, i32, i32) -> () } // ----- func.func @failedOperandSizeAttrNegativeValue(%arg: i32) { - // expected-error @+1 {{'operand_segment_sizes' attribute cannot have negative elements}} - "test.attr_sized_operands"(%arg, %arg, %arg, %arg) {operand_segment_sizes = array} : (i32, i32, i32, i32) -> () + // expected-error @+1 {{'operandSegmentSizes' attribute cannot have negative elements}} + "test.attr_sized_operands"(%arg, %arg, %arg, %arg) {operandSegmentSizes = array} : (i32, i32, i32, i32) -> () } // ----- func.func @failedOperandSizeAttrWrongTotalSize(%arg: i32) { - // expected-error @+1 {{operand count (4) does not match with the total size (3) specified in attribute 'operand_segment_sizes'}} - "test.attr_sized_operands"(%arg, %arg, %arg, %arg) {operand_segment_sizes = array} : (i32, i32, i32, i32) -> () + // expected-error @+1 {{operand count (4) does not match with the total size (3) specified in attribute 'operandSegmentSizes'}} + "test.attr_sized_operands"(%arg, %arg, %arg, %arg) {operandSegmentSizes = array} : (i32, i32, i32, i32) -> () } // ----- func.func @failedOperandSizeAttrWrongCount(%arg: i32) { - // expected-error @+1 {{test.attr_sized_operands' op operand count (4) does not match with the total size (0) specified in attribute 'operand_segment_sizes}} - "test.attr_sized_operands"(%arg, %arg, %arg, %arg) {operand_segment_sizes = array} : (i32, i32, i32, i32) -> () + // expected-error @+1 {{test.attr_sized_operands' op operand count (4) does not match with the total size (0) specified in attribute 'operandSegmentSizes}} + "test.attr_sized_operands"(%arg, %arg, %arg, %arg) {operandSegmentSizes = array} : (i32, i32, i32, i32) -> () } // ----- func.func @succeededOperandSizeAttr(%arg: i32) { // CHECK: test.attr_sized_operands - 
"test.attr_sized_operands"(%arg, %arg, %arg, %arg) {operand_segment_sizes = array} : (i32, i32, i32, i32) -> () + "test.attr_sized_operands"(%arg, %arg, %arg, %arg) {operandSegmentSizes = array} : (i32, i32, i32, i32) -> () return } // ----- func.func @failedMissingResultSizeAttr() { - // expected-error @+1 {{op result count (4) does not match with the total size (0) specified in attribute 'result_segment_sizes'}} + // expected-error @+1 {{op result count (4) does not match with the total size (0) specified in attribute 'resultSegmentSizes'}} %0:4 = "test.attr_sized_results"() : () -> (i32, i32, i32, i32) } // ----- func.func @failedResultSizeAttrWrongType() { - // expected-error @+1 {{ op result count (4) does not match with the total size (0) specified in attribute 'result_segment_sizes'}} - %0:4 = "test.attr_sized_results"() {result_segment_sizes = 10} : () -> (i32, i32, i32, i32) + // expected-error @+1 {{ op result count (4) does not match with the total size (0) specified in attribute 'resultSegmentSizes'}} + %0:4 = "test.attr_sized_results"() {resultSegmentSizes = 10} : () -> (i32, i32, i32, i32) } // ----- func.func @failedResultSizeAttrWrongElementType() { - // expected-error @+1 {{ op result count (4) does not match with the total size (0) specified in attribute 'result_segment_sizes'}} - %0:4 = "test.attr_sized_results"() {result_segment_sizes = array} : () -> (i32, i32, i32, i32) + // expected-error @+1 {{ op result count (4) does not match with the total size (0) specified in attribute 'resultSegmentSizes'}} + %0:4 = "test.attr_sized_results"() {resultSegmentSizes = array} : () -> (i32, i32, i32, i32) } // ----- func.func @failedResultSizeAttrNegativeValue() { - // expected-error @+1 {{'result_segment_sizes' attribute cannot have negative elements}} - %0:4 = "test.attr_sized_results"() {result_segment_sizes = array} : () -> (i32, i32, i32, i32) + // expected-error @+1 {{'resultSegmentSizes' attribute cannot have negative elements}} + %0:4 = 
"test.attr_sized_results"() {resultSegmentSizes = array} : () -> (i32, i32, i32, i32) } // ----- func.func @failedResultSizeAttrWrongTotalSize() { - // expected-error @+1 {{result count (4) does not match with the total size (3) specified in attribute 'result_segment_sizes'}} - %0:4 = "test.attr_sized_results"() {result_segment_sizes = array} : () -> (i32, i32, i32, i32) + // expected-error @+1 {{result count (4) does not match with the total size (3) specified in attribute 'resultSegmentSizes'}} + %0:4 = "test.attr_sized_results"() {resultSegmentSizes = array} : () -> (i32, i32, i32, i32) } // ----- func.func @failedResultSizeAttrWrongCount() { - // expected-error @+1 {{ op result count (4) does not match with the total size (0) specified in attribute 'result_segment_sizes'}} - %0:4 = "test.attr_sized_results"() {result_segment_sizes = array} : () -> (i32, i32, i32, i32) + // expected-error @+1 {{ op result count (4) does not match with the total size (0) specified in attribute 'resultSegmentSizes'}} + %0:4 = "test.attr_sized_results"() {resultSegmentSizes = array} : () -> (i32, i32, i32, i32) } // ----- func.func @succeededResultSizeAttr() { // CHECK: test.attr_sized_results - %0:4 = "test.attr_sized_results"() {result_segment_sizes = array} : () -> (i32, i32, i32, i32) + %0:4 = "test.attr_sized_results"() {resultSegmentSizes = array} : () -> (i32, i32, i32, i32) return } diff --git a/mlir/test/Rewrite/pdl-bytecode.mlir b/mlir/test/Rewrite/pdl-bytecode.mlir index 57bec8ce37073..513ff3c40bc64 100644 --- a/mlir/test/Rewrite/pdl-bytecode.mlir +++ b/mlir/test/Rewrite/pdl-bytecode.mlir @@ -1093,7 +1093,7 @@ module @patterns { // CHECK-NEXT: "test.success"(%[[INPUTS]]#4) : (i32) -> () module @ir attributes { test.get_operands_2 } { %inputs:5 = "test.producer"() : () -> (i32, i32, i32, i32, i32) - "test.attr_sized_operands"(%inputs#0, %inputs#1, %inputs#2, %inputs#3, %inputs#4) {operand_segment_sizes = array} : (i32, i32, i32, i32, i32) -> () + 
"test.attr_sized_operands"(%inputs#0, %inputs#1, %inputs#2, %inputs#3, %inputs#4) {operandSegmentSizes = array} : (i32, i32, i32, i32, i32) -> () } // ----- @@ -1246,7 +1246,7 @@ module @patterns { // CHECK: %[[RESULTS_2_SINGLE:.*]] = "test.success"() : () -> i32 // CHECK: "test.consumer"(%[[RESULTS_1]]#0, %[[RESULTS_1]]#1, %[[RESULTS_1]]#2, %[[RESULTS_1]]#3, %[[RESULTS_2]]) : (i32, i32, i32, i32, i32) -> () module @ir attributes { test.get_results_2 } { - %results:5 = "test.attr_sized_results"() {result_segment_sizes = array} : () -> (i32, i32, i32, i32, i32) + %results:5 = "test.attr_sized_results"() {resultSegmentSizes = array} : () -> (i32, i32, i32, i32, i32) "test.consumer"(%results#0, %results#1, %results#2, %results#3, %results#4) : (i32, i32, i32, i32, i32) -> () } diff --git a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir index 1573f30d5b391..a58d4f1463a0b 100644 --- a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir @@ -2,7 +2,7 @@ llvm.func @_QPopenmp_target_data() { %0 = llvm.mlir.constant(1 : i64) : i64 - %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr + %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operandSegmentSizes = array, uniq_name = "_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr omp.target_data map((tofrom -> %1 : !llvm.ptr)) { %2 = llvm.mlir.constant(99 : i32) : i32 llvm.store %2, %1 : !llvm.ptr @@ -79,9 +79,9 @@ llvm.func @_QPopenmp_target_data_region(%1 : !llvm.ptr>) { llvm.func @_QPomp_target_enter_exit(%1 : !llvm.ptr>, %3 : !llvm.ptr>) { %4 = llvm.mlir.constant(1 : i64) : i64 - %5 = llvm.alloca %4 x i32 {bindc_name = "dvc", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFomp_target_enter_exitEdvc"} : (i64) -> !llvm.ptr + %5 = llvm.alloca %4 x i32 {bindc_name = "dvc", in_type = i32, operandSegmentSizes = array, uniq_name = 
"_QFomp_target_enter_exitEdvc"} : (i64) -> !llvm.ptr %6 = llvm.mlir.constant(1 : i64) : i64 - %7 = llvm.alloca %6 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFomp_target_enter_exitEi"} : (i64) -> !llvm.ptr + %7 = llvm.alloca %6 x i32 {bindc_name = "i", in_type = i32, operandSegmentSizes = array, uniq_name = "_QFomp_target_enter_exitEi"} : (i64) -> !llvm.ptr %8 = llvm.mlir.constant(5 : i32) : i32 llvm.store %8, %7 : !llvm.ptr %9 = llvm.mlir.constant(2 : i32) : i32 diff --git a/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir index 126fff70ce3b1..bead0200b2731 100644 --- a/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir @@ -5,11 +5,11 @@ module attributes {omp.is_target_device = true} { %0 = llvm.mlir.constant(20 : i32) : i32 %1 = llvm.mlir.constant(10 : i32) : i32 %2 = llvm.mlir.constant(1 : i64) : i64 - %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr + %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operandSegmentSizes = array, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr %4 = llvm.mlir.constant(1 : i64) : i64 - %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr + %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operandSegmentSizes = array, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr %6 = llvm.mlir.constant(1 : i64) : i64 - %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr + %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operandSegmentSizes = array, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr llvm.store %1, %3 
: !llvm.ptr llvm.store %0, %5 : !llvm.ptr omp.target { diff --git a/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir index e130f96af79f7..9ba083d5137d8 100644 --- a/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir @@ -5,11 +5,11 @@ module attributes {omp.is_target_device = false} { %0 = llvm.mlir.constant(20 : i32) : i32 %1 = llvm.mlir.constant(10 : i32) : i32 %2 = llvm.mlir.constant(1 : i64) : i64 - %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr + %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operandSegmentSizes = array, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr %4 = llvm.mlir.constant(1 : i64) : i64 - %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr + %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operandSegmentSizes = array, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr %6 = llvm.mlir.constant(1 : i64) : i64 - %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr + %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operandSegmentSizes = array, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr llvm.store %1, %3 : !llvm.ptr llvm.store %0, %5 : !llvm.ptr omp.target { diff --git a/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir index cfa8039c94ba2..7f5e79db9bcd6 100644 --- a/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir @@ -5,11 +5,11 @@ module attributes {omp.is_target_device = false} { %0 = llvm.mlir.constant(20 : i32) : i32 %1 = 
llvm.mlir.constant(10 : i32) : i32 %2 = llvm.mlir.constant(1 : i64) : i64 - %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr + %3 = llvm.alloca %2 x i32 {bindc_name = "a", in_type = i32, operandSegmentSizes = array, uniq_name = "_QFomp_target_regionEa"} : (i64) -> !llvm.ptr %4 = llvm.mlir.constant(1 : i64) : i64 - %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr + %5 = llvm.alloca %4 x i32 {bindc_name = "b", in_type = i32, operandSegmentSizes = array, uniq_name = "_QFomp_target_regionEb"} : (i64) -> !llvm.ptr %6 = llvm.mlir.constant(1 : i64) : i64 - %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr + %7 = llvm.alloca %6 x i32 {bindc_name = "c", in_type = i32, operandSegmentSizes = array, uniq_name = "_QFomp_target_regionEc"} : (i64) -> !llvm.ptr llvm.store %1, %3 : !llvm.ptr llvm.store %0, %5 : !llvm.ptr omp.target { diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index 6469868b8751f..4fb00660cc423 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -310,7 +310,7 @@ llvm.func @wsloop_simple(%arg0: !llvm.ptr) { llvm.store %3, %4 : !llvm.ptr omp.yield // CHECK: call void @__kmpc_for_static_fini(ptr @[[$loc_struct]], - }) {operand_segment_sizes = array} : (i64, i64, i64) -> () + }) {operandSegmentSizes = array} : (i64, i64, i64) -> () omp.terminator } llvm.return @@ -330,7 +330,7 @@ llvm.func @wsloop_inclusive_1(%arg0: !llvm.ptr) { %4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i64) -> !llvm.ptr llvm.store %3, %4 : !llvm.ptr omp.yield - }) {operand_segment_sizes = array} : (i64, i64, i64) -> () + }) {operandSegmentSizes = array} : (i64, i64, i64) -> () llvm.return } @@ 
-348,7 +348,7 @@ llvm.func @wsloop_inclusive_2(%arg0: !llvm.ptr) { %4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i64) -> !llvm.ptr llvm.store %3, %4 : !llvm.ptr omp.yield - }) {inclusive, operand_segment_sizes = array} : (i64, i64, i64) -> () + }) {inclusive, operandSegmentSizes = array} : (i64, i64, i64) -> () llvm.return } @@ -628,7 +628,7 @@ llvm.func @simdloop_simple(%lb : i64, %ub : i64, %step : i64, %arg0: !llvm.ptr, i64) -> !llvm.ptr llvm.store %3, %4 : !llvm.ptr omp.yield - }) {operand_segment_sizes = array} : + }) {operandSegmentSizes = array} : (i64, i64, i64) -> () llvm.return @@ -733,9 +733,9 @@ llvm.func @simdloop_simple_multiple_simdlen_safelen(%lb1 : i64, %ub1 : i64, %ste // CHECK-LABEL: @simdloop_if llvm.func @simdloop_if(%arg0: !llvm.ptr {fir.bindc_name = "n"}, %arg1: !llvm.ptr {fir.bindc_name = "threshold"}) { %0 = llvm.mlir.constant(1 : i64) : i64 - %1 = llvm.alloca %0 x i32 {adapt.valuebyref, in_type = i32, operand_segment_sizes = array} : (i64) -> !llvm.ptr + %1 = llvm.alloca %0 x i32 {adapt.valuebyref, in_type = i32, operandSegmentSizes = array} : (i64) -> !llvm.ptr %2 = llvm.mlir.constant(1 : i64) : i64 - %3 = llvm.alloca %2 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFtest_simdEi"} : (i64) -> !llvm.ptr + %3 = llvm.alloca %2 x i32 {bindc_name = "i", in_type = i32, operandSegmentSizes = array, uniq_name = "_QFtest_simdEi"} : (i64) -> !llvm.ptr %4 = llvm.mlir.constant(0 : i32) : i32 %5 = llvm.load %arg0 : !llvm.ptr %6 = llvm.mlir.constant(1 : i32) : i32 diff --git a/mlir/test/Transforms/canonicalize-block-merge.mlir b/mlir/test/Transforms/canonicalize-block-merge.mlir index d33c911e042d7..bf44973ab646c 100644 --- a/mlir/test/Transforms/canonicalize-block-merge.mlir +++ b/mlir/test/Transforms/canonicalize-block-merge.mlir @@ -257,7 +257,7 @@ func.func @nomerge(%arg0: i32, %i: i32) { func.func @mismatch_dominance() -> i32 { // CHECK: %[[RES:.*]] = "test.producing_br"() %0 = 
"test.producing_br"()[^bb1, ^bb2] { - operand_segment_sizes = array + operandSegmentSizes = array } : () -> i32 ^bb1: diff --git a/mlir/test/Transforms/sccp.mlir b/mlir/test/Transforms/sccp.mlir index db24432b65cc6..dcae052c29c24 100644 --- a/mlir/test/Transforms/sccp.mlir +++ b/mlir/test/Transforms/sccp.mlir @@ -204,7 +204,7 @@ func.func @simple_produced_operand() -> (i32, i32) { // CHECK: %[[ONE:.*]] = arith.constant 1 %1 = arith.constant 1 : i32 "test.internal_br"(%1) [^bb1, ^bb2] { - operand_segment_sizes = array + operandSegmentSizes = array } : (i32) -> () ^bb1: diff --git a/mlir/test/lib/Dialect/Test/TestDialect.cpp b/mlir/test/lib/Dialect/Test/TestDialect.cpp index 072f6ff4b84d3..debe733f59be4 100644 --- a/mlir/test/lib/Dialect/Test/TestDialect.cpp +++ b/mlir/test/lib/Dialect/Test/TestDialect.cpp @@ -312,6 +312,10 @@ struct TestOpAsmInterface : public OpAsmDialectInterface { return AliasResult::FinalAlias; } } + if (auto recAliasType = dyn_cast(type)) { + os << recAliasType.getName(); + return AliasResult::FinalAlias; + } return AliasResult::NoAlias; } diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index 4eb19e6dd6fe2..12a02cf72d2b3 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -264,6 +264,16 @@ def DerivedTypeAttrOp : TEST_Op<"derived_type_attr", []> { "$_builder.getI32IntegerAttr($_self)">; } +def TestPropOp : TEST_Op<"prop">, + Arguments<(ins Variadic:$upperInits, + I32ElementsAttr:$transforms)>, + Results<(outs Variadic:$results)> { + DerivedAttr upperLen = DerivedAttr<"uint32_t", [{ + return getUpperInits().size() / getTransforms().size(); + }], [{ $_builder.getI32IntegerAttr($_self) }]>; +} + + def StringElementsAttrOp : TEST_Op<"string_elements_attr"> { let arguments = (ins StringElementsAttr:$scalar_string_attr diff --git a/mlir/test/lib/Dialect/Test/TestTypeDefs.td b/mlir/test/lib/Dialect/Test/TestTypeDefs.td index 15dbd74aec118..2a8bdad8fb25d 100644 --- 
a/mlir/test/lib/Dialect/Test/TestTypeDefs.td +++ b/mlir/test/lib/Dialect/Test/TestTypeDefs.td @@ -369,4 +369,26 @@ def TestTypeElseAnchorStruct : Test_Type<"TestTypeElseAnchorStruct"> { let assemblyFormat = "`<` (`?`) : (struct($a, $b)^)? `>`"; } +def TestI32 : Test_Type<"TestI32"> { + let mnemonic = "i32"; +} + +def TestRecursiveAlias + : Test_Type<"TestRecursiveAlias", [NativeTypeTrait<"IsMutable">]> { + let mnemonic = "test_rec_alias"; + let storageClass = "TestRecursiveTypeStorage"; + let storageNamespace = "test"; + let genStorageClass = 0; + + let parameters = (ins "llvm::StringRef":$name); + + let hasCustomAssemblyFormat = 1; + + let extraClassDeclaration = [{ + Type getBody() const; + + void setBody(Type type); + }]; +} + #endif // TEST_TYPEDEFS diff --git a/mlir/test/lib/Dialect/Test/TestTypes.cpp b/mlir/test/lib/Dialect/Test/TestTypes.cpp index 0633752067a14..20dc03a765269 100644 --- a/mlir/test/lib/Dialect/Test/TestTypes.cpp +++ b/mlir/test/lib/Dialect/Test/TestTypes.cpp @@ -482,3 +482,54 @@ void TestDialect::printType(Type type, DialectAsmPrinter &printer) const { SetVector stack; printTestType(type, printer, stack); } + +Type TestRecursiveAliasType::getBody() const { return getImpl()->body; } + +void TestRecursiveAliasType::setBody(Type type) { (void)Base::mutate(type); } + +StringRef TestRecursiveAliasType::getName() const { return getImpl()->name; } + +Type TestRecursiveAliasType::parse(AsmParser &parser) { + thread_local static SetVector stack; + + StringRef name; + if (parser.parseLess() || parser.parseKeyword(&name)) + return Type(); + auto rec = TestRecursiveAliasType::get(parser.getContext(), name); + + // If this type already has been parsed above in the stack, expect just the + // name. + if (stack.contains(rec)) { + if (failed(parser.parseGreater())) + return Type(); + return rec; + } + + // Otherwise, parse the body and update the type. 
+ if (failed(parser.parseComma())) + return Type(); + stack.insert(rec); + Type subtype; + if (parser.parseType(subtype)) + return nullptr; + stack.pop_back(); + if (!subtype || failed(parser.parseGreater())) + return Type(); + + rec.setBody(subtype); + + return rec; +} + +void TestRecursiveAliasType::print(AsmPrinter &printer) const { + thread_local static SetVector stack; + + printer << "<" << getName(); + if (!stack.contains(*this)) { + printer << ", "; + stack.insert(*this); + printer << getBody(); + stack.pop_back(); + } + printer << ">"; +} diff --git a/mlir/test/lib/Dialect/Test/TestTypes.h b/mlir/test/lib/Dialect/Test/TestTypes.h index c7d169d020d56..0ce86dd70ab90 100644 --- a/mlir/test/lib/Dialect/Test/TestTypes.h +++ b/mlir/test/lib/Dialect/Test/TestTypes.h @@ -91,9 +91,6 @@ struct FieldParser> { #include "TestTypeInterfaces.h.inc" -#define GET_TYPEDEF_CLASSES -#include "TestTypeDefs.h.inc" - namespace test { /// Storage for simple named recursive types, where the type is identified by @@ -150,4 +147,7 @@ class TestRecursiveType } // namespace test +#define GET_TYPEDEF_CLASSES +#include "TestTypeDefs.h.inc" + #endif // MLIR_TESTTYPES_H diff --git a/mlir/test/mlir-cpu-runner/test-expand-math-approx.mlir b/mlir/test/mlir-cpu-runner/test-expand-math-approx.mlir index 30f30def56fdd..847c41fec9135 100644 --- a/mlir/test/mlir-cpu-runner/test-expand-math-approx.mlir +++ b/mlir/test/mlir-cpu-runner/test-expand-math-approx.mlir @@ -195,7 +195,7 @@ func.func @powf() { %a_p = arith.constant 2.0 : f64 call @func_powff64(%a, %a_p) : (f64, f64) -> () - // CHECK-NEXT: nan + // CHECK-NEXT: -27 %b = arith.constant -3.0 : f64 %b_p = arith.constant 3.0 : f64 call @func_powff64(%b, %b_p) : (f64, f64) -> () @@ -220,16 +220,9 @@ func.func @powf() { %f_p = arith.constant 1.2 : f64 call @func_powff64(%f, %f_p) : (f64, f64) -> () - // CHECK-NEXT: nan - %g = arith.constant 0xff80000000000000 : f64 - call @func_powff64(%g, %g) : (f64, f64) -> () - - // CHECK-NEXT: nan - %h = 
arith.constant 0x7fffffffffffffff : f64 - call @func_powff64(%h, %h) : (f64, f64) -> () - // CHECK-NEXT: nan %i = arith.constant 1.0 : f64 + %h = arith.constant 0x7fffffffffffffff : f64 call @func_powff64(%i, %h) : (f64, f64) -> () // CHECK-NEXT: inf diff --git a/mlir/test/mlir-tblgen/attr-or-type-format.td b/mlir/test/mlir-tblgen/attr-or-type-format.td index 230fa90713f1a..2782f55bc966e 100644 --- a/mlir/test/mlir-tblgen/attr-or-type-format.td +++ b/mlir/test/mlir-tblgen/attr-or-type-format.td @@ -648,5 +648,5 @@ def TypeN : TestType<"TestP"> { // DEFAULT_TYPE_PARSER: TestDialect::parseType(::mlir::DialectAsmParser &parser) // DEFAULT_TYPE_PARSER: auto parseResult = parseOptionalDynamicType(mnemonic, parser, genType); // DEFAULT_TYPE_PARSER: if (parseResult.has_value()) { -// DEFAULT_TYPE_PARSER: if (::mlir::succeeded(parseResult.getValue())) +// DEFAULT_TYPE_PARSER: if (::mlir::succeeded(parseResult.value())) // DEFAULT_TYPE_PARSER: return genType; \ No newline at end of file diff --git a/mlir/test/mlir-tblgen/op-decl-and-defs.td b/mlir/test/mlir-tblgen/op-decl-and-defs.td index aad7ea4437e78..077aa750352e0 100644 --- a/mlir/test/mlir-tblgen/op-decl-and-defs.td +++ b/mlir/test/mlir-tblgen/op-decl-and-defs.td @@ -141,7 +141,7 @@ def NS_AttrSizedOperandOp : NS_Op<"attr_sized_operands", Variadic:$b, I32:$c, Variadic:$d, - I32ElementsAttr:$operand_segment_sizes + I32ElementsAttr:$operandSegmentSizes ); } diff --git a/mlir/test/mlir-tblgen/op-python-bindings.td b/mlir/test/mlir-tblgen/op-python-bindings.td index de979f7e8f43e..a131209fa45cb 100644 --- a/mlir/test/mlir-tblgen/op-python-bindings.td +++ b/mlir/test/mlir-tblgen/op-python-bindings.td @@ -39,7 +39,7 @@ def AttrSizedOperandsOp : TestOp<"attr_sized_operands", // CHECK: def variadic1(self): // CHECK: operand_range = _ods_segmented_accessor( // CHECK: self.operation.operands, - // CHECK: self.operation.attributes["operand_segment_sizes"], 0) + // CHECK: self.operation.attributes["operandSegmentSizes"], 0) // 
CHECK: return operand_range // CHECK-NOT: if len(operand_range) // @@ -47,14 +47,14 @@ def AttrSizedOperandsOp : TestOp<"attr_sized_operands", // CHECK: def non_variadic(self): // CHECK: operand_range = _ods_segmented_accessor( // CHECK: self.operation.operands, - // CHECK: self.operation.attributes["operand_segment_sizes"], 1) + // CHECK: self.operation.attributes["operandSegmentSizes"], 1) // CHECK: return operand_range[0] // // CHECK: @builtins.property // CHECK: def variadic2(self): // CHECK: operand_range = _ods_segmented_accessor( // CHECK: self.operation.operands, - // CHECK: self.operation.attributes["operand_segment_sizes"], 2) + // CHECK: self.operation.attributes["operandSegmentSizes"], 2) // CHECK: return operand_range[0] if len(operand_range) > 0 else None let arguments = (ins Variadic:$variadic1, AnyType:$non_variadic, Optional:$variadic2); @@ -83,21 +83,21 @@ def AttrSizedResultsOp : TestOp<"attr_sized_results", // CHECK: def variadic1(self): // CHECK: result_range = _ods_segmented_accessor( // CHECK: self.operation.results, - // CHECK: self.operation.attributes["result_segment_sizes"], 0) + // CHECK: self.operation.attributes["resultSegmentSizes"], 0) // CHECK: return result_range[0] if len(result_range) > 0 else None // // CHECK: @builtins.property // CHECK: def non_variadic(self): // CHECK: result_range = _ods_segmented_accessor( // CHECK: self.operation.results, - // CHECK: self.operation.attributes["result_segment_sizes"], 1) + // CHECK: self.operation.attributes["resultSegmentSizes"], 1) // CHECK: return result_range[0] // // CHECK: @builtins.property // CHECK: def variadic2(self): // CHECK: result_range = _ods_segmented_accessor( // CHECK: self.operation.results, - // CHECK: self.operation.attributes["result_segment_sizes"], 2) + // CHECK: self.operation.attributes["resultSegmentSizes"], 2) // CHECK: return result_range // CHECK-NOT: if len(result_range) let results = (outs Optional:$variadic1, AnyType:$non_variadic, diff --git 
a/mlir/test/python/dialects/linalg/ops.py b/mlir/test/python/dialects/linalg/ops.py index 88f48d0d544e7..b728e00837814 100644 --- a/mlir/test/python/dialects/linalg/ops.py +++ b/mlir/test/python/dialects/linalg/ops.py @@ -100,7 +100,7 @@ def named_form(lhs, rhs): init_result = tensor.EmptyOp([4, 8], f32) # CHECK: "linalg.matmul"(%{{.*}}) # CHECK-SAME: cast = #linalg.type_fn - # CHECK-SAME: odsOperandSegmentSizes = array + # CHECK-SAME: operandSegmentSizes = array # CHECK-NEXT: ^bb0(%{{.*}}: f32, %{{.*}}: f32, %{{.*}}: f32): # CHECK-NEXT: arith.mulf{{.*}} (f32, f32) -> f32 # CHECK-NEXT: arith.addf{{.*}} (f32, f32) -> f32 diff --git a/mlir/test/python/dialects/ods_helpers.py b/mlir/test/python/dialects/ods_helpers.py index 71879bdcb51f5..0d2a18e0eb0af 100644 --- a/mlir/test/python/dialects/ods_helpers.py +++ b/mlir/test/python/dialects/ods_helpers.py @@ -96,8 +96,8 @@ class TestOp(OpView): # CHECK: %[[V0:.+]] = "custom.value" # CHECK: %[[V1:.+]] = "custom.value" # CHECK: "custom.test_op"(%[[V0]], %[[V1]]) - # CHECK-NOT: operand_segment_sizes - # CHECK-NOT: result_segment_sizes + # CHECK-NOT: operandSegmentSizes + # CHECK-NOT: resultSegmentSizes # CHECK-SAME: : (i32, i32) -> (i8, i16) print(m) @@ -128,8 +128,8 @@ class TestOp(OpView): # CHECK: %[[V2:.+]] = "custom.value" # CHECK: %[[V3:.+]] = "custom.value" # CHECK: "custom.test_op"(%[[V0]], %[[V1]], %[[V2]], %[[V3]]) - # CHECK-SAME: operand_segment_sizes = array - # CHECK-SAME: result_segment_sizes = array + # CHECK-SAME: operandSegmentSizes = array + # CHECK-SAME: resultSegmentSizes = array # CHECK-SAME: : (i32, i32, i32, i32) -> (i8, i16, i32, i64) op = TestOp.build_generic( results=[[t0, t1], t2, t3], operands=[v0, [v1, v2], v3] @@ -137,8 +137,8 @@ class TestOp(OpView): # Now test with optional omitted. 
# CHECK: "custom.test_op"(%[[V0]]) - # CHECK-SAME: operand_segment_sizes = array - # CHECK-SAME: result_segment_sizes = array + # CHECK-SAME: operandSegmentSizes = array + # CHECK-SAME: resultSegmentSizes = array # CHECK-SAME: (i32) -> i64 op = TestOp.build_generic( results=[None, None, t3], operands=[v0, None, None] diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp index 943e323c6af40..f6e43d42d29f0 100644 --- a/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp +++ b/mlir/tools/mlir-tblgen/AttrOrTypeDefGen.cpp @@ -763,7 +763,7 @@ static const char *const dialectDynamicTypeParserDispatch = R"( { auto parseResult = parseOptionalDynamicType(mnemonic, parser, genType); if (parseResult.has_value()) { - if (::mlir::succeeded(parseResult.getValue())) + if (::mlir::succeeded(parseResult.value())) return genType; return ::mlir::Type(); } diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index 0ff72ec75f1d4..a3b9c71048422 100644 --- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -56,8 +56,8 @@ static const char *const propertyDiag = "propDiag"; /// The names of the implicit attributes that contain variadic operand and /// result segment sizes. -static const char *const operandSegmentAttrName = "operand_segment_sizes"; -static const char *const resultSegmentAttrName = "result_segment_sizes"; +static const char *const operandSegmentAttrName = "operandSegmentSizes"; +static const char *const resultSegmentAttrName = "resultSegmentSizes"; /// Code for an Op to lookup an attribute. Uses cached identifiers and subrange /// lookup. @@ -158,24 +158,24 @@ static const char *const valueRangeReturnCode = R"( /// Read operand/result segment_size from bytecode. 
static const char *const readBytecodeSegmentSize = R"( if ($_reader.getBytecodeVersion() < /*kNativePropertiesODSSegmentSize=*/6) { - DenseI32ArrayAttr attr; - if (::mlir::failed($_reader.readAttribute(attr))) return failure(); + ::mlir::DenseI32ArrayAttr attr; + if (::mlir::failed($_reader.readAttribute(attr))) return ::mlir::failure(); if (attr.size() > static_cast(sizeof($_storage) / sizeof(int32_t))) { $_reader.emitError("size mismatch for operand/result_segment_size"); - return failure(); + return ::mlir::failure(); } - llvm::copy(ArrayRef(attr), $_storage.begin()); + llvm::copy(::llvm::ArrayRef(attr), $_storage.begin()); } else { - return $_reader.readSparseArray(MutableArrayRef($_storage)); + return $_reader.readSparseArray(::llvm::MutableArrayRef($_storage)); } )"; /// Write operand/result segment_size to bytecode. static const char *const writeBytecodeSegmentSize = R"( if ($_writer.getBytecodeVersion() < /*kNativePropertiesODSSegmentSize=*/6) - $_writer.writeAttribute(DenseI32ArrayAttr::get(getContext(), $_storage)); + $_writer.writeAttribute(::mlir::DenseI32ArrayAttr::get(getContext(), $_storage)); else - $_writer.writeSparseArray(ArrayRef($_storage)); + $_writer.writeSparseArray(::llvm::ArrayRef($_storage)); )"; /// A header for indicating code sections. 
@@ -430,15 +430,15 @@ void OpOrAdaptorHelper::computeAttrMetadata() { /*interfaceType=*/"::llvm::ArrayRef", /*convertFromStorageCall=*/"$_storage", /*assignToStorageCall=*/ - "llvm::copy($_value, $_storage.begin())", + "::llvm::copy($_value, $_storage.begin())", /*convertToAttributeCall=*/ - "DenseI32ArrayAttr::get($_ctxt, $_storage)", + "::mlir::DenseI32ArrayAttr::get($_ctxt, $_storage)", /*convertFromAttributeCall=*/ "return convertFromAttribute($_storage, $_attr, $_diag);", /*readFromMlirBytecodeCall=*/readBytecodeSegmentSize, /*writeToMlirBytecodeCall=*/writeBytecodeSegmentSize, /*hashPropertyCall=*/ - "llvm::hash_combine_range(std::begin($_storage), " + "::llvm::hash_combine_range(std::begin($_storage), " "std::end($_storage));", /*StringRef defaultValue=*/""); }; @@ -447,7 +447,7 @@ void OpOrAdaptorHelper::computeAttrMetadata() { if (op.getDialect().usePropertiesForAttributes()) { operandSegmentsSizeStorage = llvm::formatv("std::array", op.getNumOperands()); - operandSegmentsSize = {"odsOperandSegmentSizes", + operandSegmentsSize = {"operandSegmentSizes", makeProperty(operandSegmentsSizeStorage)}; } else { attrMetadata.insert( @@ -460,7 +460,7 @@ void OpOrAdaptorHelper::computeAttrMetadata() { if (op.getDialect().usePropertiesForAttributes()) { resultSegmentsSizeStorage = llvm::formatv("std::array", op.getNumResults()); - resultSegmentsSize = {"odsResultSegmentSizes", + resultSegmentsSize = {"resultSegmentSizes", makeProperty(resultSegmentsSizeStorage)}; } else { attrMetadata.insert( @@ -868,10 +868,12 @@ while (true) {{ if (useProperties) { for (const std::pair &it : emitHelper.getAttrMetadata()) { + const AttributeMetadata &metadata = it.second; + if (metadata.constraint && metadata.constraint->isDerivedAttr()) + continue; body << formatv( "auto tblgen_{0} = getProperties().{0}; (void)tblgen_{0};\n", it.first); - const AttributeMetadata &metadata = it.second; if (metadata.isRequired) body << formatv( "if (!tblgen_{0}) return {1}\"requires attribute 
'{0}'\");\n", @@ -1306,10 +1308,12 @@ void OpEmitter::genPropertiesSupport() { std::string getAttr; llvm::raw_string_ostream os(getAttr); os << " auto attr = dict.get(\"" << name << "\");"; - if (name == "odsOperandSegmentSizes") { + if (name == operandSegmentAttrName) { + // Backward compat for now, TODO: Remove at some point. os << " if (!attr) attr = dict.get(\"operand_segment_sizes\");"; } - if (name == "odsResultSegmentSizes") { + if (name == resultSegmentAttrName) { + // Backward compat for now, TODO: Remove at some point. os << " if (!attr) attr = dict.get(\"result_segment_sizes\");"; } os.flush(); @@ -1327,10 +1331,12 @@ void OpEmitter::genPropertiesSupport() { std::string getAttr; llvm::raw_string_ostream os(getAttr); os << " auto attr = dict.get(\"" << name << "\");"; - if (name == "odsOperandSegmentSizes") { + if (name == operandSegmentAttrName) { + // Backward compat for now os << " if (!attr) attr = dict.get(\"operand_segment_sizes\");"; } - if (name == "odsResultSegmentSizes") { + if (name == resultSegmentAttrName) { + // Backward compat for now os << " if (!attr) attr = dict.get(\"result_segment_sizes\");"; } os.flush(); @@ -1445,7 +1451,7 @@ void OpEmitter::genPropertiesSupport() { )decl"; const char *setInherentAttrMethodFmt = R"decl( if (name == "{0}") {{ - prop.{0} = dyn_cast_or_null>(value); + prop.{0} = ::llvm::dyn_cast_or_null>(value); return; } )decl"; @@ -1466,39 +1472,39 @@ void OpEmitter::genPropertiesSupport() { // even though it is a native property. 
const auto *namedProperty = cast(attrOrProp); StringRef name = namedProperty->name; - if (name != "odsOperandSegmentSizes" && name != "odsResultSegmentSizes") + if (name != operandSegmentAttrName && name != resultSegmentAttrName) continue; auto &prop = namedProperty->prop; FmtContext fctx; fctx.addSubst("_ctxt", "ctx"); fctx.addSubst("_storage", Twine("prop.") + name); - if (name == "odsOperandSegmentSizes") { + if (name == operandSegmentAttrName) { getInherentAttrMethod - << formatv(" if (name == \"odsOperandSegmentSizes\" || name == " + << formatv(" if (name == \"operand_segment_sizes\" || name == " "\"{0}\") return ", operandSegmentAttrName); } else { getInherentAttrMethod - << formatv(" if (name == \"odsResultSegmentSizes\" || name == " + << formatv(" if (name == \"result_segment_sizes\" || name == " "\"{0}\") return ", resultSegmentAttrName); } getInherentAttrMethod << tgfmt(prop.getConvertToAttributeCall(), &fctx) << ";\n"; - if (name == "odsOperandSegmentSizes") { - setInherentAttrMethod << formatv( - " if (name == \"odsOperandSegmentSizes\" || name == " - "\"{0}\") {{", - operandSegmentAttrName); + if (name == operandSegmentAttrName) { + setInherentAttrMethod + << formatv(" if (name == \"operand_segment_sizes\" || name == " + "\"{0}\") {{", + operandSegmentAttrName); } else { setInherentAttrMethod - << formatv(" if (name == \"odsResultSegmentSizes\" || name == " + << formatv(" if (name == \"result_segment_sizes\" || name == " "\"{0}\") {{", resultSegmentAttrName); } setInherentAttrMethod << formatv(R"decl( - auto arrAttr = dyn_cast_or_null(value); + auto arrAttr = ::llvm::dyn_cast_or_null<::mlir::DenseI32ArrayAttr>(value); if (!arrAttr) return; if (arrAttr.size() != sizeof(prop.{0}) / sizeof(int32_t)) return; @@ -1507,7 +1513,7 @@ void OpEmitter::genPropertiesSupport() { } )decl", name); - if (name == "odsOperandSegmentSizes") { + if (name == operandSegmentAttrName) { populateInherentAttrsMethod << formatv(" attrs.append(\"{0}\", {1});\n", 
operandSegmentAttrName, tgfmt(prop.getConvertToAttributeCall(), &fctx)); @@ -2015,7 +2021,7 @@ void OpEmitter::genNamedOperandGetters() { if (op.getTrait("::mlir::OpTrait::AttrSizedOperandSegments")) { if (op.getDialect().usePropertiesForAttributes()) attrSizeInitCode = formatv(adapterSegmentSizeAttrInitCodeProperties, - "getProperties().odsOperandSegmentSizes"); + "getProperties().operandSegmentSizes"); else attrSizeInitCode = formatv(opSegmentSizeAttrInitCode, @@ -2056,8 +2062,8 @@ void OpEmitter::genNamedOperandSetters() { if (emitHelper.hasProperties()) body << formatv(", ::mlir::MutableOperandRange::OperandSegment({0}u, " "{{getOperandSegmentSizesAttrName(), " - "DenseI32ArrayAttr::get(getContext(), " - "getProperties().odsOperandSegmentSizes)})", + "::mlir::DenseI32ArrayAttr::get(getContext(), " + "getProperties().operandSegmentSizes)})", i); else body << formatv( @@ -2116,7 +2122,7 @@ void OpEmitter::genNamedResultGetters() { if (attrSizedResults) { if (op.getDialect().usePropertiesForAttributes()) attrSizeInitCode = formatv(adapterSegmentSizeAttrInitCodeProperties, - "getProperties().odsResultSegmentSizes"); + "getProperties().resultSegmentSizes"); else attrSizeInitCode = formatv(opSegmentSizeAttrInitCode, @@ -2291,11 +2297,11 @@ void OpEmitter::genSeparateArgParamBuilder() { << ");\n"; } - // Automatically create the 'result_segment_sizes' attribute using + // Automatically create the 'resultSegmentSizes' attribute using // the length of the type ranges. 
if (op.getTrait("::mlir::OpTrait::AttrSizedResultSegments")) { if (op.getDialect().usePropertiesForAttributes()) { - body << " llvm::copy(ArrayRef({"; + body << " ::llvm::copy(::llvm::ArrayRef({"; } else { std::string getterName = op.getGetterName(resultSegmentAttrName); body << " " << builderOpState << ".addAttribute(" << getterName @@ -2321,7 +2327,7 @@ void OpEmitter::genSeparateArgParamBuilder() { if (op.getDialect().usePropertiesForAttributes()) { body << "}), " << builderOpState << ".getOrAddProperties()." - "odsResultSegmentSizes.begin());\n"; + "resultSegmentSizes.begin());\n"; } else { body << "}));\n"; } @@ -2943,11 +2949,11 @@ void OpEmitter::genCodeForAddingArgAndRegionForBuilder( if (op.getTrait("::mlir::OpTrait::AttrSizedOperandSegments")) { std::string sizes = op.getGetterName(operandSegmentAttrName); if (op.getDialect().usePropertiesForAttributes()) { - body << " llvm::copy(ArrayRef({"; + body << " ::llvm::copy(::llvm::ArrayRef({"; emitSegment(); body << "}), " << builderOpState << ".getOrAddProperties()." - "odsOperandSegmentSizes.begin());\n"; + "operandSegmentSizes.begin());\n"; } else { body << " " << builderOpState << ".addAttribute(" << sizes << "AttrName(" << builderOpState << ".name), " @@ -3819,8 +3825,7 @@ OpOperandAdaptorEmitter::OpOperandAdaptorEmitter( if (attr) { storageType = attr->getStorageType(); } else { - if (name != "odsOperandSegmentSizes" && - name != "odsResultSegmentSizes") { + if (name != operandSegmentAttrName && name != resultSegmentAttrName) { report_fatal_error("unexpected AttributeMetadata"); } // TODO: update to use native integers. 
@@ -3935,7 +3940,7 @@ OpOperandAdaptorEmitter::OpOperandAdaptorEmitter( if (op.getDialect().usePropertiesForAttributes()) sizeAttrInit = formatv(adapterSegmentSizeAttrInitCodeProperties, - llvm::formatv("getProperties().odsOperandSegmentSizes")); + llvm::formatv("getProperties().operandSegmentSizes")); else sizeAttrInit = formatv(adapterSegmentSizeAttrInitCode, emitHelper.getAttr(operandSegmentAttrName)); diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp index c38f873ddaba4..546d4616f7173 100644 --- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp @@ -1662,14 +1662,14 @@ void OperationFormat::genParserVariadicSegmentResolution(Operator &op, body << "1"; }; if (op.getDialect().usePropertiesForAttributes()) { - body << "llvm::copy(ArrayRef({"; + body << "::llvm::copy(::llvm::ArrayRef({"; llvm::interleaveComma(op.getOperands(), body, interleaveFn); body << formatv("}), " "result.getOrAddProperties<{0}::Properties>()." - "odsOperandSegmentSizes.begin());\n", + "operandSegmentSizes.begin());\n", op.getCppClassName()); } else { - body << " result.addAttribute(\"operand_segment_sizes\", " + body << " result.addAttribute(\"operandSegmentSizes\", " << "parser.getBuilder().getDenseI32ArrayAttr({"; llvm::interleaveComma(op.getOperands(), body, interleaveFn); body << "}));\n"; @@ -1710,10 +1710,10 @@ void OperationFormat::genParserVariadicSegmentResolution(Operator &op, llvm::interleaveComma(op.getResults(), body, interleaveFn); body << formatv("}), " "result.getOrAddProperties<{0}::Properties>()." 
- "odsResultSegmentSizes.begin());\n", + "resultSegmentSizes.begin());\n", op.getCppClassName()); } else { - body << " result.addAttribute(\"result_segment_sizes\", " + body << " result.addAttribute(\"resultSegmentSizes\", " << "parser.getBuilder().getDenseI32ArrayAttr({"; llvm::interleaveComma(op.getResults(), body, interleaveFn); body << "}));\n"; @@ -1767,10 +1767,10 @@ static void genAttrDictPrinter(OperationFormat &fmt, Operator &op, // Elide the variadic segment size attributes if necessary. if (!fmt.allOperands && op.getTrait("::mlir::OpTrait::AttrSizedOperandSegments")) - body << " elidedAttrs.push_back(\"operand_segment_sizes\");\n"; + body << " elidedAttrs.push_back(\"operandSegmentSizes\");\n"; if (!fmt.allResultTypes && op.getTrait("::mlir::OpTrait::AttrSizedResultSegments")) - body << " elidedAttrs.push_back(\"result_segment_sizes\");\n"; + body << " elidedAttrs.push_back(\"resultSegmentSizes\");\n"; for (const StringRef key : fmt.inferredAttributes.keys()) body << " elidedAttrs.push_back(\"" << key << "\");\n"; for (const NamedAttribute *attr : fmt.usedAttributes) diff --git a/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp b/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp index dd6e52d300efe..7c7b991fb7b07 100644 --- a/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp +++ b/mlir/tools/mlir-tblgen/OpPythonBindingGen.cpp @@ -170,7 +170,7 @@ constexpr const char *opVariadicSegmentTemplate = R"Py( def {0}(self): {1}_range = _ods_segmented_accessor( self.operation.{1}s, - self.operation.attributes["{1}_segment_sizes"], {2}) + self.operation.attributes["{1}SegmentSizes"], {2}) return {1}_range{3} )Py"; diff --git a/mlir/unittests/IR/AdaptorTest.cpp b/mlir/unittests/IR/AdaptorTest.cpp index ec15d30875755..4a01d2c52b645 100644 --- a/mlir/unittests/IR/AdaptorTest.cpp +++ b/mlir/unittests/IR/AdaptorTest.cpp @@ -39,7 +39,7 @@ TEST(Adaptor, GenericAdaptorsOperandAccess) { // value from the value 0. 
SmallVector> v = {0, 4}; OIListSimple::Properties prop; - prop.odsOperandSegmentSizes = {1, 0, 1}; + prop.operandSegmentSizes = {1, 0, 1}; OIListSimple::GenericAdaptor>> d(v, {}, prop, {}); EXPECT_EQ(d.getArg0(), 0); diff --git a/mlir/unittests/IR/DialectTest.cpp b/mlir/unittests/IR/DialectTest.cpp index a2b58bf731976..e99d46e6d2643 100644 --- a/mlir/unittests/IR/DialectTest.cpp +++ b/mlir/unittests/IR/DialectTest.cpp @@ -136,4 +136,50 @@ TEST(Dialect, RepeatedDelayedRegistration) { EXPECT_TRUE(testDialectInterface != nullptr); } +namespace { +/// A dummy extension that increases a counter when being applied and +/// recursively adds additional extensions. +struct DummyExtension : DialectExtension { + DummyExtension(int *counter, int numRecursive) + : DialectExtension(), counter(counter), numRecursive(numRecursive) {} + + void apply(MLIRContext *ctx, TestDialect *dialect) const final { + ++(*counter); + DialectRegistry nestedRegistry; + for (int i = 0; i < numRecursive; ++i) + nestedRegistry.addExtension( + std::make_unique(counter, /*numRecursive=*/0)); + // Adding additional extensions may trigger a reallocation of the + // `extensions` vector in the dialect registry. + ctx->appendDialectRegistry(nestedRegistry); + } + +private: + int *counter; + int numRecursive; +}; +} // namespace + +TEST(Dialect, NestedDialectExtension) { + DialectRegistry registry; + registry.insert(); + + // Add an extension that adds 100 more extensions. + int counter1 = 0; + registry.addExtension(std::make_unique(&counter1, 100)); + // Add one more extension. This should not crash. + int counter2 = 0; + registry.addExtension(std::make_unique(&counter2, 0)); + + // Load dialect and apply extensions. + MLIRContext context(registry); + Dialect *testDialect = context.getOrLoadDialect(); + ASSERT_TRUE(testDialect != nullptr); + + // Extensions may be applied multiple times. Make sure that each expected + // extension was applied at least once. 
+ EXPECT_GE(counter1, 101); + EXPECT_GE(counter2, 1); +} + } // namespace diff --git a/mlir/unittests/Pass/PassManagerTest.cpp b/mlir/unittests/Pass/PassManagerTest.cpp index 97349d681c3a0..70a679125c0ea 100644 --- a/mlir/unittests/Pass/PassManagerTest.cpp +++ b/mlir/unittests/Pass/PassManagerTest.cpp @@ -10,6 +10,7 @@ #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/Diagnostics.h" #include "mlir/Pass/Pass.h" #include "gtest/gtest.h" @@ -144,4 +145,39 @@ TEST(PassManagerTest, InvalidPass) { "intend to nest?"); } +/// Simple pass to annotate a func::FuncOp with the results of analysis. +struct InitializeCheckingPass + : public PassWrapper> { + MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(InitializeCheckingPass) + LogicalResult initialize(MLIRContext *ctx) final { + initialized = true; + return success(); + } + bool initialized = false; + + void runOnOperation() override { + if (!initialized) { + getOperation()->emitError() << "Pass isn't initialized!"; + signalPassFailure(); + } + } +}; + +TEST(PassManagerTest, PassInitialization) { + MLIRContext context; + context.allowUnregisteredDialects(); + + // Create a module + OwningOpRef module(ModuleOp::create(UnknownLoc::get(&context))); + + // Instantiate and run our pass. + auto pm = PassManager::on(&context); + pm.addPass(std::make_unique()); + EXPECT_TRUE(succeeded(pm.run(module.get()))); + + // Adding a second copy of the pass, we should also initialize it! 
+ pm.addPass(std::make_unique()); + EXPECT_TRUE(succeeded(pm.run(module.get()))); +} + } // namespace diff --git a/openmp/libomptarget/src/OmptCallback.cpp b/openmp/libomptarget/src/OmptCallback.cpp index cd44d0903be9c..4882a762adbf6 100644 --- a/openmp/libomptarget/src/OmptCallback.cpp +++ b/openmp/libomptarget/src/OmptCallback.cpp @@ -71,7 +71,8 @@ static uint64_t createRegionId() { } void Interface::beginTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin, - size_t Size, void *Code) { + void **TgtPtrBegin, size_t Size, + void *Code) { beginTargetDataOperation(); if (ompt_callback_target_data_op_emi_fn) { // HostOpId will be set by the tool. Invoke the tool supplied data op EMI @@ -79,7 +80,7 @@ void Interface::beginTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin, ompt_callback_target_data_op_emi_fn( ompt_scope_begin, TargetTaskData, &TargetData, &TargetRegionOpId, ompt_target_data_alloc, HstPtrBegin, - /* SrcDeviceNum */ omp_get_initial_device(), /* TgtPtrBegin */ nullptr, + /* SrcDeviceNum */ omp_get_initial_device(), *TgtPtrBegin, /* TgtDeviceNum */ DeviceId, Size, Code); } else if (ompt_callback_target_data_op_fn) { // HostOpId is set by the runtime @@ -87,13 +88,14 @@ void Interface::beginTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin, // Invoke the tool supplied data op callback ompt_callback_target_data_op_fn( TargetData.value, HostOpId, ompt_target_data_alloc, HstPtrBegin, - /* SrcDeviceNum */ omp_get_initial_device(), /* TgtPtrBegin */ nullptr, + /* SrcDeviceNum */ omp_get_initial_device(), *TgtPtrBegin, /* TgtDeviceNum */ DeviceId, Size, Code); } } void Interface::endTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin, - size_t Size, void *Code) { + void **TgtPtrBegin, size_t Size, + void *Code) { // Only EMI callback handles end scope if (ompt_callback_target_data_op_emi_fn) { // HostOpId will be set by the tool. 
Invoke the tool supplied data op EMI @@ -101,7 +103,7 @@ void Interface::endTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin, ompt_callback_target_data_op_emi_fn( ompt_scope_end, TargetTaskData, &TargetData, &TargetRegionOpId, ompt_target_data_alloc, HstPtrBegin, - /* SrcDeviceNum */ omp_get_initial_device(), /* TgtPtrBegin */ nullptr, + /* SrcDeviceNum */ omp_get_initial_device(), *TgtPtrBegin, /* TgtDeviceNum */ DeviceId, Size, Code); } endTargetDataOperation(); diff --git a/openmp/libomptarget/src/OmptInterface.h b/openmp/libomptarget/src/OmptInterface.h index c3a52969bf80e..178cedacf4a58 100644 --- a/openmp/libomptarget/src/OmptInterface.h +++ b/openmp/libomptarget/src/OmptInterface.h @@ -47,12 +47,12 @@ static ompt_get_target_task_data_t ompt_get_target_task_data_fn; class Interface { public: /// Top-level function for invoking callback before device data allocation - void beginTargetDataAlloc(int64_t DeviceId, void *TgtPtrBegin, size_t Size, - void *Code); + void beginTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin, + void **TgtPtrBegin, size_t Size, void *Code); /// Top-level function for invoking callback after device data allocation - void endTargetDataAlloc(int64_t DeviceId, void *TgtPtrBegin, size_t Size, - void *Code); + void endTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin, + void **TgtPtrBegin, size_t Size, void *Code); /// Top-level function for invoking callback before data submit void beginTargetDataSubmit(int64_t DeviceId, void *HstPtrBegin, diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp index 276b7c9f499c5..1421408435c2c 100644 --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -561,12 +561,14 @@ __tgt_target_table *DeviceTy::loadBinary(void *Img) { void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) { /// RAII to establish tool anchors before and after data allocation + void *TargetPtr = nullptr; OMPT_IF_BUILT(InterfaceRAII TargetDataAllocRAII( 
RegionInterface.getCallbacks(), - RTLDeviceID, HstPtr, Size, + RTLDeviceID, HstPtr, &TargetPtr, Size, /* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));) - return RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind); + TargetPtr = RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind); + return TargetPtr; } int32_t DeviceTy::deleteData(void *TgtAllocBegin, int32_t Kind) { diff --git a/openmp/libomptarget/src/interface.cpp b/openmp/libomptarget/src/interface.cpp index 7fb72e16088ce..d47f0a3458587 100644 --- a/openmp/libomptarget/src/interface.cpp +++ b/openmp/libomptarget/src/interface.cpp @@ -108,6 +108,21 @@ targetDataMapper(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, TargetAsyncInfoTy TargetAsyncInfo(Device); AsyncInfoTy &AsyncInfo = TargetAsyncInfo; + /// RAII to establish tool anchors before and after data begin / end / update + OMPT_IF_BUILT(assert((TargetDataFunction == targetDataBegin || + TargetDataFunction == targetDataEnd || + TargetDataFunction == targetDataUpdate) && + "Encountered unexpected TargetDataFunction during " + "execution of targetDataMapper"); + auto CallbackFunctions = + (TargetDataFunction == targetDataBegin) + ? RegionInterface.getCallbacks() + : (TargetDataFunction == targetDataEnd) + ? 
RegionInterface.getCallbacks() + : RegionInterface.getCallbacks(); + InterfaceRAII TargetDataRAII(CallbackFunctions, DeviceId, + OMPT_GET_RETURN_ADDRESS(0));) + int Rc = OFFLOAD_SUCCESS; Rc = TargetDataFunction(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, AsyncInfo, @@ -129,12 +144,6 @@ EXTERN void __tgt_target_data_begin_mapper(ident_t *Loc, int64_t DeviceId, map_var_info_t *ArgNames, void **ArgMappers) { TIMESCOPE_WITH_IDENT(Loc); - /// RAII to establish tool anchors before and after data begin - OMPT_IF_BUILT(InterfaceRAII TargetDataEnterRAII( - RegionInterface.getCallbacks(), - DeviceId, - /* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));) - targetDataMapper(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, targetDataBegin, "Entering OpenMP data region", "begin"); @@ -161,12 +170,6 @@ EXTERN void __tgt_target_data_end_mapper(ident_t *Loc, int64_t DeviceId, map_var_info_t *ArgNames, void **ArgMappers) { TIMESCOPE_WITH_IDENT(Loc); - /// RAII to establish tool anchors before and after data end - OMPT_IF_BUILT(InterfaceRAII TargetDataExitRAII( - RegionInterface.getCallbacks(), - DeviceId, - /* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));) - targetDataMapper(Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, targetDataEnd, "Exiting OpenMP data region", "end"); @@ -190,12 +193,6 @@ EXTERN void __tgt_target_data_update_mapper(ident_t *Loc, int64_t DeviceId, map_var_info_t *ArgNames, void **ArgMappers) { TIMESCOPE_WITH_IDENT(Loc); - /// RAII to establish tool anchors before and after data update - OMPT_IF_BUILT(InterfaceRAII TargetDataUpdateRAII( - RegionInterface.getCallbacks(), - DeviceId, - /* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));) - targetDataMapper( Loc, DeviceId, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes, ArgNames, ArgMappers, targetDataUpdate, "Updating OpenMP data", "update"); @@ -295,7 +292,8 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams, 
DeviceTy &Device = *PM->Devices[DeviceId]; TargetAsyncInfoTy TargetAsyncInfo(Device); AsyncInfoTy &AsyncInfo = TargetAsyncInfo; - OMPT_IF_BUILT(InterfaceRAII TargetDataAllocRAII( + /// RAII to establish tool anchors before and after target region + OMPT_IF_BUILT(InterfaceRAII TargetRAII( RegionInterface.getCallbacks(), DeviceId, /* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));) @@ -363,7 +361,8 @@ EXTERN int __tgt_target_kernel_replay(ident_t *Loc, int64_t DeviceId, return OMP_TGT_FAIL; } DeviceTy &Device = *PM->Devices[DeviceId]; - OMPT_IF_BUILT(InterfaceRAII TargetDataAllocRAII( + /// RAII to establish tool anchors before and after target region + OMPT_IF_BUILT(InterfaceRAII TargetRAII( RegionInterface.getCallbacks(), DeviceId, /* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));) diff --git a/openmp/libomptarget/test/ompt/veccopy_data.c b/openmp/libomptarget/test/ompt/veccopy_data.c new file mode 100644 index 0000000000000..5bbc47dc11a7d --- /dev/null +++ b/openmp/libomptarget/test/ompt/veccopy_data.c @@ -0,0 +1,128 @@ +// RUN: %libomptarget-compile-run-and-check-generic +// REQUIRES: ompt +// UNSUPPORTED: aarch64-unknown-linux-gnu +// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +// UNSUPPORTED: x86_64-pc-linux-gnu +// UNSUPPORTED: x86_64-pc-linux-gnu-oldDriver +// UNSUPPORTED: x86_64-pc-linux-gnu-LTO + +/* + * Example OpenMP program that registers EMI callbacks. + * Explicitly testing for an initialized device num and + * #pragma omp target [data enter / data exit / update] + * The latter with the addition of a nowait clause. 
+ */ + +#include +#include + +#include "callbacks.h" +#include "register_emi.h" + +#define N 100000 + +#pragma omp declare target +int c[N]; +#pragma omp end declare target + +int main() { + int a[N]; + int b[N]; + + int i; + + for (i = 0; i < N; i++) + a[i] = 0; + + for (i = 0; i < N; i++) + b[i] = i; + + for (i = 0; i < N; i++) + c[i] = 0; + +#pragma omp target enter data map(to : a) +#pragma omp target parallel for + { + for (int j = 0; j < N; j++) + a[j] = b[j]; + } +#pragma omp target exit data map(from : a) + +#pragma omp target parallel for map(alloc : c) + { + for (int j = 0; j < N; j++) + c[j] = 2 * j + 1; + } +#pragma omp target update from(c) nowait +#pragma omp barrier + + int rc = 0; + for (i = 0; i < N; i++) { + if (a[i] != i) { + rc++; + printf("Wrong value: a[%d]=%d\n", i, a[i]); + } + } + + for (i = 0; i < N; i++) { + if (c[i] != 2 * i + 1) { + rc++; + printf("Wrong value: c[%d]=%d\n", i, c[i]); + } + } + + if (!rc) + printf("Success\n"); + + return rc; +} + +/// CHECK-NOT: Callback Target EMI: +/// CHECK-NOT: device_num=-1 +/// CHECK: Callback Init: +/// CHECK: Callback Load: +/// CHECK: Callback Target EMI: kind=2 endpoint=1 +/// CHECK-NOT: device_num=-1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK-NOT: dest=(nil) +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback Target EMI: kind=2 endpoint=2 +/// CHECK-NOT: device_num=-1 +/// CHECK: Callback Target EMI: kind=1 endpoint=1 +/// CHECK-NOT: device_num=-1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=1 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK-NOT: dest=(nil) +/// CHECK: Callback DataOp EMI: endpoint=1 optype=2 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=2 +/// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=1 +/// CHECK: Callback Submit EMI: endpoint=2 req_num_teams=1 +/// CHECK: Callback DataOp EMI: endpoint=1 
optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback Target EMI: kind=1 endpoint=2 +/// CHECK-NOT: device_num=-1 +/// CHECK: Callback Target EMI: kind=3 endpoint=1 +/// CHECK-NOT: device_num=-1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=4 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=4 +/// CHECK: Callback Target EMI: kind=3 endpoint=2 +/// CHECK-NOT: device_num=-1 +/// CHECK: Callback Target EMI: kind=1 endpoint=1 +/// CHECK-NOT: device_num=-1 +/// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=1 +/// CHECK: Callback Submit EMI: endpoint=2 req_num_teams=1 +/// CHECK: Callback Target EMI: kind=1 endpoint=2 +/// CHECK-NOT: device_num=-1 +/// CHECK: Callback Target EMI: kind=4 endpoint=1 +/// CHECK-NOT: device_num=-1 +/// CHECK: Callback DataOp EMI: endpoint=1 optype=3 +/// CHECK: Callback DataOp EMI: endpoint=2 optype=3 +/// CHECK: Callback Target EMI: kind=4 endpoint=2 +/// CHECK-NOT: device_num=-1 +/// CHECK: Callback Fini: diff --git a/openmp/libomptarget/test/ompt/veccopy_disallow_both.c b/openmp/libomptarget/test/ompt/veccopy_disallow_both.c index 6fdcfdb035375..9d3498dc72d23 100644 --- a/openmp/libomptarget/test/ompt/veccopy_disallow_both.c +++ b/openmp/libomptarget/test/ompt/veccopy_disallow_both.c @@ -63,10 +63,12 @@ int main() { /// CHECK: Callback Target EMI: kind=1 endpoint=1 /// CHECK: Callback DataOp EMI: endpoint=1 optype=1 /// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK-NOT: dest=(nil) /// CHECK: Callback DataOp EMI: endpoint=1 optype=2 /// CHECK: Callback DataOp EMI: endpoint=2 optype=2 /// CHECK: Callback DataOp EMI: endpoint=1 optype=1 /// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK-NOT: dest=(nil) /// CHECK: Callback DataOp EMI: endpoint=1 optype=2 /// CHECK: 
Callback DataOp EMI: endpoint=2 optype=2 /// CHECK: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=1 @@ -82,10 +84,12 @@ int main() { /// CHECK: Callback Target EMI: kind=1 endpoint=1 /// CHECK: Callback DataOp EMI: endpoint=1 optype=1 /// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK-NOT: dest=(nil) /// CHECK: Callback DataOp EMI: endpoint=1 optype=2 /// CHECK: Callback DataOp EMI: endpoint=2 optype=2 /// CHECK: Callback DataOp EMI: endpoint=1 optype=1 /// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK-NOT: dest=(nil) /// CHECK: Callback DataOp EMI: endpoint=1 optype=2 /// CHECK: Callback DataOp EMI: endpoint=2 optype=2 /// CHECK: Callback Submit: target_id=[[TARGET_ID:[0-9]+]] host_op_id=[[HOST_OP_ID:[0-9]+]] req_num_teams=0 diff --git a/openmp/libomptarget/test/ompt/veccopy_emi.c b/openmp/libomptarget/test/ompt/veccopy_emi.c index f15dfb18da46f..5adf302bd1fff 100644 --- a/openmp/libomptarget/test/ompt/veccopy_emi.c +++ b/openmp/libomptarget/test/ompt/veccopy_emi.c @@ -61,10 +61,12 @@ int main() { /// CHECK: Callback Target EMI: kind=1 endpoint=1 /// CHECK: Callback DataOp EMI: endpoint=1 optype=1 /// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK-NOT: dest=(nil) /// CHECK: Callback DataOp EMI: endpoint=1 optype=2 /// CHECK: Callback DataOp EMI: endpoint=2 optype=2 /// CHECK: Callback DataOp EMI: endpoint=1 optype=1 /// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK-NOT: dest=(nil) /// CHECK: Callback DataOp EMI: endpoint=1 optype=2 /// CHECK: Callback DataOp EMI: endpoint=2 optype=2 /// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=1 @@ -81,10 +83,12 @@ int main() { /// CHECK: Callback Target EMI: kind=1 endpoint=1 /// CHECK: Callback DataOp EMI: endpoint=1 optype=1 /// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK-NOT: dest=(nil) /// CHECK: Callback DataOp EMI: endpoint=1 optype=2 /// CHECK: Callback DataOp EMI: endpoint=2 optype=2 /// CHECK: 
Callback DataOp EMI: endpoint=1 optype=1 /// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK-NOT: dest=(nil) /// CHECK: Callback DataOp EMI: endpoint=1 optype=2 /// CHECK: Callback DataOp EMI: endpoint=2 optype=2 /// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=0 diff --git a/openmp/libomptarget/test/ompt/veccopy_emi_map.c b/openmp/libomptarget/test/ompt/veccopy_emi_map.c index af0743f0369c5..edf08325c41ba 100644 --- a/openmp/libomptarget/test/ompt/veccopy_emi_map.c +++ b/openmp/libomptarget/test/ompt/veccopy_emi_map.c @@ -62,10 +62,12 @@ int main() { /// CHECK: Callback Target EMI: kind=1 endpoint=1 /// CHECK: Callback DataOp EMI: endpoint=1 optype=1 /// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK-NOT: dest=(nil) /// CHECK: Callback DataOp EMI: endpoint=1 optype=2 /// CHECK: Callback DataOp EMI: endpoint=2 optype=2 /// CHECK: Callback DataOp EMI: endpoint=1 optype=1 /// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK-NOT: dest=(nil) /// CHECK: Callback DataOp EMI: endpoint=1 optype=2 /// CHECK: Callback DataOp EMI: endpoint=2 optype=2 /// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=1 @@ -82,10 +84,12 @@ int main() { /// CHECK: Callback Target EMI: kind=1 endpoint=1 /// CHECK: Callback DataOp EMI: endpoint=1 optype=1 /// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK-NOT: dest=(nil) /// CHECK: Callback DataOp EMI: endpoint=1 optype=2 /// CHECK: Callback DataOp EMI: endpoint=2 optype=2 /// CHECK: Callback DataOp EMI: endpoint=1 optype=1 /// CHECK: Callback DataOp EMI: endpoint=2 optype=1 +/// CHECK-NOT: dest=(nil) /// CHECK: Callback DataOp EMI: endpoint=1 optype=2 /// CHECK: Callback DataOp EMI: endpoint=2 optype=2 /// CHECK: Callback Submit EMI: endpoint=1 req_num_teams=0